From: Heikki Tuuri Date: March 19 2004 8:51pm Subject: bk commit into 4.0 tree (heikki:1.1737) List-Archive: http://lists.mysql.com/internals/12980 Message-Id: <200403192051.i2JKpVoI016844@hundin.mysql.fi> Below is the list of changes that have just been committed into a local 4.0 repository of heikki. When heikki does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://www.mysql.com/doc/I/n/Installing_source_tree.html ChangeSet 1.1737 04/03/19 22:51:25 heikki@stripped +2 -0 btr0btr.c: Improve space utilization if we have 3 kB - 8 kB rows to insert in the order of the primary key btr0cur.c: Fix bug: The row count and key cardinality estimate was grossly too small if each clustered index page only contained one record innobase/btr/btr0btr.c 1.24 04/03/19 22:50:41 heikki@stripped +29 -32 Improve space utilization if we have 3 kB - 8 kB rows to insert in the order of the primary key innobase/btr/btr0cur.c 1.33 04/03/19 22:49:08 heikki@stripped +20 -4 Fix bug: The row count and key cardinality estimate was grossly too small if each clustered index page only contained one record # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: heikki # Host: hundin.mysql.fi # Root: /home/heikki/mysql-4.0 --- 1.23/innobase/btr/btr0btr.c Sat Mar 13 22:43:06 2004 +++ 1.24/innobase/btr/btr0btr.c Fri Mar 19 22:50:41 2004 @@ -76,9 +76,6 @@ we allocate pages for the non-leaf levels of the tree. */ -/* If this many inserts occur sequentially, it affects page split */ -#define BTR_PAGE_SEQ_INSERT_LIMIT 5 - /****************************************************************** Creates a new index page to the tree (not the root, and also not used in page reorganization). */ @@ -1086,18 +1083,18 @@ page = btr_cur_get_page(cursor); insert_point = btr_cur_get_rec(cursor); - if ((page_header_get_ptr(page, PAGE_LAST_INSERT) - == page_rec_get_next(insert_point)) - && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_LEFT) - && ((page_header_get_field(page, PAGE_N_DIRECTION) - >= BTR_PAGE_SEQ_INSERT_LIMIT) - || (page_header_get_field(page, PAGE_N_DIRECTION) + 1 - >= page_get_n_recs(page)))) { + if (page_header_get_ptr(page, PAGE_LAST_INSERT) + == page_rec_get_next(insert_point)) { infimum = page_get_infimum_rec(page); - - if ((infimum != insert_point) - && (page_rec_get_next(infimum) != insert_point)) { + + /* If the convergence is in the middle of a page, include also + the record immediately before the new insert to the upper + page. Otherwise, we could repeatedly move from page to page + lots of records smaller than the convergence point. */ + + if (infimum != insert_point + && page_rec_get_next(infimum) != insert_point) { *split_rec = insert_point; } else { @@ -1131,29 +1128,29 @@ page = btr_cur_get_page(cursor); insert_point = btr_cur_get_rec(cursor); - if ((page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) - && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT) - && ((page_header_get_field(page, PAGE_N_DIRECTION) - >= BTR_PAGE_SEQ_INSERT_LIMIT) - || (page_header_get_field(page, PAGE_N_DIRECTION) + 1 - >= page_get_n_recs(page)))) { + /* We use eager heuristics: if the new insert would be right after + the previous insert on the same page, we assume that there is a + pattern of sequential inserts here. */ + + if (page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) { supremum = page_get_supremum_rec(page); - if ((page_rec_get_next(insert_point) != supremum) - && (page_rec_get_next(page_rec_get_next(insert_point)) - != supremum) - && (page_rec_get_next(page_rec_get_next( - page_rec_get_next(insert_point))) - != supremum)) { - - /* If there are >= 3 user records up from the insert - point, split all but 2 off */ - - *split_rec = page_rec_get_next(page_rec_get_next( - page_rec_get_next(insert_point))); + if (page_rec_get_next(insert_point) != supremum + && page_rec_get_next(page_rec_get_next(insert_point)) + != supremum) { + + /* If there are >= 2 user records up from the insert + point, split all but 1 off. We want to keep one because + then sequential inserts can use the adaptive hash + index, as they can do the necessary checks of the right + search position just by looking at the records on this + page. */ + + *split_rec = page_rec_get_next( + page_rec_get_next(insert_point)); } else { - /* Else split at inserted record */ + /* Else split at the new record to insert */ *split_rec = NULL; } --- 1.32/innobase/btr/btr0cur.c Tue Mar 16 15:23:18 2004 +++ 1.33/innobase/btr/btr0cur.c Fri Mar 19 22:49:08 2004 @@ -2671,10 +2671,11 @@ btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); - /* Count the number of different key values minus one - for each prefix of the key on this index page: we subtract - one because otherwise our algorithm would give a wrong - estimate for an index where there is just one key value */ + /* Count the number of different key values for each prefix of + the key on this index page. If the prefix does not determine + the index record uniquely in te B-tree, then we subtract one + because otherwise our algorithm would give a wrong estimate + for an index where there is just one key value. */ page = btr_cur_get_page(&cursor); @@ -2696,6 +2697,9 @@ &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { + /* We add one if this index record has + a different prefix from the previous */ + n_diff[j]++; } @@ -2705,6 +2709,18 @@ rec = page_rec_get_next(rec); } + if (n_cols == dict_index_get_n_unique_in_tree(index)) { + /* We add one because we know that the first record + on the page certainly had a different prefix than the + last record on the previous index page in the + alphabetical order. Before this fix, if there was + just one big record on each clustered index page, the + algorithm grossly underestimated the number of rows + in the table. */ + + n_diff[n_cols]++; + } + total_external_size += btr_rec_get_externally_stored_len(rec); mtr_commit(&mtr);