From: Bjorn Munch Date: August 30 2011 10:55am Subject: bzr push into mysql-5.5-mtr branch (bjorn.munch:3239 to 3244) List-Archive: http://lists.mysql.com/commits/140852 Message-Id: <201108301055.p7UAtm2k021167@khepri15.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3244 Bjorn Munch 2011-08-30 [merge] null upmerge 3243 Bjorn Munch 2011-08-30 [merge] new merge from 5.5 modified: mysql-test/r/group_by.result mysql-test/t/group_by.test mysql-test/valgrind.supp sql/filesort.cc 3242 Bjorn Munch 2011-08-29 [merge] null upmerge 3241 Bjorn Munch 2011-08-29 [merge] merge from 5.5 main modified: extra/innochecksum.c include/decimal.h mysql-test/r/type_newdecimal.result mysql-test/t/type_newdecimal.test mysql-test/valgrind.supp sql/filesort.cc sql/my_decimal.h storage/innobase/btr/btr0btr.c storage/innobase/btr/btr0cur.c storage/innobase/buf/buf0buf.c storage/innobase/dict/dict0dict.c storage/innobase/dict/dict0load.c storage/innobase/fsp/fsp0fsp.c storage/innobase/include/btr0btr.h storage/innobase/include/btr0cur.h storage/innobase/include/buf0buf.h storage/innobase/include/dict0dict.h storage/innobase/include/fsp0fsp.h storage/innobase/include/mtr0mtr.h storage/innobase/include/mtr0mtr.ic storage/innobase/include/sync0sync.ic storage/innobase/mtr/mtr0mtr.c storage/innobase/row/row0ins.c storage/innobase/row/row0row.c storage/innobase/row/row0upd.c storage/innobase/sync/sync0sync.c storage/innobase/trx/trx0undo.c strings/decimal.c 3240 Bjorn Munch 2011-08-24 MTR: small fix to read_plugin_defs for RPM builds: Also look into directories lib64 in addition to lib modified: mysql-test/mysql-test-run.pl 3239 Bjorn Munch 2011-08-22 Bug #12793170 MYSQLTEST: PROVIDE ACCESS TO ERROR NAMES THROUGH NUMERIC CODES AND VICE VERSA Followup: Some statement may give errors not in the list, map these to "" rather than failing. modified: client/mysqltest.cc === modified file 'extra/innochecksum.c' --- a/extra/innochecksum.c 2011-07-03 23:48:19 +0000 +++ b/extra/innochecksum.c 2011-08-22 14:12:27 +0000 @@ -25,12 +25,7 @@ Published with a permission. */ -/* needed to have access to 64 bit file functions */ -#define _LARGEFILE_SOURCE -#define _LARGEFILE64_SOURCE - -#define _XOPEN_SOURCE 500 /* needed to include getopt.h on some platforms. */ - +#include #include #include #include @@ -53,7 +48,6 @@ /* another argument to specify page ranges... seek to right spot and go from there */ typedef unsigned long int ulint; -typedef unsigned char uchar; /* innodb function in name; modified slightly to not have the ASM version (lots of #ifs that didn't apply) */ ulint mach_read_from_4(uchar *b) === modified file 'include/decimal.h' --- a/include/decimal.h 2011-06-30 15:46:53 +0000 +++ b/include/decimal.h 2011-08-29 09:34:48 +0000 @@ -21,6 +21,15 @@ typedef enum decimal_round_mode; typedef int32 decimal_digit_t; +/** + intg is the number of *decimal* digits (NOT number of decimal_digit_t's !) + before the point + frac is the number of decimal digits after the point + len is the length of buf (length of allocated space) in decimal_digit_t's, + not in bytes + sign false means positive, true means negative + buf is an array of decimal_digit_t's + */ typedef struct st_decimal_t { int intg, frac, len; my_bool sign; === modified file 'mysql-test/mysql-test-run.pl' --- a/mysql-test/mysql-test-run.pl 2011-08-16 09:08:10 +0000 +++ b/mysql-test/mysql-test-run.pl 2011-08-24 12:38:57 +0000 @@ -2132,8 +2132,10 @@ sub find_plugin($$) my $lib_plugin= mtr_file_exists(vs_config_dirs($location,$plugin_filename), "$basedir/lib/plugin/".$plugin_filename, + "$basedir/lib64/plugin/".$plugin_filename, "$basedir/$location/.libs/".$plugin_filename, "$basedir/lib/mysql/plugin/".$plugin_filename, + "$basedir/lib64/mysql/plugin/".$plugin_filename, ); return $lib_plugin; } === modified file 'mysql-test/r/group_by.result' --- a/mysql-test/r/group_by.result 2011-02-18 10:55:24 +0000 +++ b/mysql-test/r/group_by.result 2011-08-30 08:16:23 +0000 @@ -1892,6 +1892,38 @@ a AVG(t1.b) t11c t12c 1 4.0000 6 6 2 2.0000 7 7 DROP TABLE t1; +# +# Bug#11765254 (58200): Assertion failed: param.sort_length when grouping +# by functions +# +SET SQL_BIG_TABLES=1; +CREATE TABLE t1(a INT); +INSERT INTO t1 VALUES (0),(0); +SELECT 1 FROM t1 GROUP BY IF(`a`,'',''); +1 +1 +SELECT 1 FROM t1 GROUP BY TRIM(LEADING RAND() FROM ''); +1 +1 +SELECT 1 FROM t1 GROUP BY SUBSTRING('',SLEEP(0),''); +1 +1 +Warnings: +Warning 1292 Truncated incorrect INTEGER value: '' +Warning 1292 Truncated incorrect INTEGER value: '' +Warning 1292 Truncated incorrect INTEGER value: '' +SELECT 1 FROM t1 GROUP BY SUBSTRING(SYSDATE() FROM 'K' FOR 'jxW<'); +1 +1 +Warnings: +Warning 1292 Truncated incorrect INTEGER value: 'K' +Warning 1292 Truncated incorrect INTEGER value: 'jxW<' +Warning 1292 Truncated incorrect INTEGER value: 'K' +Warning 1292 Truncated incorrect INTEGER value: 'jxW<' +Warning 1292 Truncated incorrect INTEGER value: 'K' +Warning 1292 Truncated incorrect INTEGER value: 'jxW<' +DROP TABLE t1; +SET SQL_BIG_TABLES=0; # End of 5.1 tests # # Bug#49771: Incorrect MIN (date) when minimum value is 0000-00-00 === modified file 'mysql-test/r/type_newdecimal.result' --- a/mysql-test/r/type_newdecimal.result 2011-05-12 03:05:12 +0000 +++ b/mysql-test/r/type_newdecimal.result 2011-08-29 09:34:48 +0000 @@ -1934,3 +1934,14 @@ f1 0.000000000000000000000000 DROP TABLE IF EXISTS t1; End of 5.1 tests +# +# BUG#12911710 - VALGRIND FAILURE IN +# ROW-DEBUG:PERFSCHEMA.SOCKET_SUMMARY_BY_INSTANCE_FUNC +# +CREATE TABLE t1(d1 DECIMAL(60,0) NOT NULL, +d2 DECIMAL(60,0) NOT NULL); +INSERT INTO t1 (d1, d2) VALUES(0.0, 0.0); +SELECT d1 * d2 FROM t1; +d1 * d2 +0 +DROP TABLE t1; === modified file 'mysql-test/t/group_by.test' --- a/mysql-test/t/group_by.test 2011-02-18 10:55:24 +0000 +++ b/mysql-test/t/group_by.test 2011-08-30 08:16:23 +0000 @@ -1284,6 +1284,20 @@ FROM t1 GROUP BY a; DROP TABLE t1; +--echo # +--echo # Bug#11765254 (58200): Assertion failed: param.sort_length when grouping +--echo # by functions +--echo # + +SET SQL_BIG_TABLES=1; +CREATE TABLE t1(a INT); +INSERT INTO t1 VALUES (0),(0); +SELECT 1 FROM t1 GROUP BY IF(`a`,'',''); +SELECT 1 FROM t1 GROUP BY TRIM(LEADING RAND() FROM ''); +SELECT 1 FROM t1 GROUP BY SUBSTRING('',SLEEP(0),''); +SELECT 1 FROM t1 GROUP BY SUBSTRING(SYSDATE() FROM 'K' FOR 'jxW<'); +DROP TABLE t1; +SET SQL_BIG_TABLES=0; --echo # End of 5.1 tests === modified file 'mysql-test/t/type_newdecimal.test' --- a/mysql-test/t/type_newdecimal.test 2011-05-12 03:05:12 +0000 +++ b/mysql-test/t/type_newdecimal.test 2011-08-29 09:34:48 +0000 @@ -1535,3 +1535,17 @@ DROP TABLE IF EXISTS t1; --echo End of 5.1 tests + +--echo # +--echo # BUG#12911710 - VALGRIND FAILURE IN +--echo # ROW-DEBUG:PERFSCHEMA.SOCKET_SUMMARY_BY_INSTANCE_FUNC +--echo # + +CREATE TABLE t1(d1 DECIMAL(60,0) NOT NULL, + d2 DECIMAL(60,0) NOT NULL); + +INSERT INTO t1 (d1, d2) VALUES(0.0, 0.0); +SELECT d1 * d2 FROM t1; + +DROP TABLE t1; + === modified file 'mysql-test/valgrind.supp' --- a/mysql-test/valgrind.supp 2011-06-30 15:46:53 +0000 +++ b/mysql-test/valgrind.supp 2011-08-30 08:51:23 +0000 @@ -876,3 +876,42 @@ fun:buf_buddy_free_low fun:buf_buddy_free } + +# Note the wildcard in the (mangled) function signatures of +# write_keys() and find_all_keys(). +# They both return ha_rows, which is platform dependent. +{ + Bug#12856915 VALGRIND FAILURE IN FILESORT/CREATE_SORT_INDEX / one + Memcheck:Param + write(buf) + obj:*/libpthread*.so + fun:my_write + fun:inline_mysql_file_write + fun:my_b_flush_io_cache + fun:_my_b_write + fun:_Z*10write_keysP13st_sort_paramPPhjP11st_io_cacheS4_ + fun:_Z*13find_all_keysP13st_sort_paramP10SQL_SELECTPPhP11st_io_cacheS6_S6_ + fun:_Z8filesortP3THDP5TABLEP13st_sort_fieldjP10SQL_SELECTybPy +} + +## { +## Bug#12856915 VALGRIND FAILURE IN FILESORT/CREATE_SORT_INDEX / two +## Memcheck:Param +## write(buf) +## obj:*/libpthread*.so +## fun:my_write +## fun:my_b_flush_io_cache +## fun:_Z15merge_many_buffP13st_sort_paramPhP10st_buffpekPjP11st_io_cache +## fun:_Z8filesortP3THDP8st_tableP13st_sort_fieldjP10SQL_SELECTybPy +## } + +{ + Bug#12856915 VALGRIND FAILURE IN FILESORT/CREATE_SORT_INDEX / three + Memcheck:Param + write(buf) + obj:*/libpthread*.so + fun:my_write + fun:inline_mysql_file_write + fun:my_b_flush_io_cache + fun:_Z8filesortP3THDP5TABLEP13st_sort_fieldjP10SQL_SELECTybPy +} === modified file 'sql/filesort.cc' --- a/sql/filesort.cc 2011-06-30 15:46:53 +0000 +++ b/sql/filesort.cc 2011-08-30 08:16:23 +0000 @@ -147,8 +147,6 @@ ha_rows filesort(THD *thd, TABLE *table, error= 1; bzero((char*) ¶m,sizeof(param)); param.sort_length= sortlength(thd, sortorder, s_length, &multi_byte_charset); - /* filesort cannot handle zero-length records. */ - DBUG_ASSERT(param.sort_length); param.ref_length= table->file->ref_length; param.addon_field= 0; param.addon_length= 0; @@ -260,6 +258,9 @@ ha_rows filesort(THD *thd, TABLE *table, } else { + /* filesort cannot handle zero-length records during merge. */ + DBUG_ASSERT(param.sort_length != 0); + if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer) { my_free(table_sort.buffpek); @@ -972,21 +973,10 @@ static void make_sortkey(register SORTPA if (addonf->null_bit && field->is_null()) { nulls[addonf->null_offset]|= addonf->null_bit; -#ifdef HAVE_purify - bzero(to, addonf->length); -#endif } else { -#ifdef HAVE_purify - uchar *end= field->pack(to, field->ptr); - uint length= (uint) ((to + addonf->length) - end); - DBUG_ASSERT((int) length >= 0); - if (length) - bzero(end, length); -#else (void) field->pack(to, field->ptr); -#endif } to+= addonf->length; } === modified file 'sql/my_decimal.h' --- a/sql/my_decimal.h 2011-07-05 23:13:50 +0000 +++ b/sql/my_decimal.h 2011-08-29 09:34:48 +0000 @@ -124,12 +124,8 @@ public: { len= DECIMAL_BUFF_LENGTH; buf= buffer; -#if !defined (HAVE_purify) && !defined(DBUG_OFF) - /* Set buffer to 'random' value to find wrong buffer usage */ - for (uint i= 0; i < DECIMAL_BUFF_LENGTH; i++) - buffer[i]= i; -#endif } + my_decimal() { init(); === modified file 'storage/innobase/btr/btr0btr.c' --- a/storage/innobase/btr/btr0btr.c 2011-08-15 09:18:34 +0000 +++ b/storage/innobase/btr/btr0btr.c 2011-08-29 08:22:43 +0000 @@ -906,28 +906,29 @@ btr_page_alloc_for_ibuf( /**************************************************************//** Allocates a new file page to be used in an index tree. NOTE: we assume that the caller has made the reservation for free extents! -@return new allocated block, x-latched; NULL if out of space */ -UNIV_INTERN -buf_block_t* -btr_page_alloc( -/*===========*/ +@return allocated page number, FIL_NULL if out of space */ +static __attribute__((nonnull(1,5), warn_unused_result)) +ulint +btr_page_alloc_low( +/*===============*/ dict_index_t* index, /*!< in: index */ ulint hint_page_no, /*!< in: hint of a good page */ byte file_direction, /*!< in: direction where a possible page split is made */ ulint level, /*!< in: level where the page is placed in the tree */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr, /*!< in/out: mini-transaction + for the allocation */ + mtr_t* init_mtr) /*!< in/out: mini-transaction + in which the page should be + initialized (may be the same + as mtr), or NULL if it should + not be initialized (the page + at hint was previously freed + in mtr) */ { fseg_header_t* seg_header; page_t* root; - buf_block_t* new_block; - ulint new_page_no; - - if (dict_index_is_ibuf(index)) { - - return(btr_page_alloc_for_ibuf(index, mtr)); - } root = btr_root_get(index, mtr); @@ -941,8 +942,42 @@ btr_page_alloc( reservation for free extents, and thus we know that a page can be allocated: */ - new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no, - file_direction, TRUE, mtr); + return(fseg_alloc_free_page_general( + seg_header, hint_page_no, file_direction, + TRUE, mtr, init_mtr)); +} + +/**************************************************************//** +Allocates a new file page to be used in an index tree. NOTE: we assume +that the caller has made the reservation for free extents! +@return new allocated block, x-latched; NULL if out of space */ +UNIV_INTERN +buf_block_t* +btr_page_alloc( +/*===========*/ + dict_index_t* index, /*!< in: index */ + ulint hint_page_no, /*!< in: hint of a good page */ + byte file_direction, /*!< in: direction where a possible + page split is made */ + ulint level, /*!< in: level where the page is placed + in the tree */ + mtr_t* mtr, /*!< in/out: mini-transaction + for the allocation */ + mtr_t* init_mtr) /*!< in/out: mini-transaction + for x-latching and initializing + the page */ +{ + buf_block_t* new_block; + ulint new_page_no; + + if (dict_index_is_ibuf(index)) { + + return(btr_page_alloc_for_ibuf(index, mtr)); + } + + new_page_no = btr_page_alloc_low( + index, hint_page_no, file_direction, level, mtr, init_mtr); + if (new_page_no == FIL_NULL) { return(NULL); @@ -950,9 +985,16 @@ btr_page_alloc( new_block = buf_page_get(dict_index_get_space(index), dict_table_zip_size(index->table), - new_page_no, RW_X_LATCH, mtr); + new_page_no, RW_X_LATCH, init_mtr); buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW); + if (mtr->freed_clust_leaf) { + mtr_memo_release(mtr, new_block, MTR_MEMO_FREE_CLUST_LEAF); + ut_ad(!mtr_memo_contains(mtr, new_block, + MTR_MEMO_FREE_CLUST_LEAF)); + } + + ut_ad(btr_freed_leaves_validate(mtr)); return(new_block); } @@ -1065,6 +1107,15 @@ btr_page_free_low( fseg_free_page(seg_header, buf_block_get_space(block), buf_block_get_page_no(block), mtr); + + /* The page was marked free in the allocation bitmap, but it + should remain buffer-fixed until mtr_commit(mtr) or until it + is explicitly freed from the mini-transaction. */ + ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + /* TODO: Discard any operations on the page from the redo log + and remove the block from the flush list and the buffer pool. + This would free up buffer pool earlier and reduce writes to + both the tablespace and the redo log. */ } /**************************************************************//** @@ -1078,12 +1129,139 @@ btr_page_free( buf_block_t* block, /*!< in: block to be freed, x-latched */ mtr_t* mtr) /*!< in: mtr */ { - ulint level; - - level = btr_page_get_level(buf_block_get_frame(block), mtr); + const page_t* page = buf_block_get_frame(block); + ulint level = btr_page_get_level(page, mtr); + ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX); btr_page_free_low(index, block, level, mtr); + + /* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */ + ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + + if (level == 0 && dict_index_is_clust(index)) { + /* We may have to call btr_mark_freed_leaves() to + temporarily mark the block nonfree for invoking + btr_store_big_rec_extern_fields_func() after an + update. Remember that the block was freed. */ + mtr->freed_clust_leaf = TRUE; + mtr_memo_push(mtr, block, MTR_MEMO_FREE_CLUST_LEAF); + } + + ut_ad(btr_freed_leaves_validate(mtr)); +} + +/**************************************************************//** +Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free. +For invoking btr_store_big_rec_extern_fields() after an update, +we must temporarily mark freed clustered index pages allocated, so +that off-page columns will not be allocated from them. Between the +btr_store_big_rec_extern_fields() and mtr_commit() we have to +mark the pages free again, so that no pages will be leaked. */ +UNIV_INTERN +void +btr_mark_freed_leaves( +/*==================*/ + dict_index_t* index, /*!< in/out: clustered index */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */ +{ + /* This is loosely based on mtr_memo_release(). */ + + ulint offset; + + ut_ad(dict_index_is_clust(index)); + ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE); + + if (!mtr->freed_clust_leaf) { + return; + } + + offset = dyn_array_get_data_size(&mtr->memo); + + while (offset > 0) { + mtr_memo_slot_t* slot; + buf_block_t* block; + + offset -= sizeof *slot; + + slot = dyn_array_get_element(&mtr->memo, offset); + + if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) { + continue; + } + + /* Because btr_page_alloc() does invoke + mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all + blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the + memo must still be clustered index leaf tree pages. */ + block = slot->object; + ut_a(buf_block_get_space(block) + == dict_index_get_space(index)); + ut_a(fil_page_get_type(buf_block_get_frame(block)) + == FIL_PAGE_INDEX); + ut_a(page_is_leaf(buf_block_get_frame(block))); + + if (nonfree) { + /* Allocate the same page again. */ + ulint page_no; + page_no = btr_page_alloc_low( + index, buf_block_get_page_no(block), + FSP_NO_DIR, 0, mtr, NULL); + ut_a(page_no == buf_block_get_page_no(block)); + } else { + /* Assert that the page is allocated and free it. */ + btr_page_free_low(index, block, 0, mtr); + } + } + + ut_ad(btr_freed_leaves_validate(mtr)); +} + +#ifdef UNIV_DEBUG +/**************************************************************//** +Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF. +@see btr_mark_freed_leaves() +@return TRUE */ +UNIV_INTERN +ibool +btr_freed_leaves_validate( +/*======================*/ + mtr_t* mtr) /*!< in: mini-transaction */ +{ + ulint offset; + + ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE); + + offset = dyn_array_get_data_size(&mtr->memo); + + while (offset > 0) { + const mtr_memo_slot_t* slot; + const buf_block_t* block; + + offset -= sizeof *slot; + + slot = dyn_array_get_element(&mtr->memo, offset); + + if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) { + continue; + } + + ut_a(mtr->freed_clust_leaf); + /* Because btr_page_alloc() does invoke + mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all + blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the + memo must still be clustered index leaf tree pages. */ + block = slot->object; + ut_a(fil_page_get_type(buf_block_get_frame(block)) + == FIL_PAGE_INDEX); + ut_a(page_is_leaf(buf_block_get_frame(block))); + } + + return(TRUE); } +#endif /* UNIV_DEBUG */ /**************************************************************//** Sets the child node file address in a node pointer. */ @@ -1809,7 +1987,7 @@ btr_root_raise_and_insert( level = btr_page_get_level(root, mtr); - new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr); + new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr); new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); ut_a(!new_page_zip == !root_page_zip); @@ -2545,7 +2723,7 @@ func_start: /* 2. Allocate a new page to the index */ new_block = btr_page_alloc(cursor->index, hint_page_no, direction, - btr_page_get_level(page, mtr), mtr); + btr_page_get_level(page, mtr), mtr, mtr); new_page = buf_block_get_frame(new_block); new_page_zip = buf_block_get_page_zip(new_block); btr_page_create(new_block, new_page_zip, cursor->index, === modified file 'storage/innobase/btr/btr0cur.c' --- a/storage/innobase/btr/btr0cur.c 2011-08-15 09:18:34 +0000 +++ b/storage/innobase/btr/btr0cur.c 2011-08-29 08:22:43 +0000 @@ -2532,39 +2532,6 @@ return_after_reservations: return(err); } -/**************************************************************//** -Commits and restarts a mini-transaction so that it will retain an -x-lock on index->lock and the cursor page. */ -UNIV_INTERN -void -btr_cur_mtr_commit_and_start( -/*=========================*/ - btr_cur_t* cursor, /*!< in: cursor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - buf_block_t* block; - - block = btr_cur_get_block(cursor); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - /* Keep the locks across the mtr_commit(mtr). */ - rw_lock_x_lock(dict_index_get_lock(cursor->index)); - rw_lock_x_lock(&block->lock); - mutex_enter(&block->mutex); - buf_block_buf_fix_inc(block, __FILE__, __LINE__); - mutex_exit(&block->mutex); - /* Write out the redo log. */ - mtr_commit(mtr); - mtr_start(mtr); - /* Reassociate the locks with the mini-transaction. - They will be released on mtr_commit(mtr). */ - mtr_memo_push(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK); - mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX); -} - /*==================== B-TREE DELETE MARK AND UNMARK ===============*/ /****************************************************************//** @@ -4190,6 +4157,9 @@ btr_store_big_rec_extern_fields_func( the "external storage" flags in offsets will not correspond to rec when this function returns */ + const big_rec_t*big_rec_vec, /*!< in: vector containing fields + to be stored externally */ + #ifdef UNIV_DEBUG mtr_t* local_mtr, /*!< in: mtr containing the latch to rec and to the tree */ @@ -4198,9 +4168,11 @@ btr_store_big_rec_extern_fields_func( ibool update_in_place,/*! in: TRUE if the record is updated in place (not delete+insert) */ #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - const big_rec_t*big_rec_vec) /*!< in: vector containing fields - to be stored externally */ - + mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL; + in an update, local_mtr for + allocating BLOB pages and + updating BLOB pointers; alloc_mtr + must not have freed any leaf pages */ { ulint rec_page_no; byte* field_ref; @@ -4219,6 +4191,9 @@ btr_store_big_rec_extern_fields_func( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_any_extern(offsets)); + ut_ad(local_mtr); + ut_ad(!alloc_mtr || alloc_mtr == local_mtr); + ut_ad(!update_in_place || alloc_mtr); ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); @@ -4234,6 +4209,25 @@ btr_store_big_rec_extern_fields_func( rec_page_no = buf_block_get_page_no(rec_block); ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX); + if (alloc_mtr) { + /* Because alloc_mtr will be committed after + mtr, it is possible that the tablespace has been + extended when the B-tree record was updated or + inserted, or it will be extended while allocating + pages for big_rec. + + TODO: In mtr (not alloc_mtr), write a redo log record + about extending the tablespace to its current size, + and remember the current size. Whenever the tablespace + grows as pages are allocated, write further redo log + records to mtr. (Currently tablespace extension is not + covered by the redo log. If it were, the record would + only be written to alloc_mtr, which is committed after + mtr.) */ + } else { + alloc_mtr = &mtr; + } + if (UNIV_LIKELY_NULL(page_zip)) { int err; @@ -4310,7 +4304,7 @@ btr_store_big_rec_extern_fields_func( } block = btr_page_alloc(index, hint_page_no, - FSP_NO_DIR, 0, &mtr); + FSP_NO_DIR, 0, alloc_mtr, &mtr); if (UNIV_UNLIKELY(block == NULL)) { mtr_commit(&mtr); @@ -4437,11 +4431,15 @@ btr_store_big_rec_extern_fields_func( goto next_zip_page; } - rec_block = buf_page_get(space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level(rec_block, - SYNC_NO_ORDER_CHECK); + if (alloc_mtr == &mtr) { + rec_block = buf_page_get( + space_id, zip_size, + rec_page_no, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level( + rec_block, + SYNC_NO_ORDER_CHECK); + } if (err == Z_STREAM_END) { mach_write_to_4(field_ref @@ -4475,7 +4473,8 @@ btr_store_big_rec_extern_fields_func( page_zip_write_blob_ptr( page_zip, rec, index, offsets, - big_rec_vec->fields[i].field_no, &mtr); + big_rec_vec->fields[i].field_no, + alloc_mtr); next_zip_page: prev_page_no = page_no; @@ -4520,19 +4519,23 @@ next_zip_page: extern_len -= store_len; - rec_block = buf_page_get(space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level(rec_block, - SYNC_NO_ORDER_CHECK); + if (alloc_mtr == &mtr) { + rec_block = buf_page_get( + space_id, zip_size, + rec_page_no, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level( + rec_block, + SYNC_NO_ORDER_CHECK); + } mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, alloc_mtr); mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4, big_rec_vec->fields[i].len - extern_len, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, alloc_mtr); if (prev_page_no == FIL_NULL) { btr_blob_dbg_add_blob( @@ -4542,18 +4545,19 @@ next_zip_page: mlog_write_ulint(field_ref + BTR_EXTERN_SPACE_ID, - space_id, - MLOG_4BYTES, &mtr); + space_id, MLOG_4BYTES, + alloc_mtr); mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, - page_no, - MLOG_4BYTES, &mtr); + page_no, MLOG_4BYTES, + alloc_mtr); mlog_write_ulint(field_ref + BTR_EXTERN_OFFSET, FIL_PAGE_DATA, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, + alloc_mtr); } prev_page_no = page_no; === modified file 'storage/innobase/buf/buf0buf.c' --- a/storage/innobase/buf/buf0buf.c 2011-08-17 03:51:40 +0000 +++ b/storage/innobase/buf/buf0buf.c 2011-08-29 08:22:43 +0000 @@ -1716,31 +1716,6 @@ buf_page_set_accessed_make_young( } /********************************************************************//** -Resets the check_index_page_at_flush field of a page if found in the buffer -pool. */ -UNIV_INTERN -void -buf_reset_check_index_page_at_flush( -/*================================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ -{ - buf_block_t* block; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - - buf_pool_mutex_enter(buf_pool); - - block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset); - - if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) { - ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page)); - block->check_index_page_at_flush = FALSE; - } - - buf_pool_mutex_exit(buf_pool); -} - -/********************************************************************//** Returns the current state of is_hashed of a page. FALSE if the page is not in the pool. NOTE that this operation does not fix the page in the pool if it is found there. === modified file 'storage/innobase/dict/dict0dict.c' --- a/storage/innobase/dict/dict0dict.c 2011-08-17 01:07:59 +0000 +++ b/storage/innobase/dict/dict0dict.c 2011-08-29 09:44:28 +0000 @@ -5206,7 +5206,8 @@ UNIV_INTERN void dict_set_corrupted_index_cache_only( /*================================*/ - dict_index_t* index) /*!< in/out: index */ + dict_index_t* index, /*!< in/out: index */ + dict_table_t* table) /*!< in/out: table */ { ut_ad(index); ut_ad(mutex_own(&dict_sys->mutex)); @@ -5216,7 +5217,14 @@ dict_set_corrupted_index_cache_only( /* Mark the table as corrupted only if the clustered index is corrupted */ if (dict_index_is_clust(index)) { - index->table->corrupted = TRUE; + dict_table_t* corrupt_table; + + corrupt_table = table ? table : index->table; + ut_ad(!index->table || !table || index->table == table); + + if (corrupt_table) { + corrupt_table->corrupted = TRUE; + } } index->type |= DICT_CORRUPT; === modified file 'storage/innobase/dict/dict0load.c' --- a/storage/innobase/dict/dict0load.c 2011-08-17 01:07:59 +0000 +++ b/storage/innobase/dict/dict0load.c 2011-08-29 09:44:28 +0000 @@ -1497,7 +1497,8 @@ dict_load_indexes( dictionary cache for such metadata corruption, since we would always be able to set it when loading the dictionary cache */ - dict_set_corrupted_index_cache_only(index); + dict_set_corrupted_index_cache_only( + index, table); fprintf(stderr, "InnoDB: Index is corrupt but forcing" === modified file 'storage/innobase/fsp/fsp0fsp.c' --- a/storage/innobase/fsp/fsp0fsp.c 2011-02-02 13:58:01 +0000 +++ b/storage/innobase/fsp/fsp0fsp.c 2011-08-29 08:22:43 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -311,8 +311,9 @@ fsp_fill_free_list( descriptor page and ibuf bitmap page; then we do not allocate more extents */ ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in: space header */ - mtr_t* mtr); /*!< in: mtr */ + fsp_header_t* header, /*!< in/out: space header */ + mtr_t* mtr) /*!< in/out: mini-transaction */ + UNIV_COLD __attribute__((nonnull)); /**********************************************************************//** Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space @@ -325,14 +326,20 @@ fseg_alloc_free_page_low( ulint space, /*!< in: space */ ulint zip_size,/*!< in: compressed page size in bytes or 0 for uncompressed pages */ - fseg_inode_t* seg_inode, /*!< in: segment inode */ + fseg_inode_t* seg_inode, /*!< in/out: segment inode */ ulint hint, /*!< in: hint of which page would be desirable */ byte direction, /*!< in: if the new page is needed because of an index page split, and records are inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR */ - mtr_t* mtr); /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + mtr_t* init_mtr)/*!< in/out: mini-transaction in which the + page should be initialized + (may be the same as mtr), or NULL if it + should not be initialized (the page at hint + was previously freed in mtr) */ + __attribute__((warn_unused_result, nonnull(3,6))); #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** @@ -700,17 +707,18 @@ list, if not free limit == space size. T descriptor defined, as they are uninitialized above the free limit. @return pointer to the extent descriptor, NULL if the page does not exist in the space or if the offset exceeds the free limit */ -UNIV_INLINE +UNIV_INLINE __attribute__((nonnull, warn_unused_result)) xdes_t* xdes_get_descriptor_with_space_hdr( /*===============================*/ - fsp_header_t* sp_header,/*!< in/out: space header, x-latched */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page offset; - if equal to the free limit, - we try to add new extents to - the space free list */ - mtr_t* mtr) /*!< in: mtr handle */ + fsp_header_t* sp_header, /*!< in/out: space header, x-latched + in mtr */ + ulint space, /*!< in: space id */ + ulint offset, /*!< in: page offset; if equal + to the free limit, we try to + add new extents to the space + free list */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint limit; ulint size; @@ -718,11 +726,9 @@ xdes_get_descriptor_with_space_hdr( ulint descr_page_no; page_t* descr_page; - ut_ad(mtr); ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL), MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX)); ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET); /* Read free limit and space size */ limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT); @@ -772,7 +778,7 @@ is necessary to make the descriptor defi above the free limit. @return pointer to the extent descriptor, NULL if the page does not exist in the space or if the offset exceeds the free limit */ -static +static __attribute__((nonnull, warn_unused_result)) xdes_t* xdes_get_descriptor( /*================*/ @@ -781,7 +787,7 @@ xdes_get_descriptor( or 0 for uncompressed pages */ ulint offset, /*!< in: page offset; if equal to the free limit, we try to add new extents to the space free list */ - mtr_t* mtr) /*!< in: mtr handle */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { buf_block_t* block; fsp_header_t* sp_header; @@ -1159,14 +1165,14 @@ fsp_header_get_tablespace_size(void) Tries to extend a single-table tablespace so that a page would fit in the data file. @return TRUE if success */ -static +static UNIV_COLD __attribute__((nonnull, warn_unused_result)) ibool fsp_try_extend_data_file_with_pages( /*================================*/ ulint space, /*!< in: space */ ulint page_no, /*!< in: page number */ - fsp_header_t* header, /*!< in: space header */ - mtr_t* mtr) /*!< in: mtr */ + fsp_header_t* header, /*!< in/out: space header */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { ibool success; ulint actual_size; @@ -1191,7 +1197,7 @@ fsp_try_extend_data_file_with_pages( /***********************************************************************//** Tries to extend the last data file of a tablespace if it is auto-extending. @return FALSE if not auto-extending */ -static +static UNIV_COLD __attribute__((nonnull)) ibool fsp_try_extend_data_file( /*=====================*/ @@ -1201,8 +1207,8 @@ fsp_try_extend_data_file( the actual file size rounded down to megabyte */ ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in: space header */ - mtr_t* mtr) /*!< in: mtr */ + fsp_header_t* header, /*!< in/out: space header */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint size; ulint zip_size; @@ -1338,7 +1344,7 @@ fsp_fill_free_list( then we do not allocate more extents */ ulint space, /*!< in: space */ fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in: mtr */ + mtr_t* mtr) /*!< in/out: mini-transaction */ { ulint limit; ulint size; @@ -1537,9 +1543,46 @@ fsp_alloc_free_extent( } /**********************************************************************//** +Allocates a single free page from a space. */ +static __attribute__((nonnull)) +void +fsp_alloc_from_free_frag( +/*=====================*/ + fsp_header_t* header, /*!< in/out: tablespace header */ + xdes_t* descr, /*!< in/out: extent descriptor */ + ulint bit, /*!< in: slot to allocate in the extent */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + ulint frag_n_used; + + ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG); + ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr)); + xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr); + + /* Update the FRAG_N_USED field */ + frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, + mtr); + frag_n_used++; + mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, + mtr); + if (xdes_is_full(descr, mtr)) { + /* The fragment is full: move it to another list */ + flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, + mtr); + xdes_set_state(descr, XDES_FULL_FRAG, mtr); + + flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, + mtr); + mlog_write_ulint(header + FSP_FRAG_N_USED, + frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, + mtr); + } +} + +/**********************************************************************//** Allocates a single free page from a space. The page is marked as used. @return the page offset, FIL_NULL if no page could be allocated */ -static +static __attribute__((nonnull, warn_unused_result)) ulint fsp_alloc_free_page( /*================*/ @@ -1547,19 +1590,22 @@ fsp_alloc_free_page( ulint zip_size,/*!< in: compressed page size in bytes or 0 for uncompressed pages */ ulint hint, /*!< in: hint of which page would be desirable */ - mtr_t* mtr) /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + mtr_t* init_mtr)/*!< in/out: mini-transaction in which the + page should be initialized + (may be the same as mtr) */ { fsp_header_t* header; fil_addr_t first; xdes_t* descr; buf_block_t* block; ulint free; - ulint frag_n_used; ulint page_no; ulint space_size; ibool success; ut_ad(mtr); + ut_ad(init_mtr); header = fsp_get_space_header(space, zip_size, mtr); @@ -1641,38 +1687,19 @@ fsp_alloc_free_page( } } - xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr); - - /* Update the FRAG_N_USED field */ - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - mtr); - frag_n_used++; - mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, - mtr); - if (xdes_is_full(descr, mtr)) { - /* The fragment is full: move it to another list */ - flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - xdes_set_state(descr, XDES_FULL_FRAG, mtr); - - flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, - mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, - mtr); - } + fsp_alloc_from_free_frag(header, descr, free, mtr); /* Initialize the allocated page to the buffer pool, so that it can be obtained immediately with buf_page_get without need for a disk read. */ - buf_page_create(space, page_no, zip_size, mtr); + buf_page_create(space, page_no, zip_size, init_mtr); - block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr); + block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, init_mtr); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); /* Prior contents of the page should be ignored */ - fsp_init_file_page(block, mtr); + fsp_init_file_page(block, init_mtr); return(page_no); } @@ -1908,7 +1935,7 @@ fsp_alloc_seg_inode_page( zip_size = dict_table_flags_to_zip_size( mach_read_from_4(FSP_SPACE_FLAGS + space_header)); - page_no = fsp_alloc_free_page(space, zip_size, 0, mtr); + page_no = fsp_alloc_free_page(space, zip_size, 0, mtr, mtr); if (page_no == FIL_NULL) { @@ -2320,7 +2347,7 @@ fseg_create_general( if (page == 0) { page = fseg_alloc_free_page_low(space, zip_size, - inode, 0, FSP_UP, mtr); + inode, 0, FSP_UP, mtr, mtr); if (page == FIL_NULL) { @@ -2569,14 +2596,19 @@ fseg_alloc_free_page_low( ulint space, /*!< in: space */ ulint zip_size,/*!< in: compressed page size in bytes or 0 for uncompressed pages */ - fseg_inode_t* seg_inode, /*!< in: segment inode */ + fseg_inode_t* seg_inode, /*!< in/out: segment inode */ ulint hint, /*!< in: hint of which page would be desirable */ byte direction, /*!< in: if the new page is needed because of an index page split, and records are inserted there in order, into which direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR */ - mtr_t* mtr) /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + mtr_t* init_mtr)/*!< in/out: mini-transaction in which the + page should be initialized + (may be the same as mtr), or NULL if it + should not be initialized (the page at hint + was previously freed in mtr) */ { fsp_header_t* space_header; ulint space_size; @@ -2587,7 +2619,6 @@ fseg_alloc_free_page_low( ulint ret_page; /*!< the allocated page offset, FIL_NULL if could not be allocated */ xdes_t* ret_descr; /*!< the extent of the allocated page */ - ibool frag_page_allocated = FALSE; ibool success; ulint n; @@ -2609,6 +2640,8 @@ fseg_alloc_free_page_low( if (descr == NULL) { /* Hint outside space or too high above free limit: reset hint */ + ut_a(init_mtr); + /* The file space header page is always allocated. */ hint = 0; descr = xdes_get_descriptor(space, zip_size, hint, mtr); } @@ -2619,15 +2652,20 @@ fseg_alloc_free_page_low( && mach_read_from_8(descr + XDES_ID) == seg_id && (xdes_get_bit(descr, XDES_FREE_BIT, hint % FSP_EXTENT_SIZE, mtr) == TRUE)) { - +take_hinted_page: /* 1. We can take the hinted page =================================*/ ret_descr = descr; ret_page = hint; + /* Skip the check for extending the tablespace. If the + page hint were not within the size of the tablespace, + we would have got (descr == NULL) above and reset the hint. */ + goto got_hinted_page; /*-----------------------------------------------------------*/ - } else if ((xdes_get_state(descr, mtr) == XDES_FREE) - && ((reserved - used) < reserved / FSEG_FILLFACTOR) - && (used >= FSEG_FRAG_LIMIT)) { + } else if (xdes_get_state(descr, mtr) == XDES_FREE + && (!init_mtr + || ((reserved - used < reserved / FSEG_FILLFACTOR) + && used >= FSEG_FRAG_LIMIT))) { /* 2. We allocate the free extent from space and can take ========================================================= @@ -2645,8 +2683,20 @@ fseg_alloc_free_page_low( /* Try to fill the segment free list */ fseg_fill_free_list(seg_inode, space, zip_size, hint + FSP_EXTENT_SIZE, mtr); - ret_page = hint; + goto take_hinted_page; /*-----------------------------------------------------------*/ + } else if (!init_mtr) { + ut_a(xdes_get_state(descr, mtr) == XDES_FREE_FRAG); + fsp_alloc_from_free_frag(space_header, descr, + hint % FSP_EXTENT_SIZE, mtr); + ret_page = hint; + ret_descr = NULL; + + /* Put the page in the fragment page array of the segment */ + n = fseg_find_free_frag_page_slot(seg_inode, mtr); + ut_a(n != FIL_NULL); + fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr); + goto got_hinted_page; } else if ((direction != FSP_NO_DIR) && ((reserved - used) < reserved / FSEG_FILLFACTOR) && (used >= FSEG_FRAG_LIMIT) @@ -2705,11 +2755,10 @@ fseg_alloc_free_page_low( } else if (used < FSEG_FRAG_LIMIT) { /* 6. We allocate an individual page from the space ===================================================*/ - ret_page = fsp_alloc_free_page(space, zip_size, hint, mtr); + ret_page = fsp_alloc_free_page(space, zip_size, hint, + mtr, init_mtr); ret_descr = NULL; - frag_page_allocated = TRUE; - if (ret_page != FIL_NULL) { /* Put the page in the fragment page array of the segment */ @@ -2719,6 +2768,10 @@ fseg_alloc_free_page_low( fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr); } + + /* fsp_alloc_free_page() invoked fsp_init_file_page() + already. */ + return(ret_page); /*-----------------------------------------------------------*/ } else { /* 7. We allocate a new extent and take its first page @@ -2766,26 +2819,34 @@ fseg_alloc_free_page_low( } } - if (!frag_page_allocated) { +got_hinted_page: + { /* Initialize the allocated page to buffer pool, so that it can be obtained immediately with buf_page_get without need for a disk read */ buf_block_t* block; ulint zip_size = dict_table_flags_to_zip_size( mach_read_from_4(FSP_SPACE_FLAGS + space_header)); + mtr_t* block_mtr = init_mtr ? init_mtr : mtr; - block = buf_page_create(space, ret_page, zip_size, mtr); + block = buf_page_create(space, ret_page, zip_size, block_mtr); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size, ret_page, RW_X_LATCH, - mtr))) { + block_mtr))) { ut_error; } - /* The prior contents of the page should be ignored */ - fsp_init_file_page(block, mtr); + if (init_mtr) { + /* The prior contents of the page should be ignored */ + fsp_init_file_page(block, init_mtr); + } + } + /* ret_descr == NULL if the block was allocated from free_frag + (XDES_FREE_FRAG) */ + if (ret_descr != NULL) { /* At this point we know the extent and the page offset. The extent is still in the appropriate list (FSEG_NOT_FULL or FSEG_FREE), and the page is not yet marked as used. */ @@ -2798,8 +2859,6 @@ fseg_alloc_free_page_low( fseg_mark_page_used(seg_inode, space, zip_size, ret_page, mtr); } - buf_reset_check_index_page_at_flush(space, ret_page); - return(ret_page); } @@ -2812,7 +2871,7 @@ UNIV_INTERN ulint fseg_alloc_free_page_general( /*=========================*/ - fseg_header_t* seg_header,/*!< in: segment header */ + fseg_header_t* seg_header,/*!< in/out: segment header */ ulint hint, /*!< in: hint of which page would be desirable */ byte direction,/*!< in: if the new page is needed because of an index page split, and records are @@ -2824,7 +2883,11 @@ fseg_alloc_free_page_general( with fsp_reserve_free_extents, then there is no need to do the check for this individual page */ - mtr_t* mtr) /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction handle */ + mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction + in which the page should be initialized, + or NULL if this is a "fake allocation" of + a page that was previously freed in mtr */ { fseg_inode_t* inode; ulint space; @@ -2866,7 +2929,8 @@ fseg_alloc_free_page_general( } page_no = fseg_alloc_free_page_low(space, zip_size, - inode, hint, direction, mtr); + inode, hint, direction, + mtr, init_mtr); if (!has_done_reservation) { fil_space_release_free_extents(space, n_reserved); } @@ -2875,28 +2939,6 @@ fseg_alloc_free_page_general( } /**********************************************************************//** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. -@return allocated page offset, FIL_NULL if no page could be allocated */ -UNIV_INTERN -ulint -fseg_alloc_free_page( -/*=================*/ - fseg_header_t* seg_header,/*!< in: segment header */ - ulint hint, /*!< in: hint of which page would be desirable */ - byte direction,/*!< in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr) /*!< in: mtr handle */ -{ - return(fseg_alloc_free_page_general(seg_header, hint, direction, - FALSE, mtr)); -} - -/**********************************************************************//** Checks that we have at least 2 frag pages free in the first extent of a single-table tablespace, and they are also physically initialized to the data file. That is we have already extended the data file so that those pages are === modified file 'storage/innobase/include/btr0btr.h' --- a/storage/innobase/include/btr0btr.h 2011-08-15 09:18:34 +0000 +++ b/storage/innobase/include/btr0btr.h 2011-08-29 08:22:43 +0000 @@ -568,7 +568,12 @@ btr_page_alloc( page split is made */ ulint level, /*!< in: level where the page is placed in the tree */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr, /*!< in/out: mini-transaction + for the allocation */ + mtr_t* init_mtr) /*!< in/out: mini-transaction + for x-latching and initializing + the page */ + __attribute__((nonnull, warn_unused_result)); /**************************************************************//** Frees a file page used in an index tree. NOTE: cannot free field external storage pages because the page must contain info on its level. */ @@ -591,6 +596,33 @@ btr_page_free_low( buf_block_t* block, /*!< in: block to be freed, x-latched */ ulint level, /*!< in: page level */ mtr_t* mtr); /*!< in: mtr */ +/**************************************************************//** +Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free. +For invoking btr_store_big_rec_extern_fields() after an update, +we must temporarily mark freed clustered index pages allocated, so +that off-page columns will not be allocated from them. Between the +btr_store_big_rec_extern_fields() and mtr_commit() we have to +mark the pages free again, so that no pages will be leaked. */ +UNIV_INTERN +void +btr_mark_freed_leaves( +/*==================*/ + dict_index_t* index, /*!< in/out: clustered index */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */ + UNIV_COLD __attribute__((nonnull)); +#ifdef UNIV_DEBUG +/**************************************************************//** +Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF. +@see btr_mark_freed_leaves() +@return TRUE */ +UNIV_INTERN +ibool +btr_freed_leaves_validate( +/*======================*/ + mtr_t* mtr) /*!< in: mini-transaction */ + __attribute__((nonnull, warn_unused_result)); +#endif /* UNIV_DEBUG */ #ifdef UNIV_BTR_PRINT /*************************************************************//** Prints size info of a B-tree. */ === modified file 'storage/innobase/include/btr0cur.h' --- a/storage/innobase/include/btr0cur.h 2011-06-16 09:07:49 +0000 +++ b/storage/innobase/include/btr0cur.h 2011-08-29 08:22:43 +0000 @@ -327,16 +327,6 @@ btr_cur_pessimistic_update( que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr); /*!< in: mtr; must be committed before latching any further pages */ -/***************************************************************** -Commits and restarts a mini-transaction so that it will retain an -x-lock on index->lock and the cursor page. */ -UNIV_INTERN -void -btr_cur_mtr_commit_and_start( -/*=========================*/ - btr_cur_t* cursor, /*!< in: cursor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - UNIV_COLD __attribute__((nonnull)); /***********************************************************//** Marks a clustered index record deleted. Writes an undo log record to undo log on this delete marking. Writes in the trx id field the id @@ -528,6 +518,8 @@ btr_store_big_rec_extern_fields_func( the "external storage" flags in offsets will not correspond to rec when this function returns */ + const big_rec_t*big_rec_vec, /*!< in: vector containing fields + to be stored externally */ #ifdef UNIV_DEBUG mtr_t* local_mtr, /*!< in: mtr containing the latch to rec and to the tree */ @@ -536,9 +528,12 @@ btr_store_big_rec_extern_fields_func( ibool update_in_place,/*! in: TRUE if the record is updated in place (not delete+insert) */ #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - const big_rec_t*big_rec_vec) /*!< in: vector containing fields - to be stored externally */ - __attribute__((nonnull)); + mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL; + in an update, local_mtr for + allocating BLOB pages and + updating BLOB pointers; alloc_mtr + must not have freed any leaf pages */ + __attribute__((nonnull(1,2,3,4,5), warn_unused_result)); /** Stores the fields in big_rec_vec to the tablespace and puts pointers to them in rec. The extern flags in rec will have to be set beforehand. @@ -547,21 +542,22 @@ file segment of the index tree. @param index in: clustered index; MUST be X-latched by mtr @param b in/out: block containing rec; MUST be X-latched by mtr @param rec in/out: clustered index record -@param offsets in: rec_get_offsets(rec, index); +@param offs in: rec_get_offsets(rec, index); the "external storage" flags in offsets will not be adjusted +@param big in: vector containing fields to be stored externally @param mtr in: mini-transaction that holds x-latch on index and b @param upd in: TRUE if the record is updated in place (not delete+insert) -@param big in: vector containing fields to be stored externally +@param rmtr in/out: in updates, the mini-transaction that holds rec @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ #ifdef UNIV_DEBUG -# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ - btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big) +# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,mtr,upd,rmtr) #elif defined UNIV_BLOB_LIGHT_DEBUG -# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ - btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big) +# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,upd,rmtr) #else -# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \ - btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big) +# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \ + btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,rmtr) #endif /*******************************************************************//** === modified file 'storage/innobase/include/buf0buf.h' --- a/storage/innobase/include/buf0buf.h 2011-07-19 14:54:59 +0000 +++ b/storage/innobase/include/buf0buf.h 2011-08-29 08:22:43 +0000 @@ -491,15 +491,6 @@ buf_page_peek( /*==========*/ ulint space, /*!< in: space id */ ulint offset);/*!< in: page number */ -/********************************************************************//** -Resets the check_index_page_at_flush field of a page if found in the buffer -pool. */ -UNIV_INTERN -void -buf_reset_check_index_page_at_flush( -/*================================*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: page number */ #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG /********************************************************************//** Sets file_page_was_freed TRUE if the page is found in the buffer pool. === modified file 'storage/innobase/include/dict0dict.h' --- a/storage/innobase/include/dict0dict.h 2011-08-17 01:07:59 +0000 +++ b/storage/innobase/include/dict0dict.h 2011-08-29 09:44:28 +0000 @@ -1298,7 +1298,8 @@ UNIV_INTERN void dict_set_corrupted_index_cache_only( /*================================*/ - dict_index_t* index); /*!< in/out: index */ + dict_index_t* index, /*!< in/out: index */ + dict_table_t* table); /*!< in/out: table */ /**********************************************************************//** Flags a table with specified space_id corrupted in the table dictionary === modified file 'storage/innobase/include/fsp0fsp.h' --- a/storage/innobase/include/fsp0fsp.h 2010-06-22 15:58:28 +0000 +++ b/storage/innobase/include/fsp0fsp.h 2011-08-29 08:22:43 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -176,19 +176,18 @@ fseg_n_reserved_pages( Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space fragmentation. -@return the allocated page offset FIL_NULL if no page could be allocated */ -UNIV_INTERN -ulint -fseg_alloc_free_page( -/*=================*/ - fseg_header_t* seg_header, /*!< in: segment header */ - ulint hint, /*!< in: hint of which page would be desirable */ - byte direction, /*!< in: if the new page is needed because +@param[in/out] seg_header segment header +@param[in] hint hint of which page would be desirable +@param[in] direction if the new page is needed because of an index page split, and records are inserted there in order, into which direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr); /*!< in: mtr handle */ + FSP_UP, FSP_NO_DIR +@param[in/out] mtr mini-transaction +@return the allocated page offset FIL_NULL if no page could be allocated */ +#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \ + fseg_alloc_free_page_general(seg_header, hint, direction, \ + FALSE, mtr, mtr) /**********************************************************************//** Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize file space @@ -198,7 +197,7 @@ UNIV_INTERN ulint fseg_alloc_free_page_general( /*=========================*/ - fseg_header_t* seg_header,/*!< in: segment header */ + fseg_header_t* seg_header,/*!< in/out: segment header */ ulint hint, /*!< in: hint of which page would be desirable */ byte direction,/*!< in: if the new page is needed because of an index page split, and records are @@ -210,7 +209,12 @@ fseg_alloc_free_page_general( with fsp_reserve_free_extents, then there is no need to do the check for this individual page */ - mtr_t* mtr); /*!< in: mtr handle */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction + in which the page should be initialized, + or NULL if this is a "fake allocation" of + a page that was previously freed in mtr */ + __attribute__((warn_unused_result, nonnull(1,5))); /**********************************************************************//** Reserves free pages from a tablespace. All mini-transactions which may use several pages from the tablespace should call this function beforehand === modified file 'storage/innobase/include/mtr0mtr.h' --- a/storage/innobase/include/mtr0mtr.h 2011-04-15 13:46:11 +0000 +++ b/storage/innobase/include/mtr0mtr.h 2011-08-29 08:22:43 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -53,6 +53,8 @@ first 3 values must be RW_S_LATCH, RW_X_ #define MTR_MEMO_MODIFY 54 #define MTR_MEMO_S_LOCK 55 #define MTR_MEMO_X_LOCK 56 +/** The mini-transaction freed a clustered index leaf page. */ +#define MTR_MEMO_FREE_CLUST_LEAF 57 /** @name Log item types The log items are declared 'byte' so that the compiler can warn if val @@ -378,11 +380,14 @@ struct mtr_struct{ #endif dyn_array_t memo; /*!< memo stack for locks etc. */ dyn_array_t log; /*!< mini-transaction log */ - ibool inside_ibuf; + unsigned inside_ibuf:1; /*!< TRUE if inside ibuf changes */ - ibool modifications; - /* TRUE if the mtr made modifications to - buffer pool pages */ + unsigned modifications:1; + /*!< TRUE if the mini-transaction + modified buffer pool pages */ + unsigned freed_clust_leaf:1; + /*!< TRUE if MTR_MEMO_FREE_CLUST_LEAF + was logged in the mini-transaction */ ulint n_log_recs; /* count of how many page initial log records have been written to the mtr log */ === modified file 'storage/innobase/include/mtr0mtr.ic' --- a/storage/innobase/include/mtr0mtr.ic 2011-05-31 07:55:29 +0000 +++ b/storage/innobase/include/mtr0mtr.ic 2011-08-29 08:22:43 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -43,8 +43,9 @@ mtr_start( dyn_array_create(&(mtr->log)); mtr->log_mode = MTR_LOG_ALL; - mtr->modifications = FALSE; mtr->inside_ibuf = FALSE; + mtr->modifications = FALSE; + mtr->freed_clust_leaf = FALSE; mtr->n_log_recs = 0; ut_d(mtr->state = MTR_ACTIVE); @@ -66,7 +67,8 @@ mtr_memo_push( ut_ad(object); ut_ad(type >= MTR_MEMO_PAGE_S_FIX); - ut_ad(type <= MTR_MEMO_X_LOCK); + ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF); + ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf); ut_ad(mtr); ut_ad(mtr->magic_n == MTR_MAGIC_N); ut_ad(mtr->state == MTR_ACTIVE); === modified file 'storage/innobase/include/sync0sync.ic' --- a/storage/innobase/include/sync0sync.ic 2010-07-15 13:47:50 +0000 +++ b/storage/innobase/include/sync0sync.ic 2011-08-29 08:29:57 +0000 @@ -272,11 +272,10 @@ pfs_mutex_enter_nowait_func( ulint ret; struct PSI_mutex_locker* locker = NULL; PSI_mutex_locker_state state; - int result = 0; if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) { locker = PSI_server->get_thread_mutex_locker( - &state, mutex->pfs_psi, PSI_MUTEX_LOCK); + &state, mutex->pfs_psi, PSI_MUTEX_TRYLOCK); if (locker) { PSI_server->start_mutex_wait(locker, file_name, line); } @@ -285,7 +284,7 @@ pfs_mutex_enter_nowait_func( ret = mutex_enter_nowait_func(mutex, file_name, line); if (locker) { - PSI_server->end_mutex_wait(locker, result); + PSI_server->end_mutex_wait(locker, ret); } return(ret); === modified file 'storage/innobase/mtr/mtr0mtr.c' --- a/storage/innobase/mtr/mtr0mtr.c 2011-05-31 07:55:29 +0000 +++ b/storage/innobase/mtr/mtr0mtr.c 2011-08-29 08:22:43 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -64,12 +64,11 @@ mtr_memo_slot_release( buf_page_release((buf_block_t*)object, type); } else if (type == MTR_MEMO_S_LOCK) { rw_lock_s_unlock((rw_lock_t*)object); -#ifdef UNIV_DEBUG } else if (type != MTR_MEMO_X_LOCK) { - ut_ad(type == MTR_MEMO_MODIFY); + ut_ad(type == MTR_MEMO_MODIFY + || type == MTR_MEMO_FREE_CLUST_LEAF); ut_ad(mtr_memo_contains(mtr, object, MTR_MEMO_PAGE_X_FIX)); -#endif /* UNIV_DEBUG */ } else { rw_lock_x_unlock((rw_lock_t*)object); } === modified file 'storage/innobase/row/row0ins.c' --- a/storage/innobase/row/row0ins.c 2011-08-17 01:07:59 +0000 +++ b/storage/innobase/row/row0ins.c 2011-08-29 08:22:43 +0000 @@ -2088,15 +2088,20 @@ row_ins_index_entry_low( if (big_rec) { ut_a(err == DB_SUCCESS); /* Write out the externally stored - columns while still x-latching - index->lock and block->lock. We have - to mtr_commit(mtr) first, so that the - redo log will be written in the - correct order. Otherwise, we would run - into trouble on crash recovery if mtr - freed B-tree pages on which some of - the big_rec fields will be written. */ - btr_cur_mtr_commit_and_start(&cursor, &mtr); + columns, but allocate the pages and + write the pointers using the + mini-transaction of the record update. + If any pages were freed in the update, + temporarily mark them allocated so + that off-page columns will not + overwrite them. We must do this, + because we will write the redo log for + the BLOB writes before writing the + redo log for the record update. Thus, + redo log application at crash recovery + will see BLOBs being written to free pages. */ + + btr_mark_freed_leaves(index, &mtr, TRUE); rec = btr_cur_get_rec(&cursor); offsets = rec_get_offsets( @@ -2105,7 +2110,8 @@ row_ins_index_entry_low( err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(&cursor), - rec, offsets, &mtr, FALSE, big_rec); + rec, offsets, big_rec, &mtr, + FALSE, &mtr); /* If writing big_rec fails (for example, because of DB_OUT_OF_FILE_SPACE), the record will be corrupted. Even if @@ -2118,6 +2124,9 @@ row_ins_index_entry_low( undo log, and thus the record cannot be rolled back. */ ut_a(err == DB_SUCCESS); + /* Free the pages again + in order to avoid a leak. */ + btr_mark_freed_leaves(index, &mtr, FALSE); goto stored_big_rec; } } else { @@ -2159,7 +2168,7 @@ function_exit: err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(&cursor), - rec, offsets, &mtr, FALSE, big_rec); + rec, offsets, big_rec, &mtr, FALSE, NULL); stored_big_rec: if (modify) { @@ -2434,7 +2443,7 @@ row_ins( node->index = dict_table_get_next_index(node->index); node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry); - /* Skip corrupted secondar index and its entry */ + /* Skip corrupted secondary index and its entry */ while (node->index && dict_index_is_corrupted(node->index)) { node->index = dict_table_get_next_index(node->index); === modified file 'storage/innobase/row/row0row.c' --- a/storage/innobase/row/row0row.c 2011-07-08 13:16:23 +0000 +++ b/storage/innobase/row/row0row.c 2011-08-29 08:22:43 +0000 @@ -243,19 +243,20 @@ row_build( } #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - /* This condition can occur during crash recovery before - trx_rollback_active() has completed execution. + if (rec_offs_any_null_extern(rec, offsets)) { + /* This condition can occur during crash recovery + before trx_rollback_active() has completed execution. - This condition is possible if the server crashed - during an insert or update before - btr_store_big_rec_extern_fields() did mtr_commit() all - BLOB pointers to the clustered index record. - - If the record contains a null BLOB pointer, look up the - transaction that holds the implicit lock on this record, and - assert that it was recovered (and will soon be rolled back). */ - ut_a(!rec_offs_any_null_extern(rec, offsets) - || trx_assert_recovered(row_get_rec_trx_id(rec, index, offsets))); + This condition is possible if the server crashed + during an insert or update-by-delete-and-insert before + btr_store_big_rec_extern_fields() did mtr_commit() all + BLOB pointers to the freshly inserted clustered index + record. */ + ut_a(trx_assert_recovered( + row_get_rec_trx_id(rec, index, offsets))); + ut_a(trx_undo_roll_ptr_is_insert( + row_get_rec_roll_ptr(rec, index, offsets))); + } #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ if (type != ROW_COPY_POINTERS) { === modified file 'storage/innobase/row/row0upd.c' --- a/storage/innobase/row/row0upd.c 2011-08-17 01:07:59 +0000 +++ b/storage/innobase/row/row0upd.c 2011-08-29 08:22:43 +0000 @@ -2008,21 +2008,22 @@ row_upd_clust_rec( rec_offs_init(offsets_); ut_a(err == DB_SUCCESS); - /* Write out the externally stored columns while still - x-latching index->lock and block->lock. We have to - mtr_commit(mtr) first, so that the redo log will be - written in the correct order. Otherwise, we would run - into trouble on crash recovery if mtr freed B-tree - pages on which some of the big_rec fields will be - written. */ - btr_cur_mtr_commit_and_start(btr_cur, mtr); + /* Write out the externally stored columns, but + allocate the pages and write the pointers using the + mini-transaction of the record update. If any pages + were freed in the update, temporarily mark them + allocated so that off-page columns will not overwrite + them. We must do this, because we write the redo log + for the BLOB writes before writing the redo log for + the record update. */ + btr_mark_freed_leaves(index, mtr, TRUE); rec = btr_cur_get_rec(btr_cur); err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(btr_cur), rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), - mtr, TRUE, big_rec); + big_rec, mtr, TRUE, mtr); /* If writing big_rec fails (for example, because of DB_OUT_OF_FILE_SPACE), the record will be corrupted. Even if we did not update any externally stored @@ -2032,6 +2033,8 @@ row_upd_clust_rec( to the undo log, and thus the record cannot be rolled back. */ ut_a(err == DB_SUCCESS); + /* Free the pages again in order to avoid a leak. */ + btr_mark_freed_leaves(index, mtr, FALSE); } mtr_commit(mtr); === modified file 'storage/innobase/sync/sync0sync.c' --- a/storage/innobase/sync/sync0sync.c 2011-08-15 09:18:34 +0000 +++ b/storage/innobase/sync/sync0sync.c 2011-08-22 14:12:27 +0000 @@ -1329,7 +1329,13 @@ sync_thread_add_level( TRUE)); break; case SYNC_IBUF_TREE_NODE_NEW: - ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)); + /* ibuf_add_free_page() allocates new pages for the + change buffer while only holding the tablespace + x-latch. These pre-allocated new pages may only be + taken in use while holding ibuf_mutex, in + btr_page_alloc_for_ibuf(). */ + ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX) + || sync_thread_levels_contain(array, SYNC_FSP)); break; case SYNC_IBUF_INDEX_TREE: if (sync_thread_levels_contain(array, SYNC_FSP)) { === modified file 'storage/innobase/trx/trx0undo.c' --- a/storage/innobase/trx/trx0undo.c 2011-06-14 05:47:33 +0000 +++ b/storage/innobase/trx/trx0undo.c 2011-08-29 08:22:43 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -918,7 +918,7 @@ trx_undo_add_page( page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, undo->top_page_no + 1, FSP_UP, - TRUE, mtr); + TRUE, mtr, mtr); fil_space_release_free_extents(undo->space, n_reserved); === modified file 'strings/decimal.c' --- a/strings/decimal.c 2011-07-18 07:47:39 +0000 +++ b/strings/decimal.c 2011-08-29 09:34:48 +0000 @@ -1403,11 +1403,18 @@ int bin2decimal(const uchar *from, decim buf++; } my_afree(d_copy); + + /* + No digits? We have read the number zero, of unspecified precision. + Make it a proper zero, with non-zero precision. + */ + if (to->intg == 0 && to->frac == 0) + decimal_make_zero(to); return error; err: my_afree(d_copy); - decimal_make_zero(((decimal_t*) to)); + decimal_make_zero(to); return(E_DEC_BAD_NUM); } No bundle (reason: useless for push emails).