List:Commits« Previous MessageNext Message »
From:Bjorn Munch Date:August 30 2011 10:55am
Subject:bzr push into mysql-5.5-mtr branch (bjorn.munch:3239 to 3244)
View as plain text  
 3244 Bjorn Munch	2011-08-30 [merge]
      null upmerge

 3243 Bjorn Munch	2011-08-30 [merge]
      new merge from 5.5

    modified:
      mysql-test/r/group_by.result
      mysql-test/t/group_by.test
      mysql-test/valgrind.supp
      sql/filesort.cc
 3242 Bjorn Munch	2011-08-29 [merge]
      null upmerge

 3241 Bjorn Munch	2011-08-29 [merge]
      merge from 5.5 main

    modified:
      extra/innochecksum.c
      include/decimal.h
      mysql-test/r/type_newdecimal.result
      mysql-test/t/type_newdecimal.test
      mysql-test/valgrind.supp
      sql/filesort.cc
      sql/my_decimal.h
      storage/innobase/btr/btr0btr.c
      storage/innobase/btr/btr0cur.c
      storage/innobase/buf/buf0buf.c
      storage/innobase/dict/dict0dict.c
      storage/innobase/dict/dict0load.c
      storage/innobase/fsp/fsp0fsp.c
      storage/innobase/include/btr0btr.h
      storage/innobase/include/btr0cur.h
      storage/innobase/include/buf0buf.h
      storage/innobase/include/dict0dict.h
      storage/innobase/include/fsp0fsp.h
      storage/innobase/include/mtr0mtr.h
      storage/innobase/include/mtr0mtr.ic
      storage/innobase/include/sync0sync.ic
      storage/innobase/mtr/mtr0mtr.c
      storage/innobase/row/row0ins.c
      storage/innobase/row/row0row.c
      storage/innobase/row/row0upd.c
      storage/innobase/sync/sync0sync.c
      storage/innobase/trx/trx0undo.c
      strings/decimal.c
 3240 Bjorn Munch	2011-08-24
      MTR: small fix to read_plugin_defs for RPM builds:
           Also look into directories lib64 in addition to lib

    modified:
      mysql-test/mysql-test-run.pl
 3239 Bjorn Munch	2011-08-22
      Bug #12793170 MYSQLTEST: PROVIDE ACCESS TO ERROR NAMES THROUGH NUMERIC
      CODES AND VICE VERSA
        Followup: Some statement may give errors not in the list,
        map these to "<Unknown>" rather than failing.

    modified:
      client/mysqltest.cc
=== modified file 'extra/innochecksum.c'
--- a/extra/innochecksum.c	2011-07-03 23:48:19 +0000
+++ b/extra/innochecksum.c	2011-08-22 14:12:27 +0000
@@ -25,12 +25,7 @@
   Published with a permission.
 */
 
-/* needed to have access to 64 bit file functions */
-#define _LARGEFILE_SOURCE
-#define _LARGEFILE64_SOURCE
-
-#define _XOPEN_SOURCE 500 /* needed to include getopt.h on some platforms. */
-
+#include <my_global.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
@@ -53,7 +48,6 @@
 /* another argument to specify page ranges... seek to right spot and go from there */
 
 typedef unsigned long int ulint;
-typedef unsigned char uchar;
 
 /* innodb function in name; modified slightly to not have the ASM version (lots of #ifs that didn't apply) */
 ulint mach_read_from_4(uchar *b)

=== modified file 'include/decimal.h'
--- a/include/decimal.h	2011-06-30 15:46:53 +0000
+++ b/include/decimal.h	2011-08-29 09:34:48 +0000
@@ -21,6 +21,15 @@ typedef enum
   decimal_round_mode;
 typedef int32 decimal_digit_t;
 
+/**
+    intg is the number of *decimal* digits (NOT number of decimal_digit_t's !)
+         before the point
+    frac is the number of decimal digits after the point
+    len  is the length of buf (length of allocated space) in decimal_digit_t's,
+         not in bytes
+    sign false means positive, true means negative
+    buf  is an array of decimal_digit_t's
+ */
 typedef struct st_decimal_t {
   int    intg, frac, len;
   my_bool sign;

=== modified file 'mysql-test/mysql-test-run.pl'
--- a/mysql-test/mysql-test-run.pl	2011-08-16 09:08:10 +0000
+++ b/mysql-test/mysql-test-run.pl	2011-08-24 12:38:57 +0000
@@ -2132,8 +2132,10 @@ sub find_plugin($$)
   my $lib_plugin=
     mtr_file_exists(vs_config_dirs($location,$plugin_filename),
                     "$basedir/lib/plugin/".$plugin_filename,
+                    "$basedir/lib64/plugin/".$plugin_filename,
                     "$basedir/$location/.libs/".$plugin_filename,
                     "$basedir/lib/mysql/plugin/".$plugin_filename,
+                    "$basedir/lib64/mysql/plugin/".$plugin_filename,
                     );
   return $lib_plugin;
 }

=== modified file 'mysql-test/r/group_by.result'
--- a/mysql-test/r/group_by.result	2011-02-18 10:55:24 +0000
+++ b/mysql-test/r/group_by.result	2011-08-30 08:16:23 +0000
@@ -1892,6 +1892,38 @@ a	AVG(t1.b)	t11c	t12c
 1	4.0000	6	6
 2	2.0000	7	7
 DROP TABLE t1;
+#
+# Bug#11765254 (58200): Assertion failed: param.sort_length when grouping
+# by functions
+#
+SET SQL_BIG_TABLES=1;
+CREATE TABLE t1(a INT);
+INSERT INTO t1 VALUES (0),(0);
+SELECT 1 FROM t1 GROUP BY IF(`a`,'','');
+1
+1
+SELECT 1 FROM t1 GROUP BY TRIM(LEADING RAND() FROM '');
+1
+1
+SELECT 1 FROM t1 GROUP BY SUBSTRING('',SLEEP(0),'');
+1
+1
+Warnings:
+Warning	1292	Truncated incorrect INTEGER value: ''
+Warning	1292	Truncated incorrect INTEGER value: ''
+Warning	1292	Truncated incorrect INTEGER value: ''
+SELECT 1 FROM t1 GROUP BY SUBSTRING(SYSDATE() FROM 'K' FOR 'jxW<');
+1
+1
+Warnings:
+Warning	1292	Truncated incorrect INTEGER value: 'K'
+Warning	1292	Truncated incorrect INTEGER value: 'jxW<'
+Warning	1292	Truncated incorrect INTEGER value: 'K'
+Warning	1292	Truncated incorrect INTEGER value: 'jxW<'
+Warning	1292	Truncated incorrect INTEGER value: 'K'
+Warning	1292	Truncated incorrect INTEGER value: 'jxW<'
+DROP TABLE t1;
+SET SQL_BIG_TABLES=0;
 # End of 5.1 tests
 #
 # Bug#49771: Incorrect MIN (date) when minimum value is 0000-00-00

=== modified file 'mysql-test/r/type_newdecimal.result'
--- a/mysql-test/r/type_newdecimal.result	2011-05-12 03:05:12 +0000
+++ b/mysql-test/r/type_newdecimal.result	2011-08-29 09:34:48 +0000
@@ -1934,3 +1934,14 @@ f1
 0.000000000000000000000000
 DROP TABLE IF EXISTS t1;
 End of 5.1 tests
+#
+# BUG#12911710 - VALGRIND FAILURE IN 
+# ROW-DEBUG:PERFSCHEMA.SOCKET_SUMMARY_BY_INSTANCE_FUNC 
+#
+CREATE TABLE t1(d1 DECIMAL(60,0) NOT NULL,
+d2 DECIMAL(60,0) NOT NULL);
+INSERT INTO t1 (d1, d2) VALUES(0.0, 0.0);
+SELECT d1 * d2 FROM t1;
+d1 * d2
+0
+DROP TABLE t1;

=== modified file 'mysql-test/t/group_by.test'
--- a/mysql-test/t/group_by.test	2011-02-18 10:55:24 +0000
+++ b/mysql-test/t/group_by.test	2011-08-30 08:16:23 +0000
@@ -1284,6 +1284,20 @@ FROM t1 GROUP BY a;
 
 DROP TABLE t1;
 
+--echo #
+--echo # Bug#11765254 (58200): Assertion failed: param.sort_length when grouping
+--echo # by functions
+--echo #
+
+SET SQL_BIG_TABLES=1;
+CREATE TABLE t1(a INT);
+INSERT INTO t1 VALUES (0),(0);
+SELECT 1 FROM t1 GROUP BY IF(`a`,'','');
+SELECT 1 FROM t1 GROUP BY TRIM(LEADING RAND() FROM '');
+SELECT 1 FROM t1 GROUP BY SUBSTRING('',SLEEP(0),'');
+SELECT 1 FROM t1 GROUP BY SUBSTRING(SYSDATE() FROM 'K' FOR 'jxW<');
+DROP TABLE t1;
+SET SQL_BIG_TABLES=0;
 
 --echo # End of 5.1 tests
 

=== modified file 'mysql-test/t/type_newdecimal.test'
--- a/mysql-test/t/type_newdecimal.test	2011-05-12 03:05:12 +0000
+++ b/mysql-test/t/type_newdecimal.test	2011-08-29 09:34:48 +0000
@@ -1535,3 +1535,17 @@ DROP TABLE IF EXISTS t1;
 
 
 --echo End of 5.1 tests
+
+--echo #
+--echo # BUG#12911710 - VALGRIND FAILURE IN 
+--echo # ROW-DEBUG:PERFSCHEMA.SOCKET_SUMMARY_BY_INSTANCE_FUNC 
+--echo #
+
+CREATE TABLE t1(d1 DECIMAL(60,0) NOT NULL,
+                d2 DECIMAL(60,0) NOT NULL);
+
+INSERT INTO t1 (d1, d2) VALUES(0.0, 0.0);
+SELECT d1 * d2 FROM t1;
+
+DROP TABLE t1;
+

=== modified file 'mysql-test/valgrind.supp'
--- a/mysql-test/valgrind.supp	2011-06-30 15:46:53 +0000
+++ b/mysql-test/valgrind.supp	2011-08-30 08:51:23 +0000
@@ -876,3 +876,42 @@
    fun:buf_buddy_free_low
    fun:buf_buddy_free
 }
+
+# Note the wildcard in the (mangled) function signatures of
+# write_keys() and find_all_keys().
+# They both return ha_rows, which is platform dependent.
+{
+   Bug#12856915 VALGRIND FAILURE IN FILESORT/CREATE_SORT_INDEX / one
+   Memcheck:Param
+   write(buf)
+   obj:*/libpthread*.so
+   fun:my_write
+   fun:inline_mysql_file_write
+   fun:my_b_flush_io_cache
+   fun:_my_b_write
+   fun:_Z*10write_keysP13st_sort_paramPPhjP11st_io_cacheS4_
+   fun:_Z*13find_all_keysP13st_sort_paramP10SQL_SELECTPPhP11st_io_cacheS6_S6_
+   fun:_Z8filesortP3THDP5TABLEP13st_sort_fieldjP10SQL_SELECTybPy
+}
+
+## {
+##    Bug#12856915 VALGRIND FAILURE IN FILESORT/CREATE_SORT_INDEX / two
+##    Memcheck:Param
+##    write(buf)
+##    obj:*/libpthread*.so
+##    fun:my_write
+##    fun:my_b_flush_io_cache
+##    fun:_Z15merge_many_buffP13st_sort_paramPhP10st_buffpekPjP11st_io_cache
+##    fun:_Z8filesortP3THDP8st_tableP13st_sort_fieldjP10SQL_SELECTybPy
+## }
+
+{
+   Bug#12856915 VALGRIND FAILURE IN FILESORT/CREATE_SORT_INDEX / three
+   Memcheck:Param
+   write(buf)
+   obj:*/libpthread*.so
+   fun:my_write
+   fun:inline_mysql_file_write
+   fun:my_b_flush_io_cache
+   fun:_Z8filesortP3THDP5TABLEP13st_sort_fieldjP10SQL_SELECTybPy
+}

=== modified file 'sql/filesort.cc'
--- a/sql/filesort.cc	2011-06-30 15:46:53 +0000
+++ b/sql/filesort.cc	2011-08-30 08:16:23 +0000
@@ -147,8 +147,6 @@ ha_rows filesort(THD *thd, TABLE *table,
   error= 1;
   bzero((char*) &param,sizeof(param));
   param.sort_length= sortlength(thd, sortorder, s_length, &multi_byte_charset);
-  /* filesort cannot handle zero-length records. */
-  DBUG_ASSERT(param.sort_length);
   param.ref_length= table->file->ref_length;
   param.addon_field= 0;
   param.addon_length= 0;
@@ -260,6 +258,9 @@ ha_rows filesort(THD *thd, TABLE *table,
   }
   else
   {
+    /* filesort cannot handle zero-length records during merge. */
+    DBUG_ASSERT(param.sort_length != 0);
+
     if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
     {
       my_free(table_sort.buffpek);
@@ -972,21 +973,10 @@ static void make_sortkey(register SORTPA
       if (addonf->null_bit && field->is_null())
       {
         nulls[addonf->null_offset]|= addonf->null_bit;
-#ifdef HAVE_purify
-	bzero(to, addonf->length);
-#endif
       }
       else
       {
-#ifdef HAVE_purify
-        uchar *end= field->pack(to, field->ptr);
-	uint length= (uint) ((to + addonf->length) - end);
-	DBUG_ASSERT((int) length >= 0);
-	if (length)
-	  bzero(end, length);
-#else
         (void) field->pack(to, field->ptr);
-#endif
       }
       to+= addonf->length;
     }

=== modified file 'sql/my_decimal.h'
--- a/sql/my_decimal.h	2011-07-05 23:13:50 +0000
+++ b/sql/my_decimal.h	2011-08-29 09:34:48 +0000
@@ -124,12 +124,8 @@ public:
   {
     len= DECIMAL_BUFF_LENGTH;
     buf= buffer;
-#if !defined (HAVE_purify) && !defined(DBUG_OFF)
-    /* Set buffer to 'random' value to find wrong buffer usage */
-    for (uint i= 0; i < DECIMAL_BUFF_LENGTH; i++)
-      buffer[i]= i;
-#endif
   }
+
   my_decimal()
   {
     init();

=== modified file 'storage/innobase/btr/btr0btr.c'
--- a/storage/innobase/btr/btr0btr.c	2011-08-15 09:18:34 +0000
+++ b/storage/innobase/btr/btr0btr.c	2011-08-29 08:22:43 +0000
@@ -906,28 +906,29 @@ btr_page_alloc_for_ibuf(
 /**************************************************************//**
 Allocates a new file page to be used in an index tree. NOTE: we assume
 that the caller has made the reservation for free extents!
-@return	new allocated block, x-latched; NULL if out of space */
-UNIV_INTERN
-buf_block_t*
-btr_page_alloc(
-/*===========*/
+@return	allocated page number, FIL_NULL if out of space */
+static __attribute__((nonnull(1,5), warn_unused_result))
+ulint
+btr_page_alloc_low(
+/*===============*/
 	dict_index_t*	index,		/*!< in: index */
 	ulint		hint_page_no,	/*!< in: hint of a good page */
 	byte		file_direction,	/*!< in: direction where a possible
 					page split is made */
 	ulint		level,		/*!< in: level where the page is placed
 					in the tree */
-	mtr_t*		mtr)		/*!< in: mtr */
+	mtr_t*		mtr,		/*!< in/out: mini-transaction
+					for the allocation */
+	mtr_t*		init_mtr)	/*!< in/out: mini-transaction
+					in which the page should be
+					initialized (may be the same
+					as mtr), or NULL if it should
+					not be initialized (the page
+					at hint was previously freed
+					in mtr) */
 {
 	fseg_header_t*	seg_header;
 	page_t*		root;
-	buf_block_t*	new_block;
-	ulint		new_page_no;
-
-	if (dict_index_is_ibuf(index)) {
-
-		return(btr_page_alloc_for_ibuf(index, mtr));
-	}
 
 	root = btr_root_get(index, mtr);
 
@@ -941,8 +942,42 @@ btr_page_alloc(
 	reservation for free extents, and thus we know that a page can
 	be allocated: */
 
-	new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
-						   file_direction, TRUE, mtr);
+	return(fseg_alloc_free_page_general(
+		       seg_header, hint_page_no, file_direction,
+		       TRUE, mtr, init_mtr));
+}
+
+/**************************************************************//**
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents!
+@return	new allocated block, x-latched; NULL if out of space */
+UNIV_INTERN
+buf_block_t*
+btr_page_alloc(
+/*===========*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		hint_page_no,	/*!< in: hint of a good page */
+	byte		file_direction,	/*!< in: direction where a possible
+					page split is made */
+	ulint		level,		/*!< in: level where the page is placed
+					in the tree */
+	mtr_t*		mtr,		/*!< in/out: mini-transaction
+					for the allocation */
+	mtr_t*		init_mtr)	/*!< in/out: mini-transaction
+					for x-latching and initializing
+					the page */
+{
+	buf_block_t*	new_block;
+	ulint		new_page_no;
+
+	if (dict_index_is_ibuf(index)) {
+
+		return(btr_page_alloc_for_ibuf(index, mtr));
+	}
+
+	new_page_no = btr_page_alloc_low(
+		index, hint_page_no, file_direction, level, mtr, init_mtr);
+
 	if (new_page_no == FIL_NULL) {
 
 		return(NULL);
@@ -950,9 +985,16 @@ btr_page_alloc(
 
 	new_block = buf_page_get(dict_index_get_space(index),
 				 dict_table_zip_size(index->table),
-				 new_page_no, RW_X_LATCH, mtr);
+				 new_page_no, RW_X_LATCH, init_mtr);
 	buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
 
+	if (mtr->freed_clust_leaf) {
+		mtr_memo_release(mtr, new_block, MTR_MEMO_FREE_CLUST_LEAF);
+		ut_ad(!mtr_memo_contains(mtr, new_block,
+					 MTR_MEMO_FREE_CLUST_LEAF));
+	}
+
+	ut_ad(btr_freed_leaves_validate(mtr));
 	return(new_block);
 }
 
@@ -1065,6 +1107,15 @@ btr_page_free_low(
 	fseg_free_page(seg_header,
 		       buf_block_get_space(block),
 		       buf_block_get_page_no(block), mtr);
+
+	/* The page was marked free in the allocation bitmap, but it
+	should remain buffer-fixed until mtr_commit(mtr) or until it
+	is explicitly freed from the mini-transaction. */
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	/* TODO: Discard any operations on the page from the redo log
+	and remove the block from the flush list and the buffer pool.
+	This would free up buffer pool earlier and reduce writes to
+	both the tablespace and the redo log. */
 }
 
 /**************************************************************//**
@@ -1078,12 +1129,139 @@ btr_page_free(
 	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	ulint		level;
-
-	level = btr_page_get_level(buf_block_get_frame(block), mtr);
+	const page_t*	page	= buf_block_get_frame(block);
+	ulint		level	= btr_page_get_level(page, mtr);
 
+	ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX);
 	btr_page_free_low(index, block, level, mtr);
+
+	/* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+	if (level == 0 && dict_index_is_clust(index)) {
+		/* We may have to call btr_mark_freed_leaves() to
+		temporarily mark the block nonfree for invoking
+		btr_store_big_rec_extern_fields_func() after an
+		update. Remember that the block was freed. */
+		mtr->freed_clust_leaf = TRUE;
+		mtr_memo_push(mtr, block, MTR_MEMO_FREE_CLUST_LEAF);
+	}
+
+	ut_ad(btr_freed_leaves_validate(mtr));
+}
+
+/**************************************************************//**
+Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
+For invoking btr_store_big_rec_extern_fields() after an update,
+we must temporarily mark freed clustered index pages allocated, so
+that off-page columns will not be allocated from them. Between the
+btr_store_big_rec_extern_fields() and mtr_commit() we have to
+mark the pages free again, so that no pages will be leaked. */
+UNIV_INTERN
+void
+btr_mark_freed_leaves(
+/*==================*/
+	dict_index_t*	index,	/*!< in/out: clustered index */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	ibool		nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */
+{
+	/* This is loosely based on mtr_memo_release(). */
+
+	ulint	offset;
+
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
+	if (!mtr->freed_clust_leaf) {
+		return;
+	}
+
+	offset = dyn_array_get_data_size(&mtr->memo);
+
+	while (offset > 0) {
+		mtr_memo_slot_t*	slot;
+		buf_block_t*		block;
+
+		offset -= sizeof *slot;
+
+		slot = dyn_array_get_element(&mtr->memo, offset);
+
+		if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
+			continue;
+		}
+
+		/* Because btr_page_alloc() does invoke
+		mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
+		blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
+		memo must still be clustered index leaf tree pages. */
+		block = slot->object;
+		ut_a(buf_block_get_space(block)
+		     == dict_index_get_space(index));
+		ut_a(fil_page_get_type(buf_block_get_frame(block))
+		     == FIL_PAGE_INDEX);
+		ut_a(page_is_leaf(buf_block_get_frame(block)));
+
+		if (nonfree) {
+			/* Allocate the same page again. */
+			ulint	page_no;
+			page_no = btr_page_alloc_low(
+				index, buf_block_get_page_no(block),
+				FSP_NO_DIR, 0, mtr, NULL);
+			ut_a(page_no == buf_block_get_page_no(block));
+		} else {
+			/* Assert that the page is allocated and free it. */
+			btr_page_free_low(index, block, 0, mtr);
+		}
+	}
+
+	ut_ad(btr_freed_leaves_validate(mtr));
+}
+
+#ifdef UNIV_DEBUG
+/**************************************************************//**
+Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
+@see btr_mark_freed_leaves()
+@return TRUE */
+UNIV_INTERN
+ibool
+btr_freed_leaves_validate(
+/*======================*/
+	mtr_t*	mtr)	/*!< in: mini-transaction */
+{
+	ulint	offset;
+
+	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
+	offset = dyn_array_get_data_size(&mtr->memo);
+
+	while (offset > 0) {
+		const mtr_memo_slot_t*	slot;
+		const buf_block_t*	block;
+
+		offset -= sizeof *slot;
+
+		slot = dyn_array_get_element(&mtr->memo, offset);
+
+		if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
+			continue;
+		}
+
+		ut_a(mtr->freed_clust_leaf);
+		/* Because btr_page_alloc() does invoke
+		mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
+		blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
+		memo must still be clustered index leaf tree pages. */
+		block = slot->object;
+		ut_a(fil_page_get_type(buf_block_get_frame(block))
+		     == FIL_PAGE_INDEX);
+		ut_a(page_is_leaf(buf_block_get_frame(block)));
+	}
+
+	return(TRUE);
 }
+#endif /* UNIV_DEBUG */
 
 /**************************************************************//**
 Sets the child node file address in a node pointer. */
@@ -1809,7 +1987,7 @@ btr_root_raise_and_insert(
 
 	level = btr_page_get_level(root, mtr);
 
-	new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
+	new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr);
 	new_page = buf_block_get_frame(new_block);
 	new_page_zip = buf_block_get_page_zip(new_block);
 	ut_a(!new_page_zip == !root_page_zip);
@@ -2545,7 +2723,7 @@ func_start:
 
 	/* 2. Allocate a new page to the index */
 	new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
-				   btr_page_get_level(page, mtr), mtr);
+				   btr_page_get_level(page, mtr), mtr, mtr);
 	new_page = buf_block_get_frame(new_block);
 	new_page_zip = buf_block_get_page_zip(new_block);
 	btr_page_create(new_block, new_page_zip, cursor->index,

=== modified file 'storage/innobase/btr/btr0cur.c'
--- a/storage/innobase/btr/btr0cur.c	2011-08-15 09:18:34 +0000
+++ b/storage/innobase/btr/btr0cur.c	2011-08-29 08:22:43 +0000
@@ -2532,39 +2532,6 @@ return_after_reservations:
 	return(err);
 }
 
-/**************************************************************//**
-Commits and restarts a mini-transaction so that it will retain an
-x-lock on index->lock and the cursor page. */
-UNIV_INTERN
-void
-btr_cur_mtr_commit_and_start(
-/*=========================*/
-	btr_cur_t*	cursor,	/*!< in: cursor */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	buf_block_t*	block;
-
-	block = btr_cur_get_block(cursor);
-
-	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
-				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-	/* Keep the locks across the mtr_commit(mtr). */
-	rw_lock_x_lock(dict_index_get_lock(cursor->index));
-	rw_lock_x_lock(&block->lock);
-	mutex_enter(&block->mutex);
-	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
-	mutex_exit(&block->mutex);
-	/* Write out the redo log. */
-	mtr_commit(mtr);
-	mtr_start(mtr);
-	/* Reassociate the locks with the mini-transaction.
-	They will be released on mtr_commit(mtr). */
-	mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
-		      MTR_MEMO_X_LOCK);
-	mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
-}
-
 /*==================== B-TREE DELETE MARK AND UNMARK ===============*/
 
 /****************************************************************//**
@@ -4190,6 +4157,9 @@ btr_store_big_rec_extern_fields_func(
 					the "external storage" flags in offsets
 					will not correspond to rec when
 					this function returns */
+	const big_rec_t*big_rec_vec,	/*!< in: vector containing fields
+					to be stored externally */
+
 #ifdef UNIV_DEBUG
 	mtr_t*		local_mtr,	/*!< in: mtr containing the
 					latch to rec and to the tree */
@@ -4198,9 +4168,11 @@ btr_store_big_rec_extern_fields_func(
 	ibool		update_in_place,/*! in: TRUE if the record is updated
 					in place (not delete+insert) */
 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-	const big_rec_t*big_rec_vec)	/*!< in: vector containing fields
-					to be stored externally */
-
+	mtr_t*		alloc_mtr)	/*!< in/out: in an insert, NULL;
+					in an update, local_mtr for
+					allocating BLOB pages and
+					updating BLOB pointers; alloc_mtr
+					must not have freed any leaf pages */
 {
 	ulint	rec_page_no;
 	byte*	field_ref;
@@ -4219,6 +4191,9 @@ btr_store_big_rec_extern_fields_func(
 
 	ut_ad(rec_offs_validate(rec, index, offsets));
 	ut_ad(rec_offs_any_extern(offsets));
+	ut_ad(local_mtr);
+	ut_ad(!alloc_mtr || alloc_mtr == local_mtr);
+	ut_ad(!update_in_place || alloc_mtr);
 	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
 				MTR_MEMO_X_LOCK));
 	ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
@@ -4234,6 +4209,25 @@ btr_store_big_rec_extern_fields_func(
 	rec_page_no = buf_block_get_page_no(rec_block);
 	ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
 
+	if (alloc_mtr) {
+		/* Because alloc_mtr will be committed after
+		mtr, it is possible that the tablespace has been
+		extended when the B-tree record was updated or
+		inserted, or it will be extended while allocating
+		pages for big_rec.
+
+		TODO: In mtr (not alloc_mtr), write a redo log record
+		about extending the tablespace to its current size,
+		and remember the current size. Whenever the tablespace
+		grows as pages are allocated, write further redo log
+		records to mtr. (Currently tablespace extension is not
+		covered by the redo log. If it were, the record would
+		only be written to alloc_mtr, which is committed after
+		mtr.) */
+	} else {
+		alloc_mtr = &mtr;
+	}
+
 	if (UNIV_LIKELY_NULL(page_zip)) {
 		int	err;
 
@@ -4310,7 +4304,7 @@ btr_store_big_rec_extern_fields_func(
 			}
 
 			block = btr_page_alloc(index, hint_page_no,
-					       FSP_NO_DIR, 0, &mtr);
+					       FSP_NO_DIR, 0, alloc_mtr, &mtr);
 			if (UNIV_UNLIKELY(block == NULL)) {
 
 				mtr_commit(&mtr);
@@ -4437,11 +4431,15 @@ btr_store_big_rec_extern_fields_func(
 					goto next_zip_page;
 				}
 
-				rec_block = buf_page_get(space_id, zip_size,
-							 rec_page_no,
-							 RW_X_LATCH, &mtr);
-				buf_block_dbg_add_level(rec_block,
-							SYNC_NO_ORDER_CHECK);
+				if (alloc_mtr == &mtr) {
+					rec_block = buf_page_get(
+						space_id, zip_size,
+						rec_page_no,
+						RW_X_LATCH, &mtr);
+					buf_block_dbg_add_level(
+						rec_block,
+						SYNC_NO_ORDER_CHECK);
+				}
 
 				if (err == Z_STREAM_END) {
 					mach_write_to_4(field_ref
@@ -4475,7 +4473,8 @@ btr_store_big_rec_extern_fields_func(
 
 				page_zip_write_blob_ptr(
 					page_zip, rec, index, offsets,
-					big_rec_vec->fields[i].field_no, &mtr);
+					big_rec_vec->fields[i].field_no,
+					alloc_mtr);
 
 next_zip_page:
 				prev_page_no = page_no;
@@ -4520,19 +4519,23 @@ next_zip_page:
 
 				extern_len -= store_len;
 
-				rec_block = buf_page_get(space_id, zip_size,
-							 rec_page_no,
-							 RW_X_LATCH, &mtr);
-				buf_block_dbg_add_level(rec_block,
-							SYNC_NO_ORDER_CHECK);
+				if (alloc_mtr == &mtr) {
+					rec_block = buf_page_get(
+						space_id, zip_size,
+						rec_page_no,
+						RW_X_LATCH, &mtr);
+					buf_block_dbg_add_level(
+						rec_block,
+						SYNC_NO_ORDER_CHECK);
+				}
 
 				mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
-						 MLOG_4BYTES, &mtr);
+						 MLOG_4BYTES, alloc_mtr);
 				mlog_write_ulint(field_ref
 						 + BTR_EXTERN_LEN + 4,
 						 big_rec_vec->fields[i].len
 						 - extern_len,
-						 MLOG_4BYTES, &mtr);
+						 MLOG_4BYTES, alloc_mtr);
 
 				if (prev_page_no == FIL_NULL) {
 					btr_blob_dbg_add_blob(
@@ -4542,18 +4545,19 @@ next_zip_page:
 
 					mlog_write_ulint(field_ref
 							 + BTR_EXTERN_SPACE_ID,
-							 space_id,
-							 MLOG_4BYTES, &mtr);
+							 space_id, MLOG_4BYTES,
+							 alloc_mtr);
 
 					mlog_write_ulint(field_ref
 							 + BTR_EXTERN_PAGE_NO,
-							 page_no,
-							 MLOG_4BYTES, &mtr);
+							 page_no, MLOG_4BYTES,
+							 alloc_mtr);
 
 					mlog_write_ulint(field_ref
 							 + BTR_EXTERN_OFFSET,
 							 FIL_PAGE_DATA,
-							 MLOG_4BYTES, &mtr);
+							 MLOG_4BYTES,
+							 alloc_mtr);
 				}
 
 				prev_page_no = page_no;

=== modified file 'storage/innobase/buf/buf0buf.c'
--- a/storage/innobase/buf/buf0buf.c	2011-08-17 03:51:40 +0000
+++ b/storage/innobase/buf/buf0buf.c	2011-08-29 08:22:43 +0000
@@ -1716,31 +1716,6 @@ buf_page_set_accessed_make_young(
 }
 
 /********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
-{
-	buf_block_t*	block;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
-
-	buf_pool_mutex_enter(buf_pool);
-
-	block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
-
-	if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
-		ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
-		block->check_index_page_at_flush = FALSE;
-	}
-
-	buf_pool_mutex_exit(buf_pool);
-}
-
-/********************************************************************//**
 Returns the current state of is_hashed of a page. FALSE if the page is
 not in the pool. NOTE that this operation does not fix the page in the
 pool if it is found there.

=== modified file 'storage/innobase/dict/dict0dict.c'
--- a/storage/innobase/dict/dict0dict.c	2011-08-17 01:07:59 +0000
+++ b/storage/innobase/dict/dict0dict.c	2011-08-29 09:44:28 +0000
@@ -5206,7 +5206,8 @@ UNIV_INTERN
 void
 dict_set_corrupted_index_cache_only(
 /*================================*/
-	dict_index_t*	index)		/*!< in/out: index */
+	dict_index_t*	index,		/*!< in/out: index */
+	dict_table_t*	table)		/*!< in/out: table */
 {
 	ut_ad(index);
 	ut_ad(mutex_own(&dict_sys->mutex));
@@ -5216,7 +5217,14 @@ dict_set_corrupted_index_cache_only(
 	/* Mark the table as corrupted only if the clustered index
 	is corrupted */
 	if (dict_index_is_clust(index)) {
-		index->table->corrupted = TRUE;
+		dict_table_t*	corrupt_table;
+
+		corrupt_table = table ? table : index->table;
+		ut_ad(!index->table || !table || index->table  == table);
+
+		if (corrupt_table) {
+			corrupt_table->corrupted = TRUE;
+		}
 	}
 
 	index->type |= DICT_CORRUPT;

=== modified file 'storage/innobase/dict/dict0load.c'
--- a/storage/innobase/dict/dict0load.c	2011-08-17 01:07:59 +0000
+++ b/storage/innobase/dict/dict0load.c	2011-08-29 09:44:28 +0000
@@ -1497,7 +1497,8 @@ dict_load_indexes(
 				dictionary cache for such metadata corruption,
 				since we would always be able to set it
 				when loading the dictionary cache */
-				dict_set_corrupted_index_cache_only(index);
+				dict_set_corrupted_index_cache_only(
+					index, table);
 
 				fprintf(stderr,
 					"InnoDB: Index is corrupt but forcing"

=== modified file 'storage/innobase/fsp/fsp0fsp.c'
--- a/storage/innobase/fsp/fsp0fsp.c	2011-02-02 13:58:01 +0000
+++ b/storage/innobase/fsp/fsp0fsp.c	2011-08-29 08:22:43 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -311,8 +311,9 @@ fsp_fill_free_list(
 					descriptor page and ibuf bitmap page;
 					then we do not allocate more extents */
 	ulint		space,		/*!< in: space */
-	fsp_header_t*	header,		/*!< in: space header */
-	mtr_t*		mtr);		/*!< in: mtr */
+	fsp_header_t*	header,		/*!< in/out: space header */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+	UNIV_COLD __attribute__((nonnull));
 /**********************************************************************//**
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize file space
@@ -325,14 +326,20 @@ fseg_alloc_free_page_low(
 	ulint		space,	/*!< in: space */
 	ulint		zip_size,/*!< in: compressed page size in bytes
 				or 0 for uncompressed pages */
-	fseg_inode_t*	seg_inode, /*!< in: segment inode */
+	fseg_inode_t*	seg_inode, /*!< in/out: segment inode */
 	ulint		hint,	/*!< in: hint of which page would be desirable */
 	byte		direction, /*!< in: if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
 				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr);	/*!< in: mtr handle */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	mtr_t*		init_mtr)/*!< in/out: mini-transaction in which the
+				page should be initialized
+				(may be the same as mtr), or NULL if it
+				should not be initialized (the page at hint
+				was previously freed in mtr) */
+	__attribute__((warn_unused_result, nonnull(3,6)));
 #endif /* !UNIV_HOTBACKUP */
 
 /**********************************************************************//**
@@ -700,17 +707,18 @@ list, if not free limit == space size. T
 descriptor defined, as they are uninitialized above the free limit.
 @return pointer to the extent descriptor, NULL if the page does not
 exist in the space or if the offset exceeds the free limit */
-UNIV_INLINE
+UNIV_INLINE __attribute__((nonnull, warn_unused_result))
 xdes_t*
 xdes_get_descriptor_with_space_hdr(
 /*===============================*/
-	fsp_header_t*	sp_header,/*!< in/out: space header, x-latched */
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: page offset;
-				if equal to the free limit,
-				we try to add new extents to
-				the space free list */
-	mtr_t*		mtr)	/*!< in: mtr handle */
+	fsp_header_t*	sp_header,	/*!< in/out: space header, x-latched
+					in mtr */
+	ulint		space,		/*!< in: space id */
+	ulint		offset,		/*!< in: page offset; if equal
+					to the free limit, we try to
+					add new extents to the space
+					free list */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
 {
 	ulint	limit;
 	ulint	size;
@@ -718,11 +726,9 @@ xdes_get_descriptor_with_space_hdr(
 	ulint	descr_page_no;
 	page_t*	descr_page;
 
-	ut_ad(mtr);
 	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
 				MTR_MEMO_X_LOCK));
-	ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX)
-	      || mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
 	ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
 	/* Read free limit and space size */
 	limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
@@ -772,7 +778,7 @@ is necessary to make the descriptor defi
 above the free limit.
 @return pointer to the extent descriptor, NULL if the page does not
 exist in the space or if the offset exceeds the free limit */
-static
+static __attribute__((nonnull, warn_unused_result))
 xdes_t*
 xdes_get_descriptor(
 /*================*/
@@ -781,7 +787,7 @@ xdes_get_descriptor(
 			or 0 for uncompressed pages */
 	ulint	offset,	/*!< in: page offset; if equal to the free limit,
 			we try to add new extents to the space free list */
-	mtr_t*	mtr)	/*!< in: mtr handle */
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
 {
 	buf_block_t*	block;
 	fsp_header_t*	sp_header;
@@ -1159,14 +1165,14 @@ fsp_header_get_tablespace_size(void)
 Tries to extend a single-table tablespace so that a page would fit in the
 data file.
 @return	TRUE if success */
-static
+static UNIV_COLD __attribute__((nonnull, warn_unused_result))
 ibool
 fsp_try_extend_data_file_with_pages(
 /*================================*/
 	ulint		space,		/*!< in: space */
 	ulint		page_no,	/*!< in: page number */
-	fsp_header_t*	header,		/*!< in: space header */
-	mtr_t*		mtr)		/*!< in: mtr */
+	fsp_header_t*	header,		/*!< in/out: space header */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
 {
 	ibool	success;
 	ulint	actual_size;
@@ -1191,7 +1197,7 @@ fsp_try_extend_data_file_with_pages(
 /***********************************************************************//**
 Tries to extend the last data file of a tablespace if it is auto-extending.
 @return	FALSE if not auto-extending */
-static
+static UNIV_COLD __attribute__((nonnull))
 ibool
 fsp_try_extend_data_file(
 /*=====================*/
@@ -1201,8 +1207,8 @@ fsp_try_extend_data_file(
 					the actual file size rounded down to
 					megabyte */
 	ulint		space,		/*!< in: space */
-	fsp_header_t*	header,		/*!< in: space header */
-	mtr_t*		mtr)		/*!< in: mtr */
+	fsp_header_t*	header,		/*!< in/out: space header */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
 {
 	ulint	size;
 	ulint	zip_size;
@@ -1338,7 +1344,7 @@ fsp_fill_free_list(
 					then we do not allocate more extents */
 	ulint		space,		/*!< in: space */
 	fsp_header_t*	header,		/*!< in/out: space header */
-	mtr_t*		mtr)		/*!< in: mtr */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
 {
 	ulint	limit;
 	ulint	size;
@@ -1537,9 +1543,46 @@ fsp_alloc_free_extent(
 }
 
 /**********************************************************************//**
+Allocates a single free page from a space. */
+static __attribute__((nonnull))
+void
+fsp_alloc_from_free_frag(
+/*=====================*/
+	fsp_header_t*	header,	/*!< in/out: tablespace header */
+	xdes_t*		descr,	/*!< in/out: extent descriptor */
+	ulint		bit,	/*!< in: slot to allocate in the extent */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	ulint		frag_n_used;
+
+	ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
+	ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr));
+	xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
+
+	/* Update the FRAG_N_USED field */
+	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
+				     mtr);
+	frag_n_used++;
+	mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
+			 mtr);
+	if (xdes_is_full(descr, mtr)) {
+		/* The fragment is full: move it to another list */
+		flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
+			    mtr);
+		xdes_set_state(descr, XDES_FULL_FRAG, mtr);
+
+		flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
+			      mtr);
+		mlog_write_ulint(header + FSP_FRAG_N_USED,
+				 frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
+				 mtr);
+	}
+}
+
+/**********************************************************************//**
 Allocates a single free page from a space. The page is marked as used.
 @return	the page offset, FIL_NULL if no page could be allocated */
-static
+static __attribute__((nonnull, warn_unused_result))
 ulint
 fsp_alloc_free_page(
 /*================*/
@@ -1547,19 +1590,22 @@ fsp_alloc_free_page(
 	ulint	zip_size,/*!< in: compressed page size in bytes
 			or 0 for uncompressed pages */
 	ulint	hint,	/*!< in: hint of which page would be desirable */
-	mtr_t*	mtr)	/*!< in: mtr handle */
+	mtr_t*	mtr,	/*!< in/out: mini-transaction */
+	mtr_t*	init_mtr)/*!< in/out: mini-transaction in which the
+			page should be initialized
+			(may be the same as mtr) */
 {
 	fsp_header_t*	header;
 	fil_addr_t	first;
 	xdes_t*		descr;
 	buf_block_t*	block;
 	ulint		free;
-	ulint		frag_n_used;
 	ulint		page_no;
 	ulint		space_size;
 	ibool		success;
 
 	ut_ad(mtr);
+	ut_ad(init_mtr);
 
 	header = fsp_get_space_header(space, zip_size, mtr);
 
@@ -1641,38 +1687,19 @@ fsp_alloc_free_page(
 		}
 	}
 
-	xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
-
-	/* Update the FRAG_N_USED field */
-	frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
-				     mtr);
-	frag_n_used++;
-	mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
-			 mtr);
-	if (xdes_is_full(descr, mtr)) {
-		/* The fragment is full: move it to another list */
-		flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
-			    mtr);
-		xdes_set_state(descr, XDES_FULL_FRAG, mtr);
-
-		flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
-			      mtr);
-		mlog_write_ulint(header + FSP_FRAG_N_USED,
-				 frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
-				 mtr);
-	}
+	fsp_alloc_from_free_frag(header, descr, free, mtr);
 
 	/* Initialize the allocated page to the buffer pool, so that it can
 	be obtained immediately with buf_page_get without need for a disk
 	read. */
 
-	buf_page_create(space, page_no, zip_size, mtr);
+	buf_page_create(space, page_no, zip_size, init_mtr);
 
-	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
+	block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, init_mtr);
 	buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
 
 	/* Prior contents of the page should be ignored */
-	fsp_init_file_page(block, mtr);
+	fsp_init_file_page(block, init_mtr);
 
 	return(page_no);
 }
@@ -1908,7 +1935,7 @@ fsp_alloc_seg_inode_page(
 	zip_size = dict_table_flags_to_zip_size(
 		mach_read_from_4(FSP_SPACE_FLAGS + space_header));
 
-	page_no = fsp_alloc_free_page(space, zip_size, 0, mtr);
+	page_no = fsp_alloc_free_page(space, zip_size, 0, mtr, mtr);
 
 	if (page_no == FIL_NULL) {
 
@@ -2320,7 +2347,7 @@ fseg_create_general(
 
 	if (page == 0) {
 		page = fseg_alloc_free_page_low(space, zip_size,
-						inode, 0, FSP_UP, mtr);
+						inode, 0, FSP_UP, mtr, mtr);
 
 		if (page == FIL_NULL) {
 
@@ -2569,14 +2596,19 @@ fseg_alloc_free_page_low(
 	ulint		space,	/*!< in: space */
 	ulint		zip_size,/*!< in: compressed page size in bytes
 				or 0 for uncompressed pages */
-	fseg_inode_t*	seg_inode, /*!< in: segment inode */
+	fseg_inode_t*	seg_inode, /*!< in/out: segment inode */
 	ulint		hint,	/*!< in: hint of which page would be desirable */
 	byte		direction, /*!< in: if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
 				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr)	/*!< in: mtr handle */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	mtr_t*		init_mtr)/*!< in/out: mini-transaction in which the
+				page should be initialized
+				(may be the same as mtr), or NULL if it
+				should not be initialized (the page at hint
+				was previously freed in mtr) */
 {
 	fsp_header_t*	space_header;
 	ulint		space_size;
@@ -2587,7 +2619,6 @@ fseg_alloc_free_page_low(
 	ulint		ret_page;	/*!< the allocated page offset, FIL_NULL
 					if could not be allocated */
 	xdes_t*		ret_descr;	/*!< the extent of the allocated page */
-	ibool		frag_page_allocated = FALSE;
 	ibool		success;
 	ulint		n;
 
@@ -2609,6 +2640,8 @@ fseg_alloc_free_page_low(
 	if (descr == NULL) {
 		/* Hint outside space or too high above free limit: reset
 		hint */
+		ut_a(init_mtr);
+		/* The file space header page is always allocated. */
 		hint = 0;
 		descr = xdes_get_descriptor(space, zip_size, hint, mtr);
 	}
@@ -2619,15 +2652,20 @@ fseg_alloc_free_page_low(
 	    && mach_read_from_8(descr + XDES_ID) == seg_id
 	    && (xdes_get_bit(descr, XDES_FREE_BIT,
 			     hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
-
+take_hinted_page:
 		/* 1. We can take the hinted page
 		=================================*/
 		ret_descr = descr;
 		ret_page = hint;
+		/* Skip the check for extending the tablespace. If the
+		page hint were not within the size of the tablespace,
+		we would have got (descr == NULL) above and reset the hint. */
+		goto got_hinted_page;
 		/*-----------------------------------------------------------*/
-	} else if ((xdes_get_state(descr, mtr) == XDES_FREE)
-		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
-		   && (used >= FSEG_FRAG_LIMIT)) {
+	} else if (xdes_get_state(descr, mtr) == XDES_FREE
+		   && (!init_mtr
+		       || ((reserved - used < reserved / FSEG_FILLFACTOR)
+			   && used >= FSEG_FRAG_LIMIT))) {
 
 		/* 2. We allocate the free extent from space and can take
 		=========================================================
@@ -2645,8 +2683,20 @@ fseg_alloc_free_page_low(
 		/* Try to fill the segment free list */
 		fseg_fill_free_list(seg_inode, space, zip_size,
 				    hint + FSP_EXTENT_SIZE, mtr);
-		ret_page = hint;
+		goto take_hinted_page;
 		/*-----------------------------------------------------------*/
+	} else if (!init_mtr) {
+		ut_a(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
+		fsp_alloc_from_free_frag(space_header, descr,
+					 hint % FSP_EXTENT_SIZE, mtr);
+		ret_page = hint;
+		ret_descr = NULL;
+
+		/* Put the page in the fragment page array of the segment */
+		n = fseg_find_free_frag_page_slot(seg_inode, mtr);
+		ut_a(n != FIL_NULL);
+		fseg_set_nth_frag_page_no(seg_inode, n, ret_page, mtr);
+		goto got_hinted_page;
 	} else if ((direction != FSP_NO_DIR)
 		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
 		   && (used >= FSEG_FRAG_LIMIT)
@@ -2705,11 +2755,10 @@ fseg_alloc_free_page_low(
 	} else if (used < FSEG_FRAG_LIMIT) {
 		/* 6. We allocate an individual page from the space
 		===================================================*/
-		ret_page = fsp_alloc_free_page(space, zip_size, hint, mtr);
+		ret_page = fsp_alloc_free_page(space, zip_size, hint,
+					       mtr, init_mtr);
 		ret_descr = NULL;
 
-		frag_page_allocated = TRUE;
-
 		if (ret_page != FIL_NULL) {
 			/* Put the page in the fragment page array of the
 			segment */
@@ -2719,6 +2768,10 @@ fseg_alloc_free_page_low(
 			fseg_set_nth_frag_page_no(seg_inode, n, ret_page,
 						  mtr);
 		}
+
+		/* fsp_alloc_free_page() invoked fsp_init_file_page()
+		already. */
+		return(ret_page);
 		/*-----------------------------------------------------------*/
 	} else {
 		/* 7. We allocate a new extent and take its first page
@@ -2766,26 +2819,34 @@ fseg_alloc_free_page_low(
 		}
 	}
 
-	if (!frag_page_allocated) {
+got_hinted_page:
+	{
 		/* Initialize the allocated page to buffer pool, so that it
 		can be obtained immediately with buf_page_get without need
 		for a disk read */
 		buf_block_t*	block;
 		ulint		zip_size = dict_table_flags_to_zip_size(
 			mach_read_from_4(FSP_SPACE_FLAGS + space_header));
+		mtr_t*		block_mtr = init_mtr ? init_mtr : mtr;
 
-		block = buf_page_create(space, ret_page, zip_size, mtr);
+		block = buf_page_create(space, ret_page, zip_size, block_mtr);
 		buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
 
 		if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size,
 							ret_page, RW_X_LATCH,
-							mtr))) {
+							block_mtr))) {
 			ut_error;
 		}
 
-		/* The prior contents of the page should be ignored */
-		fsp_init_file_page(block, mtr);
+		if (init_mtr) {
+			/* The prior contents of the page should be ignored */
+			fsp_init_file_page(block, init_mtr);
+		}
+	}
 
+	/* ret_descr == NULL if the block was allocated from free_frag
+	(XDES_FREE_FRAG) */
+	if (ret_descr != NULL) {
 		/* At this point we know the extent and the page offset.
 		The extent is still in the appropriate list (FSEG_NOT_FULL
 		or FSEG_FREE), and the page is not yet marked as used. */
@@ -2798,8 +2859,6 @@ fseg_alloc_free_page_low(
 		fseg_mark_page_used(seg_inode, space, zip_size, ret_page, mtr);
 	}
 
-	buf_reset_check_index_page_at_flush(space, ret_page);
-
 	return(ret_page);
 }
 
@@ -2812,7 +2871,7 @@ UNIV_INTERN
 ulint
 fseg_alloc_free_page_general(
 /*=========================*/
-	fseg_header_t*	seg_header,/*!< in: segment header */
+	fseg_header_t*	seg_header,/*!< in/out: segment header */
 	ulint		hint,	/*!< in: hint of which page would be desirable */
 	byte		direction,/*!< in: if the new page is needed because
 				of an index page split, and records are
@@ -2824,7 +2883,11 @@ fseg_alloc_free_page_general(
 				with fsp_reserve_free_extents, then there
 				is no need to do the check for this individual
 				page */
-	mtr_t*		mtr)	/*!< in: mtr handle */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction handle */
+	mtr_t*		init_mtr)/*!< in/out: mtr or another mini-transaction
+				in which the page should be initialized,
+				or NULL if this is a "fake allocation" of
+				a page that was previously freed in mtr */
 {
 	fseg_inode_t*	inode;
 	ulint		space;
@@ -2866,7 +2929,8 @@ fseg_alloc_free_page_general(
 	}
 
 	page_no = fseg_alloc_free_page_low(space, zip_size,
-					   inode, hint, direction, mtr);
+					   inode, hint, direction,
+					   mtr, init_mtr);
 	if (!has_done_reservation) {
 		fil_space_release_free_extents(space, n_reserved);
 	}
@@ -2875,28 +2939,6 @@ fseg_alloc_free_page_general(
 }
 
 /**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@return	allocated page offset, FIL_NULL if no page could be allocated */
-UNIV_INTERN
-ulint
-fseg_alloc_free_page(
-/*=================*/
-	fseg_header_t*	seg_header,/*!< in: segment header */
-	ulint		hint,	/*!< in: hint of which page would be desirable */
-	byte		direction,/*!< in: if the new page is needed because
-				of an index page split, and records are
-				inserted there in order, into which
-				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr)	/*!< in: mtr handle */
-{
-	return(fseg_alloc_free_page_general(seg_header, hint, direction,
-					    FALSE, mtr));
-}
-
-/**********************************************************************//**
 Checks that we have at least 2 frag pages free in the first extent of a
 single-table tablespace, and they are also physically initialized to the data
 file. That is we have already extended the data file so that those pages are

=== modified file 'storage/innobase/include/btr0btr.h'
--- a/storage/innobase/include/btr0btr.h	2011-08-15 09:18:34 +0000
+++ b/storage/innobase/include/btr0btr.h	2011-08-29 08:22:43 +0000
@@ -568,7 +568,12 @@ btr_page_alloc(
 					page split is made */
 	ulint		level,		/*!< in: level where the page is placed
 					in the tree */
-	mtr_t*		mtr);		/*!< in: mtr */
+	mtr_t*		mtr,		/*!< in/out: mini-transaction
+					for the allocation */
+	mtr_t*		init_mtr)	/*!< in/out: mini-transaction
+					for x-latching and initializing
+					the page */
+	__attribute__((nonnull, warn_unused_result));
 /**************************************************************//**
 Frees a file page used in an index tree. NOTE: cannot free field external
 storage pages because the page must contain info on its level. */
@@ -591,6 +596,33 @@ btr_page_free_low(
 	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
 	ulint		level,	/*!< in: page level */
 	mtr_t*		mtr);	/*!< in: mtr */
+/**************************************************************//**
+Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
+For invoking btr_store_big_rec_extern_fields() after an update,
+we must temporarily mark freed clustered index pages allocated, so
+that off-page columns will not be allocated from them. Between the
+btr_store_big_rec_extern_fields() and mtr_commit() we have to
+mark the pages free again, so that no pages will be leaked. */
+UNIV_INTERN
+void
+btr_mark_freed_leaves(
+/*==================*/
+	dict_index_t*	index,	/*!< in/out: clustered index */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	ibool		nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */
+	UNIV_COLD __attribute__((nonnull));
+#ifdef UNIV_DEBUG
+/**************************************************************//**
+Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
+@see btr_mark_freed_leaves()
+@return TRUE */
+UNIV_INTERN
+ibool
+btr_freed_leaves_validate(
+/*======================*/
+	mtr_t*	mtr)	/*!< in: mini-transaction */
+	__attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_DEBUG */
 #ifdef UNIV_BTR_PRINT
 /*************************************************************//**
 Prints size info of a B-tree. */

=== modified file 'storage/innobase/include/btr0cur.h'
--- a/storage/innobase/include/btr0cur.h	2011-06-16 09:07:49 +0000
+++ b/storage/innobase/include/btr0cur.h	2011-08-29 08:22:43 +0000
@@ -327,16 +327,6 @@ btr_cur_pessimistic_update(
 	que_thr_t*	thr,	/*!< in: query thread */
 	mtr_t*		mtr);	/*!< in: mtr; must be committed before
 				latching any further pages */
-/*****************************************************************
-Commits and restarts a mini-transaction so that it will retain an
-x-lock on index->lock and the cursor page. */
-UNIV_INTERN
-void
-btr_cur_mtr_commit_and_start(
-/*=========================*/
-	btr_cur_t*	cursor,	/*!< in: cursor */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-	UNIV_COLD __attribute__((nonnull));
 /***********************************************************//**
 Marks a clustered index record deleted. Writes an undo log record to
 undo log on this delete marking. Writes in the trx id field the id
@@ -528,6 +518,8 @@ btr_store_big_rec_extern_fields_func(
 					the "external storage" flags in offsets
 					will not correspond to rec when
 					this function returns */
+	const big_rec_t*big_rec_vec,	/*!< in: vector containing fields
+					to be stored externally */
 #ifdef UNIV_DEBUG
 	mtr_t*		local_mtr,	/*!< in: mtr containing the
 					latch to rec and to the tree */
@@ -536,9 +528,12 @@ btr_store_big_rec_extern_fields_func(
 	ibool		update_in_place,/*! in: TRUE if the record is updated
 					in place (not delete+insert) */
 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-	const big_rec_t*big_rec_vec)	/*!< in: vector containing fields
-					to be stored externally */
-	__attribute__((nonnull));
+	mtr_t*		alloc_mtr)	/*!< in/out: in an insert, NULL;
+					in an update, local_mtr for
+					allocating BLOB pages and
+					updating BLOB pointers; alloc_mtr
+					must not have freed any leaf pages */
+	__attribute__((nonnull(1,2,3,4,5), warn_unused_result));
 
 /** Stores the fields in big_rec_vec to the tablespace and puts pointers to
 them in rec.  The extern flags in rec will have to be set beforehand.
@@ -547,21 +542,22 @@ file segment of the index tree.
 @param index	in: clustered index; MUST be X-latched by mtr
 @param b	in/out: block containing rec; MUST be X-latched by mtr
 @param rec	in/out: clustered index record
-@param offsets	in: rec_get_offsets(rec, index);
+@param offs	in: rec_get_offsets(rec, index);
 		the "external storage" flags in offsets will not be adjusted
+@param big	in: vector containing fields to be stored externally
 @param mtr	in: mini-transaction that holds x-latch on index and b
 @param upd	in: TRUE if the record is updated in place (not delete+insert)
-@param big	in: vector containing fields to be stored externally
+@param rmtr	in/out: in updates, the mini-transaction that holds rec
 @return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 #ifdef UNIV_DEBUG
-# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
-	btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big)
+# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \
+	btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,mtr,upd,rmtr)
 #elif defined UNIV_BLOB_LIGHT_DEBUG
-# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
-	btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big)
+# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \
+	btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,upd,rmtr)
 #else
-# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
-	btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big)
+# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \
+	btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,rmtr)
 #endif
 
 /*******************************************************************//**

=== modified file 'storage/innobase/include/buf0buf.h'
--- a/storage/innobase/include/buf0buf.h	2011-07-19 14:54:59 +0000
+++ b/storage/innobase/include/buf0buf.h	2011-08-29 08:22:43 +0000
@@ -491,15 +491,6 @@ buf_page_peek(
 /*==========*/
 	ulint	space,	/*!< in: space id */
 	ulint	offset);/*!< in: page number */
-/********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset);/*!< in: page number */
 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
 /********************************************************************//**
 Sets file_page_was_freed TRUE if the page is found in the buffer pool.

=== modified file 'storage/innobase/include/dict0dict.h'
--- a/storage/innobase/include/dict0dict.h	2011-08-17 01:07:59 +0000
+++ b/storage/innobase/include/dict0dict.h	2011-08-29 09:44:28 +0000
@@ -1298,7 +1298,8 @@ UNIV_INTERN
 void
 dict_set_corrupted_index_cache_only(
 /*================================*/
-	dict_index_t*	index);		/*!< in/out: index */
+	dict_index_t*	index,		/*!< in/out: index */
+	dict_table_t*	table);		/*!< in/out: table */
 
 /**********************************************************************//**
 Flags a table with specified space_id corrupted in the table dictionary

=== modified file 'storage/innobase/include/fsp0fsp.h'
--- a/storage/innobase/include/fsp0fsp.h	2010-06-22 15:58:28 +0000
+++ b/storage/innobase/include/fsp0fsp.h	2011-08-29 08:22:43 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -176,19 +176,18 @@ fseg_n_reserved_pages(
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize
 file space fragmentation.
-@return	the allocated page offset FIL_NULL if no page could be allocated */
-UNIV_INTERN
-ulint
-fseg_alloc_free_page(
-/*=================*/
-	fseg_header_t*	seg_header, /*!< in: segment header */
-	ulint		hint,	/*!< in: hint of which page would be desirable */
-	byte		direction, /*!< in: if the new page is needed because
+@param[in/out] seg_header	segment header
+@param[in] hint			hint of which page would be desirable
+@param[in] direction		if the new page is needed because
 				of an index page split, and records are
 				inserted there in order, into which
 				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr);	/*!< in: mtr handle */
+				FSP_UP, FSP_NO_DIR
+@param[in/out] mtr		mini-transaction
+@return	the allocated page offset FIL_NULL if no page could be allocated */
+#define fseg_alloc_free_page(seg_header, hint, direction, mtr)		\
+	fseg_alloc_free_page_general(seg_header, hint, direction,	\
+				     FALSE, mtr, mtr)
 /**********************************************************************//**
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize file space
@@ -198,7 +197,7 @@ UNIV_INTERN
 ulint
 fseg_alloc_free_page_general(
 /*=========================*/
-	fseg_header_t*	seg_header,/*!< in: segment header */
+	fseg_header_t*	seg_header,/*!< in/out: segment header */
 	ulint		hint,	/*!< in: hint of which page would be desirable */
 	byte		direction,/*!< in: if the new page is needed because
 				of an index page split, and records are
@@ -210,7 +209,12 @@ fseg_alloc_free_page_general(
 				with fsp_reserve_free_extents, then there
 				is no need to do the check for this individual
 				page */
-	mtr_t*		mtr);	/*!< in: mtr handle */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	mtr_t*		init_mtr)/*!< in/out: mtr or another mini-transaction
+				in which the page should be initialized,
+				or NULL if this is a "fake allocation" of
+				a page that was previously freed in mtr */
+	__attribute__((warn_unused_result, nonnull(1,5)));
 /**********************************************************************//**
 Reserves free pages from a tablespace. All mini-transactions which may
 use several pages from the tablespace should call this function beforehand

=== modified file 'storage/innobase/include/mtr0mtr.h'
--- a/storage/innobase/include/mtr0mtr.h	2011-04-15 13:46:11 +0000
+++ b/storage/innobase/include/mtr0mtr.h	2011-08-29 08:22:43 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -53,6 +53,8 @@ first 3 values must be RW_S_LATCH, RW_X_
 #define MTR_MEMO_MODIFY		54
 #define	MTR_MEMO_S_LOCK		55
 #define	MTR_MEMO_X_LOCK		56
+/** The mini-transaction freed a clustered index leaf page. */
+#define MTR_MEMO_FREE_CLUST_LEAF	57
 
 /** @name Log item types
 The log items are declared 'byte' so that the compiler can warn if val
@@ -378,11 +380,14 @@ struct mtr_struct{
 #endif
 	dyn_array_t	memo;	/*!< memo stack for locks etc. */
 	dyn_array_t	log;	/*!< mini-transaction log */
-	ibool		inside_ibuf;
+	unsigned	inside_ibuf:1;
 				/*!< TRUE if inside ibuf changes */
-	ibool		modifications;
-				/* TRUE if the mtr made modifications to
-				buffer pool pages */
+	unsigned	modifications:1;
+				/*!< TRUE if the mini-transaction
+				modified buffer pool pages */
+	unsigned	freed_clust_leaf:1;
+				/*!< TRUE if MTR_MEMO_FREE_CLUST_LEAF
+				was logged in the mini-transaction */
 	ulint		n_log_recs;
 				/* count of how many page initial log records
 				have been written to the mtr log */

=== modified file 'storage/innobase/include/mtr0mtr.ic'
--- a/storage/innobase/include/mtr0mtr.ic	2011-05-31 07:55:29 +0000
+++ b/storage/innobase/include/mtr0mtr.ic	2011-08-29 08:22:43 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -43,8 +43,9 @@ mtr_start(
 	dyn_array_create(&(mtr->log));
 
 	mtr->log_mode = MTR_LOG_ALL;
-	mtr->modifications = FALSE;
 	mtr->inside_ibuf = FALSE;
+	mtr->modifications = FALSE;
+	mtr->freed_clust_leaf = FALSE;
 	mtr->n_log_recs = 0;
 
 	ut_d(mtr->state = MTR_ACTIVE);
@@ -66,7 +67,8 @@ mtr_memo_push(
 
 	ut_ad(object);
 	ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
-	ut_ad(type <= MTR_MEMO_X_LOCK);
+	ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF);
+	ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf);
 	ut_ad(mtr);
 	ut_ad(mtr->magic_n == MTR_MAGIC_N);
 	ut_ad(mtr->state == MTR_ACTIVE);

=== modified file 'storage/innobase/include/sync0sync.ic'
--- a/storage/innobase/include/sync0sync.ic	2010-07-15 13:47:50 +0000
+++ b/storage/innobase/include/sync0sync.ic	2011-08-29 08:29:57 +0000
@@ -272,11 +272,10 @@ pfs_mutex_enter_nowait_func(
 	ulint	ret;
 	struct PSI_mutex_locker*	locker = NULL;
 	PSI_mutex_locker_state		state;
-	int	result = 0;
 
 	if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
 		locker = PSI_server->get_thread_mutex_locker(
-				&state, mutex->pfs_psi, PSI_MUTEX_LOCK);
+				&state, mutex->pfs_psi, PSI_MUTEX_TRYLOCK);
 		if (locker) {
 			PSI_server->start_mutex_wait(locker, file_name, line);
 		}
@@ -285,7 +284,7 @@ pfs_mutex_enter_nowait_func(
 	ret = mutex_enter_nowait_func(mutex, file_name, line);
 
 	if (locker) {
-		PSI_server->end_mutex_wait(locker, result);
+		PSI_server->end_mutex_wait(locker, ret);
 	}
 
 	return(ret);

=== modified file 'storage/innobase/mtr/mtr0mtr.c'
--- a/storage/innobase/mtr/mtr0mtr.c	2011-05-31 07:55:29 +0000
+++ b/storage/innobase/mtr/mtr0mtr.c	2011-08-29 08:22:43 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -64,12 +64,11 @@ mtr_memo_slot_release(
 			buf_page_release((buf_block_t*)object, type);
 		} else if (type == MTR_MEMO_S_LOCK) {
 			rw_lock_s_unlock((rw_lock_t*)object);
-#ifdef UNIV_DEBUG
 		} else if (type != MTR_MEMO_X_LOCK) {
-			ut_ad(type == MTR_MEMO_MODIFY);
+			ut_ad(type == MTR_MEMO_MODIFY
+			      || type == MTR_MEMO_FREE_CLUST_LEAF);
 			ut_ad(mtr_memo_contains(mtr, object,
 						MTR_MEMO_PAGE_X_FIX));
-#endif /* UNIV_DEBUG */
 		} else {
 			rw_lock_x_unlock((rw_lock_t*)object);
 		}

=== modified file 'storage/innobase/row/row0ins.c'
--- a/storage/innobase/row/row0ins.c	2011-08-17 01:07:59 +0000
+++ b/storage/innobase/row/row0ins.c	2011-08-29 08:22:43 +0000
@@ -2088,15 +2088,20 @@ row_ins_index_entry_low(
 			if (big_rec) {
 				ut_a(err == DB_SUCCESS);
 				/* Write out the externally stored
-				columns while still x-latching
-				index->lock and block->lock. We have
-				to mtr_commit(mtr) first, so that the
-				redo log will be written in the
-				correct order. Otherwise, we would run
-				into trouble on crash recovery if mtr
-				freed B-tree pages on which some of
-				the big_rec fields will be written. */
-				btr_cur_mtr_commit_and_start(&cursor, &mtr);
+				columns, but allocate the pages and
+				write the pointers using the
+				mini-transaction of the record update.
+				If any pages were freed in the update,
+				temporarily mark them allocated so
+				that off-page columns will not
+				overwrite them. We must do this,
+				because we will write the redo log for
+				the BLOB writes before writing the
+				redo log for the record update. Thus,
+				redo log application at crash recovery
+				will see BLOBs being written to free pages. */
+
+				btr_mark_freed_leaves(index, &mtr, TRUE);
 
 				rec = btr_cur_get_rec(&cursor);
 				offsets = rec_get_offsets(
@@ -2105,7 +2110,8 @@ row_ins_index_entry_low(
 
 				err = btr_store_big_rec_extern_fields(
 					index, btr_cur_get_block(&cursor),
-					rec, offsets, &mtr, FALSE, big_rec);
+					rec, offsets, big_rec, &mtr,
+					FALSE, &mtr);
 				/* If writing big_rec fails (for
 				example, because of DB_OUT_OF_FILE_SPACE),
 				the record will be corrupted. Even if
@@ -2118,6 +2124,9 @@ row_ins_index_entry_low(
 				undo log, and thus the record cannot
 				be rolled back. */
 				ut_a(err == DB_SUCCESS);
+				/* Free the pages again
+				in order to avoid a leak. */
+				btr_mark_freed_leaves(index, &mtr, FALSE);
 				goto stored_big_rec;
 			}
 		} else {
@@ -2159,7 +2168,7 @@ function_exit:
 
 		err = btr_store_big_rec_extern_fields(
 			index, btr_cur_get_block(&cursor),
-			rec, offsets, &mtr, FALSE, big_rec);
+			rec, offsets, big_rec, &mtr, FALSE, NULL);
 
 stored_big_rec:
 		if (modify) {
@@ -2434,7 +2443,7 @@ row_ins(
 		node->index = dict_table_get_next_index(node->index);
 		node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
 
-		/* Skip corrupted secondar index and its entry */
+		/* Skip corrupted secondary index and its entry */
 		while (node->index && dict_index_is_corrupted(node->index)) {
 
 			node->index = dict_table_get_next_index(node->index);

=== modified file 'storage/innobase/row/row0row.c'
--- a/storage/innobase/row/row0row.c	2011-07-08 13:16:23 +0000
+++ b/storage/innobase/row/row0row.c	2011-08-29 08:22:43 +0000
@@ -243,19 +243,20 @@ row_build(
 	}
 
 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-	/* This condition can occur during crash recovery before
-	trx_rollback_active() has completed execution.
+	if (rec_offs_any_null_extern(rec, offsets)) {
+		/* This condition can occur during crash recovery
+		before trx_rollback_active() has completed execution.
 
-	This condition is possible if the server crashed
-	during an insert or update before
-	btr_store_big_rec_extern_fields() did mtr_commit() all
-	BLOB pointers to the clustered index record.
-
-	If the record contains a null BLOB pointer, look up the
-	transaction that holds the implicit lock on this record, and
-	assert that it was recovered (and will soon be rolled back). */
-	ut_a(!rec_offs_any_null_extern(rec, offsets)
-	     || trx_assert_recovered(row_get_rec_trx_id(rec, index, offsets)));
+		This condition is possible if the server crashed
+		during an insert or update-by-delete-and-insert before
+		btr_store_big_rec_extern_fields() did mtr_commit() all
+		BLOB pointers to the freshly inserted clustered index
+		record. */
+		ut_a(trx_assert_recovered(
+			     row_get_rec_trx_id(rec, index, offsets)));
+		ut_a(trx_undo_roll_ptr_is_insert(
+			     row_get_rec_roll_ptr(rec, index, offsets)));
+	}
 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
 	if (type != ROW_COPY_POINTERS) {

=== modified file 'storage/innobase/row/row0upd.c'
--- a/storage/innobase/row/row0upd.c	2011-08-17 01:07:59 +0000
+++ b/storage/innobase/row/row0upd.c	2011-08-29 08:22:43 +0000
@@ -2008,21 +2008,22 @@ row_upd_clust_rec(
 		rec_offs_init(offsets_);
 
 		ut_a(err == DB_SUCCESS);
-		/* Write out the externally stored columns while still
-		x-latching index->lock and block->lock. We have to
-		mtr_commit(mtr) first, so that the redo log will be
-		written in the correct order. Otherwise, we would run
-		into trouble on crash recovery if mtr freed B-tree
-		pages on which some of the big_rec fields will be
-		written. */
-		btr_cur_mtr_commit_and_start(btr_cur, mtr);
+		/* Write out the externally stored columns, but
+		allocate the pages and write the pointers using the
+		mini-transaction of the record update. If any pages
+		were freed in the update, temporarily mark them
+		allocated so that off-page columns will not overwrite
+		them. We must do this, because we write the redo log
+		for the BLOB writes before writing the redo log for
+		the record update. */
 
+		btr_mark_freed_leaves(index, mtr, TRUE);
 		rec = btr_cur_get_rec(btr_cur);
 		err = btr_store_big_rec_extern_fields(
 			index, btr_cur_get_block(btr_cur), rec,
 			rec_get_offsets(rec, index, offsets_,
 					ULINT_UNDEFINED, &heap),
-			mtr, TRUE, big_rec);
+			big_rec, mtr, TRUE, mtr);
 		/* If writing big_rec fails (for example, because of
 		DB_OUT_OF_FILE_SPACE), the record will be corrupted.
 		Even if we did not update any externally stored
@@ -2032,6 +2033,8 @@ row_upd_clust_rec(
 		to the undo log, and thus the record cannot be rolled
 		back. */
 		ut_a(err == DB_SUCCESS);
+		/* Free the pages again in order to avoid a leak. */
+		btr_mark_freed_leaves(index, mtr, FALSE);
 	}
 
 	mtr_commit(mtr);

=== modified file 'storage/innobase/sync/sync0sync.c'
--- a/storage/innobase/sync/sync0sync.c	2011-08-15 09:18:34 +0000
+++ b/storage/innobase/sync/sync0sync.c	2011-08-22 14:12:27 +0000
@@ -1329,7 +1329,13 @@ sync_thread_add_level(
 					     TRUE));
 		break;
 	case SYNC_IBUF_TREE_NODE_NEW:
-		ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+		/* ibuf_add_free_page() allocates new pages for the
+		change buffer while only holding the tablespace
+		x-latch. These pre-allocated new pages may only be
+		taken in use while holding ibuf_mutex, in
+		btr_page_alloc_for_ibuf(). */
+		ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
+		     || sync_thread_levels_contain(array, SYNC_FSP));
 		break;
 	case SYNC_IBUF_INDEX_TREE:
 		if (sync_thread_levels_contain(array, SYNC_FSP)) {

=== modified file 'storage/innobase/trx/trx0undo.c'
--- a/storage/innobase/trx/trx0undo.c	2011-06-14 05:47:33 +0000
+++ b/storage/innobase/trx/trx0undo.c	2011-08-29 08:22:43 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -918,7 +918,7 @@ trx_undo_add_page(
 	page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
 					       + TRX_UNDO_FSEG_HEADER,
 					       undo->top_page_no + 1, FSP_UP,
-					       TRUE, mtr);
+					       TRUE, mtr, mtr);
 
 	fil_space_release_free_extents(undo->space, n_reserved);
 

=== modified file 'strings/decimal.c'
--- a/strings/decimal.c	2011-07-18 07:47:39 +0000
+++ b/strings/decimal.c	2011-08-29 09:34:48 +0000
@@ -1403,11 +1403,18 @@ int bin2decimal(const uchar *from, decim
     buf++;
   }
   my_afree(d_copy);
+
+  /*
+    No digits? We have read the number zero, of unspecified precision.
+    Make it a proper zero, with non-zero precision.
+  */
+  if (to->intg == 0 && to->frac == 0)
+    decimal_make_zero(to);
   return error;
 
 err:
   my_afree(d_copy);
-  decimal_make_zero(((decimal_t*) to));
+  decimal_make_zero(to);
   return(E_DEC_BAD_NUM);
 }
 

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-5.5-mtr branch (bjorn.munch:3239 to 3244) Bjorn Munch30 Aug