List:Commits« Previous MessageNext Message »
From:marko.makela Date:June 16 2011 9:30am
Subject:bzr push into mysql-5.5 branch (marko.makela:3454 to 3455)
View as plain text  
 3455 Marko Mäkelä	2011-06-16 [merge]
      Merge mysql-5.1 to mysql-5.5.

    modified:
      storage/innobase/btr/btr0btr.c
      storage/innobase/btr/btr0cur.c
      storage/innobase/include/btr0btr.h
      storage/innobase/include/btr0cur.h
      storage/innobase/include/btr0cur.ic
      storage/innobase/include/buf0buf.h
      storage/innobase/include/buf0buf.ic
      storage/innobase/include/page0cur.ic
      storage/innobase/include/page0page.h
      storage/innobase/include/page0page.ic
      storage/innobase/include/rem0rec.h
      storage/innobase/include/rem0rec.ic
      storage/innobase/include/sync0rw.ic
      storage/innobase/include/sync0sync.h
      storage/innobase/include/univ.i
      storage/innobase/page/page0cur.c
      storage/innobase/page/page0page.c
      storage/innobase/row/row0ins.c
      storage/innobase/row/row0row.c
      storage/innobase/row/row0upd.c
      storage/innobase/row/row0vers.c
      storage/innobase/sync/sync0rw.c
      storage/innobase/sync/sync0sync.c
      storage/innobase/trx/trx0rec.c
 3454 Jorgen Loland	2011-06-16
      BUG#11882110: UPDATE REPORTS ER_KEY_NOT_FOUND IF TABLE IS 
                    UPDATED TWICE
      
      For multi update it is not allowed to update a column
      of a table if that table is accessed through multiple aliases
      and either
      1) the updated column is used as partitioning key
      2) the updated column is part of the primary key 
         and the primary key is clustered
      
      This check is done in unsafe_key_update().
      
      The bug was that for case 2), it was checked whether
      updated_column_number == table_share->primary_key 
      However, the primary_key variable is the index number of the 
      primary key, not a column number.
      
      Prior to this bugfix, the first column was wrongly believed to be
      the primary key. The columns covered by an index is found in
      table->key_info[idx_number]->key_part. The bugfix is to check if
      any of the columns in the keyparts of the primary key are
      updated.
      
      The user-visible effect is that for storage engines with
      clustered primary key (e.g. InnoDB but not MyISAM) queries
      like 
      "UPDATE t1 AS A JOIN t2 AS B SET A.primkey=..."
      will now error with 
      "ERROR HY000: Primary key/partition key update is not allowed 
      since the table is updated both as 'A' and 'B'." 
      instead of 
      "ERROR 1032 (HY000): Can't find record in 't1_tb'"
      even if primkey is not the first column in the table. This 
      was the intended behavior of bugfix 11764529.
     @ mysql-test/r/multi_update.result
        Add test for bug#11882110
     @ mysql-test/r/multi_update_innodb.result
        Add test for bug#11882110
     @ mysql-test/t/multi_update.test
        Add test for bug#11882110
     @ mysql-test/t/multi_update_innodb.test
        Add test for bug#11882110
     @ sql/sql_update.cc
        unsafe_key_update() wrongly checked if the primary key index
        number was the same as updated column number. Now it is checked
        whether any of the columns making up the primary key is updated.
     @ sql/table.h
        Fix comment on TABLE_SHARE::primary_key. Incorrect comment
        was introduced by an earlier merge conflict (as per dlenev)

    modified:
      mysql-test/r/multi_update.result
      mysql-test/r/multi_update_innodb.result
      mysql-test/t/multi_update.test
      mysql-test/t/multi_update_innodb.test
      sql/sql_update.cc
      sql/table.h
=== modified file 'storage/innobase/btr/btr0btr.c'
--- a/storage/innobase/btr/btr0btr.c	revid:jorgen.loland@oracle.com-20110616062400-637j8qymf3vxdb41
+++ b/storage/innobase/btr/btr0btr.c	revid:marko.makela@stripped16090749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -2275,7 +2275,7 @@ btr_attach_half_pages(
 /*==================*/
 	dict_index_t*	index,		/*!< in: the index tree */
 	buf_block_t*	block,		/*!< in/out: page to be split */
-	rec_t*		split_rec,	/*!< in: first record on upper
+	const rec_t*	split_rec,	/*!< in: first record on upper
 					half page */
 	buf_block_t*	new_block,	/*!< in/out: the new half page */
 	ulint		direction,	/*!< in: FSP_UP or FSP_DOWN */
@@ -2967,15 +2967,16 @@ btr_node_ptr_delete(
 	ut_a(err == DB_SUCCESS);
 
 	if (!compressed) {
-		btr_cur_compress_if_useful(&cursor, mtr);
+		btr_cur_compress_if_useful(&cursor, FALSE, mtr);
 	}
 }
 
 /*************************************************************//**
 If page is the only on its level, this function moves its records to the
-father page, thus reducing the tree height. */
+father page, thus reducing the tree height.
+@return father block */
 static
-void
+buf_block_t*
 btr_lift_page_up(
 /*=============*/
 	dict_index_t*	index,	/*!< in: index tree */
@@ -3092,6 +3093,8 @@ btr_lift_page_up(
 	}
 	ut_ad(page_validate(father_page, index));
 	ut_ad(btr_check_node_ptr(index, father_block, mtr));
+
+	return(father_block);
 }
 
 /*************************************************************//**
@@ -3108,11 +3111,13 @@ UNIV_INTERN
 ibool
 btr_compress(
 /*=========*/
-	btr_cur_t*	cursor,	/*!< in: cursor on the page to merge or lift;
-				the page must not be empty: in record delete
-				use btr_discard_page if the page would become
-				empty */
-	mtr_t*		mtr)	/*!< in: mtr */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to merge
+				or lift; the page must not be empty:
+				when deleting records, use btr_discard_page()
+				if the page would become empty */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
 	dict_index_t*	index;
 	ulint		space;
@@ -3130,12 +3135,14 @@ btr_compress(
 	ulint*		offsets;
 	ulint		data_size;
 	ulint		n_recs;
+	ulint		nth_rec;
 	ulint		max_ins_size;
 	ulint		max_ins_size_reorg;
 
 	block = btr_cur_get_block(cursor);
 	page = btr_cur_get_page(cursor);
 	index = btr_cur_get_index(cursor);
+
 	ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));
 
 	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
@@ -3156,6 +3163,10 @@ btr_compress(
 	offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
 					    &father_cursor);
 
+	if (adjust) {
+		nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+	}
+
 	/* Decide the page to which we try to merge and which will inherit
 	the locks */
 
@@ -3182,9 +3193,9 @@ btr_compress(
 	} else {
 		/* The page is the only one on the level, lift the records
 		to the father */
-		btr_lift_page_up(index, block, mtr);
-		mem_heap_free(heap);
-		return(TRUE);
+
+		merge_block = btr_lift_page_up(index, block, mtr);
+		goto func_exit;
 	}
 
 	n_recs = page_get_n_recs(page);
@@ -3266,6 +3277,10 @@ err_exit:
 
 		btr_node_ptr_delete(index, block, mtr);
 		lock_update_merge_left(merge_block, orig_pred, block);
+
+		if (adjust) {
+			nth_rec += page_rec_get_n_recs_before(orig_pred);
+		}
 	} else {
 		rec_t*		orig_succ;
 #ifdef UNIV_BTR_DEBUG
@@ -3330,7 +3345,6 @@ err_exit:
 	}
 
 	btr_blob_dbg_remove(page, index, "btr_compress");
-	mem_heap_free(heap);
 
 	if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
 		/* Update the free bits of the B-tree page in the
@@ -3382,6 +3396,16 @@ err_exit:
 	btr_page_free(index, block, mtr);
 
 	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
+func_exit:
+	mem_heap_free(heap);
+
+	if (adjust) {
+		btr_cur_position(
+			index,
+			page_rec_get_nth(merge_block->frame, nth_rec),
+			merge_block, cursor);
+	}
+
 	return(TRUE);
 }
 

=== modified file 'storage/innobase/btr/btr0cur.c'
--- a/storage/innobase/btr/btr0cur.c	revid:jorgen.loland@strippedf3vxdb41
+++ b/storage/innobase/btr/btr0cur.c	revid:marko.makela@stripped
@@ -1972,7 +1972,6 @@ btr_cur_optimistic_update(
 	ulint		old_rec_size;
 	dtuple_t*	new_entry;
 	roll_ptr_t	roll_ptr;
-	trx_t*		trx;
 	mem_heap_t*	heap;
 	ulint		i;
 	ulint		n_ext;
@@ -1989,9 +1988,10 @@ btr_cur_optimistic_update(
 
 	heap = mem_heap_create(1024);
 	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-#ifdef UNIV_BLOB_NULL_DEBUG
-	ut_a(!rec_offs_any_null_extern(rec, offsets));
-#endif /* UNIV_BLOB_NULL_DEBUG */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+	ut_a(!rec_offs_any_null_extern(rec, offsets)
+	     || trx_is_recv(thr_get_trx(thr)));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
 #ifdef UNIV_DEBUG
 	if (btr_cur_print_record_ops && thr) {
@@ -2114,13 +2114,11 @@ any_extern:
 
 	page_cur_move_to_prev(page_cursor);
 
-	trx = thr_get_trx(thr);
-
 	if (!(flags & BTR_KEEP_SYS_FLAG)) {
 		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
 					      roll_ptr);
 		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
-					      trx->id);
+					      thr_get_trx(thr)->id);
 	}
 
 	/* There are no externally stored columns in new_entry */
@@ -2206,7 +2204,9 @@ btr_cur_pessimistic_update(
 /*=======================*/
 	ulint		flags,	/*!< in: undo logging, locking, and rollback
 				flags */
-	btr_cur_t*	cursor,	/*!< in: cursor on the record to update */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the record to update;
+				cursor may become invalid if *big_rec == NULL
+				|| !(flags & BTR_KEEP_POS_FLAG) */
 	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
 				be stored externally by the caller, or NULL */
@@ -2345,7 +2345,7 @@ btr_cur_pessimistic_update(
 	record to be inserted: we have to remember which fields were such */
 
 	ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
+	ut_ad(rec_offs_validate(rec, index, offsets));
 	n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
 
 	if (UNIV_LIKELY_NULL(page_zip)) {
@@ -2368,6 +2368,10 @@ make_external:
 			err = DB_TOO_BIG_RECORD;
 			goto return_after_reservations;
 		}
+
+		ut_ad(page_is_leaf(page));
+		ut_ad(dict_index_is_clust(index));
+		ut_ad(flags & BTR_KEEP_POS_FLAG);
 	}
 
 	/* Store state of explicit locks on rec on the page infimum record,
@@ -2395,6 +2399,8 @@ make_external:
 	rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
 
 	if (rec) {
+		page_cursor->rec = rec;
+
 		lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
 						   rec, block);
 
@@ -2408,7 +2414,10 @@ make_external:
 						     rec, index, offsets, mtr);
 		}
 
-		btr_cur_compress_if_useful(cursor, mtr);
+		btr_cur_compress_if_useful(
+			cursor,
+			big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG),
+			mtr);
 
 		if (page_zip && !dict_index_is_clust(index)
 		    && page_is_leaf(page)) {
@@ -2428,6 +2437,21 @@ make_external:
 		}
 	}
 
+	if (big_rec_vec) {
+		ut_ad(page_is_leaf(page));
+		ut_ad(dict_index_is_clust(index));
+		ut_ad(flags & BTR_KEEP_POS_FLAG);
+
+		/* btr_page_split_and_insert() in
+		btr_cur_pessimistic_insert() invokes
+		mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK).
+		We must keep the index->lock when we created a
+		big_rec, so that row_upd_clust_rec() can store the
+		big_rec in the same mini-transaction. */
+
+		mtr_x_lock(dict_index_get_lock(index), mtr);
+	}
+
 	/* Was the record to be updated positioned as the first user
 	record on its page? */
 	was_first = page_cur_is_before_first(page_cursor);
@@ -2443,6 +2467,7 @@ make_external:
 	ut_a(rec);
 	ut_a(err == DB_SUCCESS);
 	ut_a(dummy_big_rec == NULL);
+	page_cursor->rec = rec;
 
 	if (dict_index_is_sec_or_ibuf(index)) {
 		/* Update PAGE_MAX_TRX_ID in the index page header.
@@ -2501,6 +2526,39 @@ return_after_reservations:
 	return(err);
 }
 
+/**************************************************************//**
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+UNIV_INTERN
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+	btr_cur_t*	cursor,	/*!< in: cursor */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	buf_block_t*	block;
+
+	block = btr_cur_get_block(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
+				MTR_MEMO_X_LOCK));
+	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	/* Keep the locks across the mtr_commit(mtr). */
+	rw_lock_x_lock(dict_index_get_lock(cursor->index));
+	rw_lock_x_lock(&block->lock);
+	mutex_enter(&block->mutex);
+	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
+	mutex_exit(&block->mutex);
+	/* Write out the redo log. */
+	mtr_commit(mtr);
+	mtr_start(mtr);
+	/* Reassociate the locks with the mini-transaction.
+	They will be released on mtr_commit(mtr). */
+	mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
+		      MTR_MEMO_X_LOCK);
+	mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
+}
+
 /*==================== B-TREE DELETE MARK AND UNMARK ===============*/
 
 /****************************************************************//**
@@ -2881,10 +2939,12 @@ UNIV_INTERN
 ibool
 btr_cur_compress_if_useful(
 /*=======================*/
-	btr_cur_t*	cursor,	/*!< in: cursor on the page to compress;
-				cursor does not stay valid if compression
-				occurs */
-	mtr_t*		mtr)	/*!< in: mtr */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to compress;
+				cursor does not stay valid if !adjust and
+				compression occurs */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
 	ut_ad(mtr_memo_contains(mtr,
 				dict_index_get_lock(btr_cur_get_index(cursor)),
@@ -2893,7 +2953,7 @@ btr_cur_compress_if_useful(
 				MTR_MEMO_PAGE_X_FIX));
 
 	return(btr_cur_compress_recommendation(cursor, mtr)
-	       && btr_compress(cursor, mtr));
+	       && btr_compress(cursor, adjust, mtr));
 }
 
 /*******************************************************//**
@@ -3135,7 +3195,7 @@ return_after_reservations:
 	mem_heap_free(heap);
 
 	if (ret == FALSE) {
-		ret = btr_cur_compress_if_useful(cursor, mtr);
+		ret = btr_cur_compress_if_useful(cursor, FALSE, mtr);
 	}
 
 	if (n_extents > 0) {

=== modified file 'storage/innobase/include/btr0btr.h'
--- a/storage/innobase/include/btr0btr.h	revid:jorgen.loland@stripped
+++ b/storage/innobase/include/btr0btr.h	revid:marko.makela@oracle.com-20110616090749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -481,11 +481,14 @@ UNIV_INTERN
 ibool
 btr_compress(
 /*=========*/
-	btr_cur_t*	cursor,	/*!< in: cursor on the page to merge or lift;
-				the page must not be empty: in record delete
-				use btr_discard_page if the page would become
-				empty */
-	mtr_t*		mtr);	/*!< in: mtr */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to merge
+				or lift; the page must not be empty:
+				when deleting records, use btr_discard_page()
+				if the page would become empty */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
 /*************************************************************//**
 Discards a page from a B-tree. This is used to remove the last record from
 a B-tree page: the whole page must be removed at the same time. This cannot

=== modified file 'storage/innobase/include/btr0cur.h'
--- a/storage/innobase/include/btr0cur.h	revid:jorgen.loland@stripped
+++ b/storage/innobase/include/btr0cur.h	revid:marko.makela@oracle.com-20110616090749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -36,6 +36,9 @@ Created 10/16/1994 Heikki Tuuri
 #define BTR_NO_LOCKING_FLAG	2	/* do no record lock checking */
 #define BTR_KEEP_SYS_FLAG	4	/* sys fields will be found from the
 					update vector or inserted entry */
+#define BTR_KEEP_POS_FLAG	8	/* btr_cur_pessimistic_update()
+					must keep cursor position when
+					moving columns to big_rec */
 
 #ifndef UNIV_HOTBACKUP
 #include "que0types.h"
@@ -310,7 +313,9 @@ btr_cur_pessimistic_update(
 /*=======================*/
 	ulint		flags,	/*!< in: undo logging, locking, and rollback
 				flags */
-	btr_cur_t*	cursor,	/*!< in: cursor on the record to update */
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the record to update;
+				cursor may become invalid if *big_rec == NULL
+				|| !(flags & BTR_KEEP_POS_FLAG) */
 	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
 	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
 				be stored externally by the caller, or NULL */
@@ -322,6 +327,16 @@ btr_cur_pessimistic_update(
 	que_thr_t*	thr,	/*!< in: query thread */
 	mtr_t*		mtr);	/*!< in: mtr; must be committed before
 				latching any further pages */
+/*****************************************************************
+Commits and restarts a mini-transaction so that it will retain an
+x-lock on index->lock and the cursor page. */
+UNIV_INTERN
+void
+btr_cur_mtr_commit_and_start(
+/*=========================*/
+	btr_cur_t*	cursor,	/*!< in: cursor */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	UNIV_COLD __attribute__((nonnull));
 /***********************************************************//**
 Marks a clustered index record deleted. Writes an undo log record to
 undo log on this delete marking. Writes in the trx id field the id
@@ -364,10 +379,13 @@ UNIV_INTERN
 ibool
 btr_cur_compress_if_useful(
 /*=======================*/
-	btr_cur_t*	cursor,	/*!< in: cursor on the page to compress;
+	btr_cur_t*	cursor,	/*!< in/out: cursor on the page to compress;
 				cursor does not stay valid if compression
 				occurs */
-	mtr_t*		mtr);	/*!< in: mtr */
+	ibool		adjust,	/*!< in: TRUE if should adjust the
+				cursor position even if compression occurs */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
 /*******************************************************//**
 Removes the record on which the tree cursor is positioned. It is assumed
 that the mtr has an x-latch on the page where the cursor is positioned,

=== modified file 'storage/innobase/include/btr0cur.ic'
--- a/storage/innobase/include/btr0cur.ic	revid:jorgen.loland@stripped6062400-637j8qymf3vxdb41
+++ b/storage/innobase/include/btr0cur.ic	revid:marko.makela@strippedn1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -139,7 +139,7 @@ btr_cur_compress_recommendation(
 	btr_cur_t*	cursor,	/*!< in: btr cursor */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	page_t*		page;
+	const page_t*		page;
 
 	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
 				MTR_MEMO_PAGE_X_FIX));

=== modified file 'storage/innobase/include/buf0buf.h'
--- a/storage/innobase/include/buf0buf.h	revid:jorgen.loland@stripped-20110616062400-637j8qymf3vxdb41
+++ b/storage/innobase/include/buf0buf.h	revid:marko.makela@stripped0749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -582,6 +582,31 @@ buf_block_get_modify_clock(
 #else /* !UNIV_HOTBACKUP */
 # define buf_block_modify_clock_inc(block) ((void) 0)
 #endif /* !UNIV_HOTBACKUP */
+/*******************************************************************//**
+Increments the bufferfix count. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_func(
+/*=======================*/
+#ifdef UNIV_SYNC_DEBUG
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line */
+#endif /* UNIV_SYNC_DEBUG */
+	buf_block_t*	block)	/*!< in/out: block to bufferfix */
+	__attribute__((nonnull));
+#ifdef UNIV_SYNC_DEBUG
+/** Increments the bufferfix count.
+@param b	in/out: block to bufferfix
+@param f	in: file name where requested
+@param l	in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
+#else /* UNIV_SYNC_DEBUG */
+/** Increments the bufferfix count.
+@param b	in/out: block to bufferfix
+@param f	in: file name where requested
+@param l	in: line number where requested */
+# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
+#endif /* UNIV_SYNC_DEBUG */
 /********************************************************************//**
 Calculates a page checksum which is stored to the page when it is written
 to a file. Note that we must be careful to calculate the same value

=== modified file 'storage/innobase/include/buf0buf.ic'
--- a/storage/innobase/include/buf0buf.ic	revid:jorgen.loland@stripped6062400-637j8qymf3vxdb41
+++ b/storage/innobase/include/buf0buf.ic	revid:marko.makela@strippedn1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -897,19 +897,6 @@ buf_block_buf_fix_inc_func(
 
 	block->page.buf_fix_count++;
 }
-#ifdef UNIV_SYNC_DEBUG
-/** Increments the bufferfix count.
-@param b	in/out: block to bufferfix
-@param f	in: file name where requested
-@param l	in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
-#else /* UNIV_SYNC_DEBUG */
-/** Increments the bufferfix count.
-@param b	in/out: block to bufferfix
-@param f	in: file name where requested
-@param l	in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
-#endif /* UNIV_SYNC_DEBUG */
 
 /*******************************************************************//**
 Decrements the bufferfix count. */
@@ -1160,7 +1147,7 @@ buf_block_dbg_add_level(
 				where we have acquired latch */
 	ulint		level)	/*!< in: latching order level */
 {
-	sync_thread_add_level(&block->lock, level);
+	sync_thread_add_level(&block->lock, level, FALSE);
 }
 #endif /* UNIV_SYNC_DEBUG */
 /********************************************************************//**

=== modified file 'storage/innobase/include/page0cur.ic'
--- a/storage/innobase/include/page0cur.ic	revid:jorgen.loland@strippedj8qymf3vxdb41
+++ b/storage/innobase/include/page0cur.ic	revid:marko.makela@strippedh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,6 +27,8 @@ Created 10/4/1994 Heikki Tuuri
 #include "buf0types.h"
 
 #ifdef UNIV_DEBUG
+# include "rem0cmp.h"
+
 /*********************************************************//**
 Gets pointer to the page frame where the cursor is positioned.
 @return	page */
@@ -268,6 +270,7 @@ page_cur_tuple_insert(
 					      index, rec, offsets, mtr);
 	}
 
+	ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, offsets));
 	mem_heap_free(heap);
 	return(rec);
 }

=== modified file 'storage/innobase/include/page0page.h'
--- a/storage/innobase/include/page0page.h	revid:jorgen.loland@stripped
+++ b/storage/innobase/include/page0page.h	revid:marko.makela@oracle.com-20110616090749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -284,16 +284,42 @@ page_get_supremum_offset(
 	const page_t*	page);	/*!< in: page which must have record(s) */
 #define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page))
 #define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page))
+
 /************************************************************//**
-Returns the middle record of record list. If there are an even number
-of records in the list, returns the first record of upper half-list.
-@return	middle record */
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record */
 UNIV_INTERN
+const rec_t*
+page_rec_get_nth_const(
+/*===================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		nth)	/*!< in: nth record */
+	__attribute__((nonnull, warn_unused_result));
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record */
+UNIV_INLINE
+rec_t*
+page_rec_get_nth(
+/*=============*/
+	page_t*	page,	/*< in: page */
+	ulint	nth)	/*!< in: nth record */
+	__attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Returns the middle record of the records on the page. If there is an
+even number of records in the list, returns the first record of the
+upper half-list.
+@return	middle record */
+UNIV_INLINE
 rec_t*
 page_get_middle_rec(
 /*================*/
-	page_t*	page);	/*!< in: page */
-#ifndef UNIV_HOTBACKUP
+	page_t*	page)	/*!< in: page */
+	__attribute__((nonnull, warn_unused_result));
 /*************************************************************//**
 Compares a data tuple to a physical record. Differs from the function
 cmp_dtuple_rec_with_match in the way that the record must reside on an
@@ -348,6 +374,7 @@ page_get_n_recs(
 /***************************************************************//**
 Returns the number of records before the given record in chain.
 The number includes infimum and supremum records.
+This is the inverse function of page_rec_get_nth().
 @return	number of records */
 UNIV_INTERN
 ulint

=== modified file 'storage/innobase/include/page0page.ic'
--- a/storage/innobase/include/page0page.ic	revid:jorgen.loland@stripped20110616062400-637j8qymf3vxdb41
+++ b/storage/innobase/include/page0page.ic	revid:marko.makela@stripped090749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -419,7 +419,37 @@ page_rec_is_infimum(
 	return(page_rec_is_infimum_low(page_offset(rec)));
 }
 
+/************************************************************//**
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record */
+UNIV_INLINE
+rec_t*
+page_rec_get_nth(
+/*=============*/
+	page_t*	page,	/*!< in: page */
+	ulint	nth)	/*!< in: nth record */
+{
+	return((rec_t*) page_rec_get_nth_const(page, nth));
+}
+
 #ifndef UNIV_HOTBACKUP
+/************************************************************//**
+Returns the middle record of the records on the page. If there is an
+even number of records in the list, returns the first record of the
+upper half-list.
+@return	middle record */
+UNIV_INLINE
+rec_t*
+page_get_middle_rec(
+/*================*/
+	page_t*	page)	/*!< in: page */
+{
+	ulint	middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+
+	return(page_rec_get_nth(page, middle));
+}
+
 /*************************************************************//**
 Compares a data tuple to a physical record. Differs from the function
 cmp_dtuple_rec_with_match in the way that the record must reside on an

=== modified file 'storage/innobase/include/rem0rec.h'
--- a/storage/innobase/include/rem0rec.h	revid:jorgen.loland@strippedxdb41
+++ b/storage/innobase/include/rem0rec.h	revid:marko.makela@stripped
@@ -480,7 +480,7 @@ ulint
 rec_offs_any_extern(
 /*================*/
 	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
-#ifdef UNIV_BLOB_NULL_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 /******************************************************//**
 Determine if the offsets are for a record containing null BLOB pointers.
 @return	first field containing a null BLOB pointer, or NULL if none found */
@@ -491,7 +491,7 @@ rec_offs_any_null_extern(
 	const rec_t*	rec,		/*!< in: record */
 	const ulint*	offsets)	/*!< in: rec_get_offsets(rec) */
 	__attribute__((nonnull, warn_unused_result));
-#endif /* UNIV_BLOB_NULL_DEBUG */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 /******************************************************//**
 Returns nonzero if the extern bit is set in nth field of rec.
 @return	nonzero if externally stored */

=== modified file 'storage/innobase/include/rem0rec.ic'
--- a/storage/innobase/include/rem0rec.ic	revid:jorgen.loland@stripped8qymf3vxdb41
+++ b/storage/innobase/include/rem0rec.ic	revid:marko.makela@strippedkl
@@ -1088,7 +1088,7 @@ rec_offs_any_extern(
 	return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL));
 }
 
-#ifdef UNIV_BLOB_NULL_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 /******************************************************//**
 Determine if the offsets are for a record containing null BLOB pointers.
 @return	first field containing a null BLOB pointer, or NULL if none found */
@@ -1124,7 +1124,7 @@ rec_offs_any_null_extern(
 
 	return(NULL);
 }
-#endif /* UNIV_BLOB_NULL_DEBUG */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
 /******************************************************//**
 Returns nonzero if the extern bit is set in nth field of rec.

=== modified file 'storage/innobase/include/sync0rw.ic'
--- a/storage/innobase/include/sync0rw.ic	revid:jorgen.loland@strippedvxdb41
+++ b/storage/innobase/include/sync0rw.ic	revid:marko.makela@stripped
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -603,16 +603,16 @@ rw_lock_x_unlock_direct(
 
 	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
 
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
-#endif
-
 	if (lock->lock_word == 0) {
 		lock->recursive = FALSE;
 		UNIV_MEM_INVALID(&lock->writer_thread,
 				 sizeof lock->writer_thread);
 	}
 
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
+#endif
+
 	lock->lock_word += X_LOCK_DECR;
 
 	ut_ad(!lock->waiters);

=== modified file 'storage/innobase/include/sync0sync.h'
--- a/storage/innobase/include/sync0sync.h	revid:jorgen.loland@stripped3vxdb41
+++ b/storage/innobase/include/sync0sync.h	revid:marko.makela@stripped
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -400,8 +400,10 @@ void
 sync_thread_add_level(
 /*==================*/
 	void*	latch,	/*!< in: pointer to a mutex or an rw-lock */
-	ulint	level);	/*!< in: level in the latching order; if
+	ulint	level,	/*!< in: level in the latching order; if
 			SYNC_LEVEL_VARYING, nothing is done */
+	ibool	relock)	/*!< in: TRUE if re-entering an x-lock */
+	__attribute__((nonnull));
 /******************************************************************//**
 Removes a latch from the thread level array if it is found there.
 @return TRUE if found in the array; it is no error if the latch is

=== modified file 'storage/innobase/include/univ.i'
--- a/storage/innobase/include/univ.i	revid:jorgen.loland@stripped1
+++ b/storage/innobase/include/univ.i	revid:marko.makela@stripped
@@ -1,8 +1,7 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
-Copyright (c) 2009, Sun Microsystems, Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -186,8 +185,6 @@ command. Not tested on Windows. */
 						debugging without UNIV_DEBUG */
 #define UNIV_BLOB_LIGHT_DEBUG			/* Enable off-page column
 						debugging without UNIV_DEBUG */
-#define UNIV_BLOB_NULL_DEBUG			/* Enable deep off-page
-						column debugging */
 #define UNIV_DEBUG				/* Enable ut_ad() assertions
 						and disable UNIV_INLINE */
 #define UNIV_DEBUG_LOCK_VALIDATE		/* Enable

=== modified file 'storage/innobase/page/page0cur.c'
--- a/storage/innobase/page/page0cur.c	revid:jorgen.loland@stripped
+++ b/storage/innobase/page/page0cur.c	revid:marko.makela@oracle.com-20110616090749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -1180,14 +1180,15 @@ page_cur_insert_rec_zip_reorg(
 	/* Before trying to reorganize the page,
 	store the number of preceding records on the page. */
 	pos = page_rec_get_n_recs_before(rec);
+	ut_ad(pos > 0);
 
 	if (page_zip_reorganize(block, index, mtr)) {
 		/* The page was reorganized: Find rec by seeking to pos,
 		and update *current_rec. */
-		rec = page + PAGE_NEW_INFIMUM;
-
-		while (--pos) {
-			rec = page + rec_get_next_offs(rec, TRUE);
+		if (pos > 1) {
+			rec = page_rec_get_nth(page, pos - 1);
+		} else {
+			rec = page + PAGE_NEW_INFIMUM;
 		}
 
 		*current_rec = rec;
@@ -1283,6 +1284,12 @@ page_cur_insert_rec_zip(
 			insert_rec = page_cur_insert_rec_zip_reorg(
 				current_rec, block, index, insert_rec,
 				page, page_zip, mtr);
+#ifdef UNIV_DEBUG
+			if (insert_rec) {
+				rec_offs_make_valid(
+					insert_rec, index, offsets);
+			}
+#endif /* UNIV_DEBUG */
 		}
 
 		return(insert_rec);

=== modified file 'storage/innobase/page/page0page.c'
--- a/storage/innobase/page/page0page.c	revid:jorgen.loland@oracle.com-20110616062400-637j8qymf3vxdb41
+++ b/storage/innobase/page/page0page.c	revid:marko.makela@stripped6090749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -1465,55 +1465,54 @@ page_dir_balance_slot(
 	}
 }
 
-#ifndef UNIV_HOTBACKUP
 /************************************************************//**
-Returns the middle record of the record list. If there are an even number
-of records in the list, returns the first record of the upper half-list.
-@return	middle record */
+Returns the nth record of the record list.
+This is the inverse function of page_rec_get_n_recs_before().
+@return	nth record */
 UNIV_INTERN
-rec_t*
-page_get_middle_rec(
-/*================*/
-	page_t*	page)	/*!< in: page */
+const rec_t*
+page_rec_get_nth_const(
+/*===================*/
+	const page_t*	page,	/*!< in: page */
+	ulint		nth)	/*!< in: nth record */
 {
-	page_dir_slot_t*	slot;
-	ulint			middle;
+	const page_dir_slot_t*	slot;
 	ulint			i;
 	ulint			n_owned;
-	ulint			count;
-	rec_t*			rec;
-
-	/* This many records we must leave behind */
-	middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+	const rec_t*		rec;
 
-	count = 0;
+	ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
 
 	for (i = 0;; i++) {
 
 		slot = page_dir_get_nth_slot(page, i);
 		n_owned = page_dir_slot_get_n_owned(slot);
 
-		if (count + n_owned > middle) {
+		if (n_owned > nth) {
 			break;
 		} else {
-			count += n_owned;
+			nth -= n_owned;
 		}
 	}
 
 	ut_ad(i > 0);
 	slot = page_dir_get_nth_slot(page, i - 1);
-	rec = (rec_t*) page_dir_slot_get_rec(slot);
-	rec = page_rec_get_next(rec);
+	rec = page_dir_slot_get_rec(slot);
 
-	/* There are now count records behind rec */
-
-	for (i = 0; i < middle - count; i++) {
-		rec = page_rec_get_next(rec);
+	if (page_is_comp(page)) {
+		do {
+			rec = page_rec_get_next_low(rec, TRUE);
+			ut_ad(rec);
+		} while (nth--);
+	} else {
+		do {
+			rec = page_rec_get_next_low(rec, FALSE);
+			ut_ad(rec);
+		} while (nth--);
 	}
 
 	return(rec);
 }
-#endif /* !UNIV_HOTBACKUP */
 
 /***************************************************************//**
 Returns the number of records before the given record in chain.
@@ -1575,6 +1574,7 @@ page_rec_get_n_recs_before(
 	n--;
 
 	ut_ad(n >= 0);
+	ut_ad(n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
 
 	return((ulint) n);
 }

=== modified file 'storage/innobase/row/row0ins.c'
--- a/storage/innobase/row/row0ins.c	revid:jorgen.loland@stripped
+++ b/storage/innobase/row/row0ins.c	revid:marko.makela@oracle.com-20110616090749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -345,9 +345,9 @@ row_ins_clust_index_entry_by_modify(
 			return(DB_LOCK_TABLE_FULL);
 
 		}
-		err = btr_cur_pessimistic_update(0, cursor,
-						 heap, big_rec, update,
-						 0, thr, mtr);
+		err = btr_cur_pessimistic_update(
+			BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update,
+			0, thr, mtr);
 	}
 
 	return(err);
@@ -1973,6 +1973,7 @@ row_ins_index_entry_low(
 	ulint		modify = 0; /* remove warning */
 	rec_t*		insert_rec;
 	rec_t*		rec;
+	ulint*		offsets;
 	ulint		err;
 	ulint		n_unique;
 	big_rec_t*	big_rec			= NULL;
@@ -2081,6 +2082,42 @@ row_ins_index_entry_low(
 			err = row_ins_clust_index_entry_by_modify(
 				mode, &cursor, &heap, &big_rec, entry,
 				thr, &mtr);
+
+			if (big_rec) {
+				ut_a(err == DB_SUCCESS);
+				/* Write out the externally stored
+				columns while still x-latching
+				index->lock and block->lock. We have
+				to mtr_commit(mtr) first, so that the
+				redo log will be written in the
+				correct order. Otherwise, we would run
+				into trouble on crash recovery if mtr
+				freed B-tree pages on which some of
+				the big_rec fields will be written. */
+				btr_cur_mtr_commit_and_start(&cursor, &mtr);
+
+				rec = btr_cur_get_rec(&cursor);
+				offsets = rec_get_offsets(
+					rec, index, NULL,
+					ULINT_UNDEFINED, &heap);
+
+				err = btr_store_big_rec_extern_fields(
+					index, btr_cur_get_block(&cursor),
+					rec, offsets, &mtr, FALSE, big_rec);
+				/* If writing big_rec fails (for
+				example, because of DB_OUT_OF_FILE_SPACE),
+				the record will be corrupted. Even if
+				we did not update any externally
+				stored columns, our update could cause
+				the record to grow so that a
+				non-updated column was selected for
+				external storage. This non-update
+				would not have been written to the
+				undo log, and thus the record cannot
+				be rolled back. */
+				ut_a(err == DB_SUCCESS);
+				goto stored_big_rec;
+			}
 		} else {
 			ut_ad(!n_ext);
 			err = row_ins_sec_index_entry_by_modify(
@@ -2109,8 +2146,6 @@ function_exit:
 	mtr_commit(&mtr);
 
 	if (UNIV_LIKELY_NULL(big_rec)) {
-		rec_t*	rec;
-		ulint*	offsets;
 		mtr_start(&mtr);
 
 		btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
@@ -2124,6 +2159,7 @@ function_exit:
 			index, btr_cur_get_block(&cursor),
 			rec, offsets, &mtr, FALSE, big_rec);
 
+stored_big_rec:
 		if (modify) {
 			dtuple_big_rec_free(big_rec);
 		} else {

=== modified file 'storage/innobase/row/row0row.c'
--- a/storage/innobase/row/row0row.c	revid:jorgen.loland@stripped062400-637j8qymf3vxdb41
+++ b/storage/innobase/row/row0row.c	revid:marko.makela@strippedragnh5kl
@@ -192,6 +192,7 @@ row_build(
 
 	ut_ad(index && rec && heap);
 	ut_ad(dict_index_is_clust(index));
+	ut_ad(!mutex_own(&kernel_mutex));
 
 	if (!offsets) {
 		offsets = rec_get_offsets(rec, index, offsets_,
@@ -200,13 +201,35 @@ row_build(
 		ut_ad(rec_offs_validate(rec, index, offsets));
 	}
 
-#if 0 && defined UNIV_BLOB_NULL_DEBUG
-	/* This one can fail in trx_rollback_active() if
-	the server crashed during an insert before the
-	btr_store_big_rec_extern_fields() did mtr_commit()
-	all BLOB pointers to the clustered index record. */
-	ut_a(!rec_offs_any_null_extern(rec, offsets));
-#endif /* 0 && UNIV_BLOB_NULL_DEBUG */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+	if (UNIV_LIKELY_NULL(rec_offs_any_null_extern(rec, offsets))) {
+		/* This condition can occur during crash recovery before
+		trx_rollback_active() has completed execution.
+
+		This condition is possible if the server crashed
+		during an insert or update before
+		btr_store_big_rec_extern_fields() did mtr_commit() all
+		BLOB pointers to the clustered index record.
+
+		Look up the transaction that holds the implicit lock
+		on this record, and assert that it was recovered (and
+		will soon be rolled back). */
+
+		ulint		trx_id_pos	= dict_index_get_sys_col_pos(
+				index, DATA_TRX_ID);
+		ulint		len;
+		trx_id_t	trx_id		= trx_read_trx_id(
+			rec_get_nth_field(rec, offsets, trx_id_pos, &len));
+		trx_t*		trx;
+		ut_a(len == 6);
+
+		mutex_enter(&kernel_mutex);
+		trx = trx_get_on_id(trx_id);
+		ut_a(trx);
+		ut_a(trx->is_recovered);
+		mutex_exit(&kernel_mutex);
+	}
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
 	if (type != ROW_COPY_POINTERS) {
 		/* Take a copy of rec to heap */
@@ -391,10 +414,10 @@ row_rec_to_index_entry(
 		rec = rec_copy(buf, rec, offsets);
 		/* Avoid a debug assertion in rec_offs_validate(). */
 		rec_offs_make_valid(rec, index, offsets);
-#ifdef UNIV_BLOB_NULL_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 	} else {
 		ut_a(!rec_offs_any_null_extern(rec, offsets));
-#endif /* UNIV_BLOB_NULL_DEBUG */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 	}
 
 	entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap);

=== modified file 'storage/innobase/row/row0upd.c'
--- a/storage/innobase/row/row0upd.c	revid:jorgen.loland@stripped-637j8qymf3vxdb41
+++ b/storage/innobase/row/row0upd.c	revid:marko.makela@strippedkl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -1999,28 +1999,43 @@ row_upd_clust_rec(
 	ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
 				    dict_table_is_comp(index->table)));
 
-	err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
-					 &heap, &big_rec, node->update,
-					 node->cmpl_info, thr, mtr);
-	mtr_commit(mtr);
-
-	if (err == DB_SUCCESS && big_rec) {
+	err = btr_cur_pessimistic_update(
+		BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur,
+		&heap, &big_rec, node->update, node->cmpl_info, thr, mtr);
+	if (big_rec) {
 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 		rec_t*		rec;
 		rec_offs_init(offsets_);
 
-		mtr_start(mtr);
+		ut_a(err == DB_SUCCESS);
+		/* Write out the externally stored columns while still
+		x-latching index->lock and block->lock. We have to
+		mtr_commit(mtr) first, so that the redo log will be
+		written in the correct order. Otherwise, we would run
+		into trouble on crash recovery if mtr freed B-tree
+		pages on which some of the big_rec fields will be
+		written. */
+		btr_cur_mtr_commit_and_start(btr_cur, mtr);
 
-		ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
 		rec = btr_cur_get_rec(btr_cur);
 		err = btr_store_big_rec_extern_fields(
 			index, btr_cur_get_block(btr_cur), rec,
 			rec_get_offsets(rec, index, offsets_,
 					ULINT_UNDEFINED, &heap),
 			mtr, TRUE, big_rec);
-		mtr_commit(mtr);
+		/* If writing big_rec fails (for example, because of
+		DB_OUT_OF_FILE_SPACE), the record will be corrupted.
+		Even if we did not update any externally stored
+		columns, our update could cause the record to grow so
+		that a non-updated column was selected for external
+		storage. This non-update would not have been written
+		to the undo log, and thus the record cannot be rolled
+		back. */
+		ut_a(err == DB_SUCCESS);
 	}
 
+	mtr_commit(mtr);
+
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}

=== modified file 'storage/innobase/row/row0vers.c'
--- a/storage/innobase/row/row0vers.c	revid:jorgen.loland@stripped637j8qymf3vxdb41
+++ b/storage/innobase/row/row0vers.c	revid:marko.makela@strippedkl
@@ -550,10 +550,10 @@ row_vers_build_for_consistent_read(
 				/* The view already sees this version: we can
 				copy it to in_heap and return */
 
-#ifdef UNIV_BLOB_NULL_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 				ut_a(!rec_offs_any_null_extern(
 					     version, *offsets));
-#endif /* UNIV_BLOB_NULL_DEBUG */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
 				buf = mem_heap_alloc(in_heap,
 						     rec_offs_size(*offsets));
@@ -588,9 +588,9 @@ row_vers_build_for_consistent_read(
 		*offsets = rec_get_offsets(prev_version, index, *offsets,
 					   ULINT_UNDEFINED, offset_heap);
 
-#ifdef UNIV_BLOB_NULL_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 		ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
-#endif /* UNIV_BLOB_NULL_DEBUG */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
 		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
 
@@ -691,9 +691,9 @@ row_vers_build_for_semi_consistent_read(
 			/* We found a version that belongs to a
 			committed transaction: return it. */
 
-#ifdef UNIV_BLOB_NULL_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 			ut_a(!rec_offs_any_null_extern(version, *offsets));
-#endif /* UNIV_BLOB_NULL_DEBUG */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
 			if (rec == version) {
 				*old_vers = rec;
@@ -752,9 +752,9 @@ row_vers_build_for_semi_consistent_read(
 		version = prev_version;
 		*offsets = rec_get_offsets(version, index, *offsets,
 					   ULINT_UNDEFINED, offset_heap);
-#ifdef UNIV_BLOB_NULL_DEBUG
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 		ut_a(!rec_offs_any_null_extern(version, *offsets));
-#endif /* UNIV_BLOB_NULL_DEBUG */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 	}/* for (;;) */
 
 	if (heap) {

=== modified file 'storage/innobase/sync/sync0rw.c'
--- a/storage/innobase/sync/sync0rw.c	revid:jorgen.loland@strippedf3vxdb41
+++ b/storage/innobase/sync/sync0rw.c	revid:marko.makela@stripped
@@ -782,7 +782,9 @@ rw_lock_add_debug_info(
 	rw_lock_debug_mutex_exit();
 
 	if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
-		sync_thread_add_level(lock, lock->level);
+		sync_thread_add_level(lock, lock->level,
+				      lock_type == RW_LOCK_EX
+				      && lock->lock_word < 0);
 	}
 }
 

=== modified file 'storage/innobase/sync/sync0sync.c'
--- a/storage/innobase/sync/sync0sync.c	revid:jorgen.loland@stripped
+++ b/storage/innobase/sync/sync0sync.c	revid:marko.makela@oracle.com-20110616090749-x7n1fq0dragnh5kl
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -690,7 +690,7 @@ mutex_set_debug_info(
 	ut_ad(mutex);
 	ut_ad(file_name);
 
-	sync_thread_add_level(mutex, mutex->level);
+	sync_thread_add_level(mutex, mutex->level, FALSE);
 
 	mutex->file_name = file_name;
 	mutex->line	 = line;
@@ -1133,8 +1133,9 @@ void
 sync_thread_add_level(
 /*==================*/
 	void*	latch,	/*!< in: pointer to a mutex or an rw-lock */
-	ulint	level)	/*!< in: level in the latching order; if
+	ulint	level,	/*!< in: level in the latching order; if
 			SYNC_LEVEL_VARYING, nothing is done */
+	ibool	relock)	/*!< in: TRUE if re-entering an x-lock */
 {
 	ulint		i;
 	sync_level_t*	slot;
@@ -1185,6 +1186,10 @@ sync_thread_add_level(
 
 	array = thread_slot->levels;
 
+	if (relock) {
+		goto levels_ok;
+	}
+
 	/* NOTE that there is a problem with _NODE and _LEAF levels: if the
 	B-tree height changes, then a leaf can change to an internal node
 	or the other way around. We do not know at present if this can cause
@@ -1350,6 +1355,7 @@ sync_thread_add_level(
 		ut_error;
 	}
 
+levels_ok:
 	if (array->next_free == ULINT_UNDEFINED) {
 		ut_a(array->n_elems < array->max_elems);
 

=== modified file 'storage/innobase/trx/trx0rec.c'
--- a/storage/innobase/trx/trx0rec.c	revid:jorgen.loland@stripped3vxdb41
+++ b/storage/innobase/trx/trx0rec.c	revid:marko.makela@stripped
@@ -1593,9 +1593,9 @@ trx_undo_prev_version_build(
 		return(DB_ERROR);
 	}
 
-# ifdef UNIV_BLOB_NULL_DEBUG
+# if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
 	ut_a(!rec_offs_any_null_extern(rec, offsets));
-# endif /* UNIV_BLOB_NULL_DEBUG */
+# endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
 
 	if (row_upd_changes_field_size_or_external(index, offsets, update)) {
 		ulint	n_ext;

Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20110616090749-x7n1fq0dragnh5kl.bundle
Thread
bzr push into mysql-5.5 branch (marko.makela:3454 to 3455) marko.makela16 Jun