List:Commits« Previous MessageNext Message »
From:marko.makela Date:December 21 2010 12:55pm
Subject:bzr push into mysql-5.1-innodb branch (marko.makela:3668 to 3670) Bug#55284
View as plain text  
 3670 Marko Mäkelä	2010-12-21
      Bug #55284 Double BLOB free due to lock wait while updating PRIMARY KEY
      
      This bug fix requires that Bug #58912 be fixed as well (bzr revision id
      marko.makela@stripped01221093919-mcmmgd4zpse9567d). Otherwise,
      another double BLOB free could occur when InnoDB would try to perform
      an update-in-place as delete-and-insert-by-update-in-place.
      
      row_upd_clust_rec_by_insert(): Do not disown the externally stored
      columns from the old record (btr_cur_mark_extern_inherited_fields())
      until after checking the foreign key constraints and successfully
      inserting the updated record. If a lock wait timeout occurs between
      the delete-marking of the old record and the insertion of the updated
      record, mark the columns inherited before retrying the insert.
      Distinguish the state UPD_NODE_INSERT_BLOB from
      UPD_NODE_INSERT_CLUSTERED.
      
      btr_cur_del_mark_set_clust_rec(): Replace the cursor with
      block,rec,index,offsets so that the offsets need not be recalculated.
      Assert that rec is on a clustered index leaf page.
      
      btr_cur_disown_inherited_fields(): Renamed from
      btr_cur_mark_extern_inherited_fields(). Use
      upd_get_field_by_field_no(). Assert that there are externally stored
      columns. Assert that a mini-transaction is passed. Remove the return
      status. (The only caller, row_upd_clust_rec_by_insert(), will have
      determined that some fields have changed ownership.)
      
      btr_cur_mark_dtuple_inherited_extern(): Rename to
      row_upd_clust_rec_by_insert_inherit_func() and declare as static. Add
      the debug parameters rec, offsets. When rec is given, assert that the
      off-page columns match those in the inesrt tuple and that the off-page
      columns are owned by the record. Assert that the non-updated off-page
      columns in the insert tuple are owned, and mark them inherited.
      
      row_upd_clust_rec_by_insert_inherit(): A wrapper macro for
      row_upd_clust_rec_by_insert_inherit_func().
      
      row_undo_mod_upd_exist_sec(): Adjust a comment about
      row_upd_clust_rec_by_insert().
      
      rb:508 approved by Jimmy Yang

    modified:
      storage/innodb_plugin/ChangeLog
      storage/innodb_plugin/btr/btr0cur.c
      storage/innodb_plugin/include/btr0cur.h
      storage/innodb_plugin/include/row0upd.h
      storage/innodb_plugin/row/row0umod.c
      storage/innodb_plugin/row/row0upd.c
 3669 Marko Mäkelä	2010-12-21
      Bug#58912 InnoDB unnecessarily avoids update-in-place on column prefix indexes
      
      row_upd_changes_ord_field_binary(): Do not return TRUE if the update
      vector changes a column that is covered by a prefix index, but does
      not change the column prefix. Add the row_ext_t parameter for
      determining whether the prefixes of externally stored columns match.
      
      dfield_datas_are_binary_equal(): Add the parameter len, for comparing
      column prefixes when len > 0.
      
      innodb.test: Add a test case where the patch of Bug #55284 failed
      without this fix.
      
      rb:537 approved by Jimmy Yang

    modified:
      mysql-test/suite/innodb_plugin/r/innodb.result
      mysql-test/suite/innodb_plugin/t/innodb.test
      storage/innodb_plugin/ChangeLog
      storage/innodb_plugin/btr/btr0cur.c
      storage/innodb_plugin/include/data0data.h
      storage/innodb_plugin/include/data0data.ic
      storage/innodb_plugin/include/row0upd.h
      storage/innodb_plugin/row/row0purge.c
      storage/innodb_plugin/row/row0umod.c
      storage/innodb_plugin/row/row0upd.c
 3668 Vasil Dimov	2010-12-14
      Speed up innodb_bug57255.test
      
      Submitted by:	Stewart Smith (via internals@stripped)

    modified:
      mysql-test/suite/innodb/t/innodb_bug57255.test
      mysql-test/suite/innodb_plugin/t/innodb_bug57255.test
=== modified file 'mysql-test/suite/innodb_plugin/r/innodb.result'
--- a/mysql-test/suite/innodb_plugin/r/innodb.result	revid:vasil.dimov@stripped2offmkgzcz2725
+++ b/mysql-test/suite/innodb_plugin/r/innodb.result	revid:marko.makela@strippedyxxzzgqtem8bcm7
@@ -1,5 +1,8 @@
 drop table if exists t1,t2,t3,t4;
 drop database if exists mysqltest;
+CREATE TABLE bug58912 (a BLOB, b TEXT, PRIMARY KEY(a(1))) ENGINE=InnoDB;
+INSERT INTO bug58912 VALUES(REPEAT('a',8000),REPEAT('b',8000));
+UPDATE bug58912 SET a=REPEAT('a',7999);
 create table t1 (id int unsigned not null auto_increment, code tinyint unsigned not null, name char(20) not null, primary key (id), key (code), unique (name)) engine=innodb;
 insert into t1 (code, name) values (1, 'Tim'), (1, 'Monty'), (2, 'David'), (2, 'Erik'), (3, 'Sasha'), (3, 'Jeremy'), (4, 'Matt');
 select id, code, name from t1 order by id;
@@ -1676,10 +1679,10 @@ variable_value - @innodb_rows_deleted_or
 71
 SELECT variable_value - @innodb_rows_inserted_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_inserted';
 variable_value - @innodb_rows_inserted_orig
-1066
+1067
 SELECT variable_value - @innodb_rows_updated_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_rows_updated';
 variable_value - @innodb_rows_updated_orig
-865
+866
 SELECT variable_value - @innodb_row_lock_waits_orig FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_waits';
 variable_value - @innodb_row_lock_waits_orig
 0
@@ -3239,3 +3242,4 @@ Variable_name	Value
 Handler_update	1
 Variable_name	Value
 Handler_delete	1
+DROP TABLE bug58912;

=== modified file 'mysql-test/suite/innodb_plugin/t/innodb.test'
--- a/mysql-test/suite/innodb_plugin/t/innodb.test	revid:vasil.dimov@stripped
+++ b/mysql-test/suite/innodb_plugin/t/innodb.test	revid:marko.makela@stripped
@@ -43,6 +43,15 @@ drop table if exists t1,t2,t3,t4;
 drop database if exists mysqltest;
 --enable_warnings
 
+# Bug#58912 InnoDB unnecessarily avoids update-in-place on column prefixes
+CREATE TABLE bug58912 (a BLOB, b TEXT, PRIMARY KEY(a(1))) ENGINE=InnoDB;
+INSERT INTO bug58912 VALUES(REPEAT('a',8000),REPEAT('b',8000));
+UPDATE bug58912 SET a=REPEAT('a',7999);
+# The above statements used to trigger a failure during purge when
+# Bug#55284 was fixed while Bug#58912 was not. Defer the DROP TABLE,
+# so that purge gets a chance to run (and a double free of the
+# off-page column can be detected, if one is to occur.)
+
 #
 # Small basic test with ignore
 #
@@ -2541,6 +2550,9 @@ SET GLOBAL innodb_thread_concurrency = @
 
 -- enable_query_log
 
+# Clean up after the Bug#55284/Bug#58912 test case.
+DROP TABLE bug58912;
+
 #######################################################################
 #                                                                     #
 # Please, DO NOT TOUCH this file as well as the innodb.result file.   #

=== modified file 'storage/innodb_plugin/ChangeLog'
--- a/storage/innodb_plugin/ChangeLog	revid:vasil.dimov@stripped
+++ b/storage/innodb_plugin/ChangeLog	revid:marko.makela@oracle.com-20101221112722-1yxxzzgqtem8bcm7
@@ -1,8 +1,24 @@
+2010-12-21	The InnoDB Team
+	* include/btr0cur.h, include/row0upd.h, btr/btr0cur.c,
+	row/row0umod.c, row/row0upd.c:
+	Fix Bug#55284 Double free of off-page columns due to lock wait
+	while updating PRIMARY KEY
+
+2010-12-21	The InnoDB Team
+
+	* include/data0data.h, include/data0data.ic, include/row0upd.h,
+	btr/btr0cur.c, row/row0purge.c, row/row0umod.c, row/row0upd.c,
+	innodb.result, innodb.test:
+	Fix Bug#58912 InnoDB unnecessarily avoids update-in-place
+	on column prefix indexes
+
 2010-12-09	The InnoDB Team
+
 	* buf/buf0lru.c:
 	Fix Bug#57600 output of I/O sum[%lu] can go negative
 
 2010-11-11	The InnoDB Team
+
 	* thr/thr0loc.c, trx/trx0i_s.c:
 	Fix Bug#57802 Empty ASSERTION parameter passed to the HASH_SEARCH macro
 

=== modified file 'storage/innodb_plugin/btr/btr0cur.c'
--- a/storage/innodb_plugin/btr/btr0cur.c	revid:vasil.dimov@stripped
+++ b/storage/innodb_plugin/btr/btr0cur.c	revid:marko.makela@strippedom-20101221112722-1yxxzzgqtem8bcm7
@@ -1756,7 +1756,8 @@ btr_cur_update_in_place(
 		NOT call it if index is secondary */
 
 		if (!dict_index_is_clust(index)
-		    || row_upd_changes_ord_field_binary(NULL, index, update)) {
+		    || row_upd_changes_ord_field_binary(NULL, NULL,
+							index, update)) {
 
 			/* Remove possible hash index pointer to this record */
 			btr_search_update_hash_on_delete(cursor);
@@ -2508,27 +2509,24 @@ ulint
 btr_cur_del_mark_set_clust_rec(
 /*===========================*/
 	ulint		flags,	/*!< in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/*!< in: cursor */
+	buf_block_t*	block,	/*!< in/out: buffer block of the record */
+	rec_t*		rec,	/*!< in/out: record */
+	dict_index_t*	index,	/*!< in: clustered index of the record */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
 	ibool		val,	/*!< in: value to set */
 	que_thr_t*	thr,	/*!< in: query thread */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
-	dict_index_t*	index;
-	buf_block_t*	block;
 	roll_ptr_t	roll_ptr;
 	ulint		err;
-	rec_t*		rec;
 	page_zip_des_t*	page_zip;
 	trx_t*		trx;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	rec_offs_init(offsets_);
 
-	rec = btr_cur_get_rec(cursor);
-	index = cursor->index;
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(rec_offs_validate(rec, index, offsets));
 	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+	ut_ad(buf_block_get_frame(block) == page_align(rec));
+	ut_ad(page_is_leaf(page_align(rec)));
 
 #ifdef UNIV_DEBUG
 	if (btr_cur_print_record_ops && thr) {
@@ -2540,13 +2538,12 @@ btr_cur_del_mark_set_clust_rec(
 	ut_ad(dict_index_is_clust(index));
 	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
 
-	err = lock_clust_rec_modify_check_and_lock(flags,
-						   btr_cur_get_block(cursor),
+	err = lock_clust_rec_modify_check_and_lock(flags, block,
 						   rec, index, offsets, thr);
 
 	if (err != DB_SUCCESS) {
 
-		goto func_exit;
+		return(err);
 	}
 
 	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
@@ -2554,11 +2551,9 @@ btr_cur_del_mark_set_clust_rec(
 					    &roll_ptr);
 	if (err != DB_SUCCESS) {
 
-		goto func_exit;
+		return(err);
 	}
 
-	block = btr_cur_get_block(cursor);
-
 	if (block->is_hashed) {
 		rw_lock_x_lock(&btr_search_latch);
 	}
@@ -2581,10 +2576,6 @@ btr_cur_del_mark_set_clust_rec(
 	btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
 					   roll_ptr, mtr);
 
-func_exit:
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
 	return(err);
 }
 
@@ -3476,108 +3467,36 @@ btr_cur_set_ownership_of_extern_field(
 }
 
 /*******************************************************************//**
-Marks not updated extern fields as not-owned by this record. The ownership
-is transferred to the updated record which is inserted elsewhere in the
+Marks non-updated off-page fields as disowned by this record. The ownership
+must be transferred to the updated record which is inserted elsewhere in the
 index tree. In purge only the owner of externally stored field is allowed
-to free the field.
-@return TRUE if BLOB ownership was transferred */
+to free the field. */
 UNIV_INTERN
-ibool
-btr_cur_mark_extern_inherited_fields(
-/*=================================*/
+void
+btr_cur_disown_inherited_fields(
+/*============================*/
 	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
 				part will be updated, or NULL */
 	rec_t*		rec,	/*!< in/out: record in a clustered index */
 	dict_index_t*	index,	/*!< in: index of the page */
 	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
 	const upd_t*	update,	/*!< in: update vector */
-	mtr_t*		mtr)	/*!< in: mtr, or NULL if not logged */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
-	ulint	n;
-	ulint	j;
 	ulint	i;
-	ibool	change_ownership = FALSE;
 
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(rec_offs_validate(rec, index, offsets));
 	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
+	ut_ad(rec_offs_any_extern(offsets));
+	ut_ad(mtr);
 
-	if (!rec_offs_any_extern(offsets)) {
-
-		return(FALSE);
-	}
-
-	n = rec_offs_n_fields(offsets);
-
-	for (i = 0; i < n; i++) {
-		if (rec_offs_nth_extern(offsets, i)) {
-
-			/* Check it is not in updated fields */
-
-			if (update) {
-				for (j = 0; j < upd_get_n_fields(update);
-				     j++) {
-					if (upd_get_nth_field(update, j)
-					    ->field_no == i) {
-
-						goto updated;
-					}
-				}
-			}
-
+	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+		if (rec_offs_nth_extern(offsets, i)
+		    && !upd_get_field_by_field_no(update, i)) {
 			btr_cur_set_ownership_of_extern_field(
 				page_zip, rec, index, offsets, i, FALSE, mtr);
-
-			change_ownership = TRUE;
-updated:
-			;
 		}
 	}
-
-	return(change_ownership);
-}
-
-/*******************************************************************//**
-The complement of the previous function: in an update entry may inherit
-some externally stored fields from a record. We must mark them as inherited
-in entry, so that they are not freed in a rollback. */
-UNIV_INTERN
-void
-btr_cur_mark_dtuple_inherited_extern(
-/*=================================*/
-	dtuple_t*	entry,		/*!< in/out: updated entry to be
-					inserted to clustered index */
-	const upd_t*	update)		/*!< in: update vector */
-{
-	ulint		i;
-
-	for (i = 0; i < dtuple_get_n_fields(entry); i++) {
-
-		dfield_t*	dfield = dtuple_get_nth_field(entry, i);
-		byte*		data;
-		ulint		len;
-		ulint		j;
-
-		if (!dfield_is_ext(dfield)) {
-			continue;
-		}
-
-		/* Check if it is in updated fields */
-
-		for (j = 0; j < upd_get_n_fields(update); j++) {
-			if (upd_get_nth_field(update, j)->field_no == i) {
-
-				goto is_updated;
-			}
-		}
-
-		data = dfield_get_data(dfield);
-		len = dfield_get_len(dfield);
-		data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
-			|= BTR_EXTERN_INHERITED_FLAG;
-
-is_updated:
-		;
-	}
 }
 
 /*******************************************************************//**
@@ -3614,29 +3533,6 @@ btr_cur_unmark_extern_fields(
 		}
 	}
 }
-
-/*******************************************************************//**
-Marks all extern fields in a dtuple as owned by the record. */
-UNIV_INTERN
-void
-btr_cur_unmark_dtuple_extern_fields(
-/*================================*/
-	dtuple_t*	entry)		/*!< in/out: clustered index entry */
-{
-	ulint	i;
-
-	for (i = 0; i < dtuple_get_n_fields(entry); i++) {
-		dfield_t* dfield = dtuple_get_nth_field(entry, i);
-
-		if (dfield_is_ext(dfield)) {
-			byte*	data = dfield_get_data(dfield);
-			ulint	len = dfield_get_len(dfield);
-
-			data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
-				&= ~BTR_EXTERN_OWNER_FLAG;
-		}
-	}
-}
 
 /*******************************************************************//**
 Flags the data tuple fields that are marked as extern storage in the

=== modified file 'storage/innodb_plugin/include/btr0cur.h'
--- a/storage/innodb_plugin/include/btr0cur.h	revid:vasil.dimov@stripped
+++ b/storage/innodb_plugin/include/btr0cur.h	revid:marko.makela@stripped
@@ -332,10 +332,14 @@ ulint
 btr_cur_del_mark_set_clust_rec(
 /*===========================*/
 	ulint		flags,	/*!< in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/*!< in: cursor */
+	buf_block_t*	block,	/*!< in/out: buffer block of the record */
+	rec_t*		rec,	/*!< in/out: record */
+	dict_index_t*	index,	/*!< in: clustered index of the record */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
 	ibool		val,	/*!< in: value to set */
 	que_thr_t*	thr,	/*!< in: query thread */
-	mtr_t*		mtr);	/*!< in: mtr */
+	mtr_t*		mtr)	/*!< in: mtr */
+	__attribute__((nonnull));
 /***********************************************************//**
 Sets a secondary index record delete mark to TRUE or FALSE.
 @return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
@@ -481,40 +485,22 @@ btr_estimate_number_of_different_key_val
 /*======================================*/
 	dict_index_t*	index);	/*!< in: index */
 /*******************************************************************//**
-Marks not updated extern fields as not-owned by this record. The ownership
-is transferred to the updated record which is inserted elsewhere in the
+Marks non-updated off-page fields as disowned by this record. The ownership
+must be transferred to the updated record which is inserted elsewhere in the
 index tree. In purge only the owner of externally stored field is allowed
-to free the field.
-@return TRUE if BLOB ownership was transferred */
+to free the field. */
 UNIV_INTERN
-ibool
-btr_cur_mark_extern_inherited_fields(
-/*=================================*/
+void
+btr_cur_disown_inherited_fields(
+/*============================*/
 	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
 				part will be updated, or NULL */
 	rec_t*		rec,	/*!< in/out: record in a clustered index */
 	dict_index_t*	index,	/*!< in: index of the page */
 	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
 	const upd_t*	update,	/*!< in: update vector */
-	mtr_t*		mtr);	/*!< in: mtr, or NULL if not logged */
-/*******************************************************************//**
-The complement of the previous function: in an update entry may inherit
-some externally stored fields from a record. We must mark them as inherited
-in entry, so that they are not freed in a rollback. */
-UNIV_INTERN
-void
-btr_cur_mark_dtuple_inherited_extern(
-/*=================================*/
-	dtuple_t*	entry,		/*!< in/out: updated entry to be
-					inserted to clustered index */
-	const upd_t*	update);	/*!< in: update vector */
-/*******************************************************************//**
-Marks all extern fields in a dtuple as owned by the record. */
-UNIV_INTERN
-void
-btr_cur_unmark_dtuple_extern_fields(
-/*================================*/
-	dtuple_t*	entry);		/*!< in/out: clustered index entry */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull(2,3,4,5,6)));
 /*******************************************************************//**
 Stores the fields in big_rec_vec to the tablespace and puts pointers to
 them in rec.  The extern flags in rec will have to be set beforehand.

=== modified file 'storage/innodb_plugin/include/data0data.h'
--- a/storage/innodb_plugin/include/data0data.h	revid:vasil.dimov@strippedzcz2725
+++ b/storage/innodb_plugin/include/data0data.h	revid:marko.makela@strippedcm7
@@ -154,14 +154,19 @@ dfield_dup(
 	dfield_t*	field,	/*!< in/out: data field */
 	mem_heap_t*	heap);	/*!< in: memory heap where allocated */
 /*********************************************************************//**
-Tests if data length and content is equal for two dfields.
-@return	TRUE if equal */
+Tests if two data fields are equal.
+If len==0, tests the data length and content for equality.
+If len>0, tests the first len bytes of the content for equality.
+@return	TRUE if both fields are NULL or if they are equal */
 UNIV_INLINE
 ibool
 dfield_datas_are_binary_equal(
 /*==========================*/
 	const dfield_t*	field1,	/*!< in: field */
-	const dfield_t*	field2);/*!< in: field */
+	const dfield_t*	field2,	/*!< in: field */
+	ulint		len)	/*!< in: maximum prefix to compare,
+				or 0 to compare the whole field length */
+	__attribute__((nonnull, warn_unused_result));
 /*********************************************************************//**
 Tests if dfield data length and content is equal to the given.
 @return	TRUE if equal */

=== modified file 'storage/innodb_plugin/include/data0data.ic'
--- a/storage/innodb_plugin/include/data0data.ic	revid:vasil.dimov@oracle.com-20101214093819-wb2offmkgzcz2725
+++ b/storage/innodb_plugin/include/data0data.ic	revid:marko.makela@stripped-20101221112722-1yxxzzgqtem8bcm7
@@ -229,20 +229,30 @@ dfield_dup(
 }
 
 /*********************************************************************//**
-Tests if data length and content is equal for two dfields.
-@return	TRUE if equal */
+Tests if two data fields are equal.
+If len==0, tests the data length and content for equality.
+If len>0, tests the first len bytes of the content for equality.
+@return	TRUE if both fields are NULL or if they are equal */
 UNIV_INLINE
 ibool
 dfield_datas_are_binary_equal(
 /*==========================*/
 	const dfield_t*	field1,	/*!< in: field */
-	const dfield_t*	field2)	/*!< in: field */
+	const dfield_t*	field2,	/*!< in: field */
+	ulint		len)	/*!< in: maximum prefix to compare,
+				or 0 to compare the whole field length */
 {
-	ulint	len;
+	ulint	len2 = len;
 
-	len = field1->len;
+	if (field1->len == UNIV_SQL_NULL || len == 0 || field1->len < len) {
+		len = field1->len;
+	}
 
-	return(len == field2->len
+	if (field2->len == UNIV_SQL_NULL || len2 == 0 || field2->len < len2) {
+		len2 = field2->len;
+	}
+
+	return(len == len2
 	       && (len == UNIV_SQL_NULL
 		   || !memcmp(field1->data, field2->data, len)));
 }

=== modified file 'storage/innodb_plugin/include/row0upd.h'
--- a/storage/innodb_plugin/include/row0upd.h	revid:vasil.dimov@stripped1214093819-wb2offmkgzcz2725
+++ b/storage/innodb_plugin/include/row0upd.h	revid:marko.makela@stripped2722-1yxxzzgqtem8bcm7
@@ -286,10 +286,13 @@ row_upd_changes_ord_field_binary(
 				row and the data values in update are not
 				known when this function is called, e.g., at
 				compile time */
+	const row_ext_t*ext,	/*!< NULL, or prefixes of the externally
+				stored columns in the old row */
 	dict_index_t*	index,	/*!< in: index of the record */
-	const upd_t*	update);/*!< in: update vector for the row; NOTE: the
+	const upd_t*	update)	/*!< in: update vector for the row; NOTE: the
 				field numbers in this MUST be clustered index
 				positions! */
+	__attribute__((nonnull(3,4), warn_unused_result));
 /***********************************************************//**
 Checks if an update vector changes an ordering field of an index record.
 This function is fast if the update vector is short or the number of ordering
@@ -462,11 +465,16 @@ struct upd_node_struct{
 #define UPD_NODE_INSERT_CLUSTERED  3	/* clustered index record should be
 					inserted, old record is already delete
 					marked */
-#define UPD_NODE_UPDATE_ALL_SEC	   4	/* an ordering field of the clustered
+#define UPD_NODE_INSERT_BLOB	   4	/* clustered index record should be
+					inserted, old record is already
+					delete-marked; non-updated BLOBs
+					should be inherited by the new record
+					and disowned by the old record */
+#define UPD_NODE_UPDATE_ALL_SEC	   5	/* an ordering field of the clustered
 					index record was changed, or this is
 					a delete operation: should update
 					all the secondary index records */
-#define	UPD_NODE_UPDATE_SOME_SEC   5	/* secondary index entries should be
+#define UPD_NODE_UPDATE_SOME_SEC   6	/* secondary index entries should be
 					looked at and updated if an ordering
 					field changed */
 

=== modified file 'storage/innodb_plugin/row/row0purge.c'
--- a/storage/innodb_plugin/row/row0purge.c	revid:vasil.dimov@stripped93819-wb2offmkgzcz2725
+++ b/storage/innodb_plugin/row/row0purge.c	revid:marko.makela@strippedxxzzgqtem8bcm7
@@ -413,7 +413,7 @@ row_purge_upd_exist_or_extern(
 	while (node->index != NULL) {
 		index = node->index;
 
-		if (row_upd_changes_ord_field_binary(NULL, node->index,
+		if (row_upd_changes_ord_field_binary(NULL, NULL, node->index,
 						     node->update)) {
 			/* Build the older version of the index entry */
 			entry = row_build_index_entry(node->row, NULL,

=== modified file 'storage/innodb_plugin/row/row0umod.c'
--- a/storage/innodb_plugin/row/row0umod.c	revid:vasil.dimov@stripped101214093819-wb2offmkgzcz2725
+++ b/storage/innodb_plugin/row/row0umod.c	revid:marko.makela@stripped722-1yxxzzgqtem8bcm7
@@ -668,19 +668,18 @@ row_undo_mod_upd_exist_sec(
 	while (node->index != NULL) {
 		index = node->index;
 
-		if (row_upd_changes_ord_field_binary(node->row, node->index,
-						     node->update)) {
+		if (row_upd_changes_ord_field_binary(
+			    node->row, node->ext, node->index, node->update)) {
 
 			/* Build the newest version of the index entry */
 			entry = row_build_index_entry(node->row, node->ext,
 						      index, heap);
 			if (UNIV_UNLIKELY(!entry)) {
 				/* The server must have crashed in
-				row_upd_clust_rec_by_insert(), in
-				row_ins_index_entry_low() before
-				btr_store_big_rec_extern_fields()
-				has written the externally stored columns
-				(BLOBs) of the new clustered index entry. */
+				row_upd_clust_rec_by_insert() before
+				the updated externally stored columns (BLOBs)
+				of the new clustered index entry were
+				written. */
 
 				/* The table must be in DYNAMIC or COMPRESSED
 				format.  REDUNDANT and COMPACT formats

=== modified file 'storage/innodb_plugin/row/row0upd.c'
--- a/storage/innodb_plugin/row/row0upd.c	revid:vasil.dimov@stripped-wb2offmkgzcz2725
+++ b/storage/innodb_plugin/row/row0upd.c	revid:marko.makela@strippedem8bcm7
@@ -1198,20 +1198,21 @@ row_upd_changes_ord_field_binary(
 				row and the data values in update are not
 				known when this function is called, e.g., at
 				compile time */
+	const row_ext_t*ext,	/*!< NULL, or prefixes of the externally
+				stored columns in the old row */
 	dict_index_t*	index,	/*!< in: index of the record */
 	const upd_t*	update)	/*!< in: update vector for the row; NOTE: the
 				field numbers in this MUST be clustered index
 				positions! */
 {
-	ulint		n_unique;
-	ulint		n_upd_fields;
-	ulint		i, j;
-	dict_index_t*	clust_index;
+	ulint			n_unique;
+	ulint			i;
+	const dict_index_t*	clust_index;
 
-	ut_ad(update && index);
+	ut_ad(update);
+	ut_ad(index);
 
 	n_unique = dict_index_get_n_unique(index);
-	n_upd_fields = upd_get_n_fields(update);
 
 	clust_index = dict_table_get_first_index(index->table);
 
@@ -1219,33 +1220,72 @@ row_upd_changes_ord_field_binary(
 
 		const dict_field_t*	ind_field;
 		const dict_col_t*	col;
-		ulint			col_pos;
 		ulint			col_no;
+		const upd_field_t*	upd_field;
+		const dfield_t*		dfield;
+		dfield_t		dfield_ext;
+		ulint			dfield_len;
+		const byte*		buf;
 
 		ind_field = dict_index_get_nth_field(index, i);
 		col = dict_field_get_col(ind_field);
-		col_pos = dict_col_get_clust_pos(col, clust_index);
 		col_no = dict_col_get_no(col);
 
-		for (j = 0; j < n_upd_fields; j++) {
+		upd_field = upd_get_field_by_field_no(
+			update, dict_col_get_clust_pos(col, clust_index));
 
-			const upd_field_t*	upd_field
-				= upd_get_nth_field(update, j);
+		if (upd_field == NULL) {
+			continue;
+		}
+
+		if (row == NULL) {
+			ut_ad(ext == NULL);
+			return(TRUE);
+		}
 
-			/* Note that if the index field is a column prefix
-			then it may be that row does not contain an externally
-			stored part of the column value, and we cannot compare
-			the datas */
+		dfield = dtuple_get_nth_field(row, col_no);
 
-			if (col_pos == upd_field->field_no
-			    && (row == NULL
-				|| ind_field->prefix_len > 0
-				|| !dfield_datas_are_binary_equal(
-					dtuple_get_nth_field(row, col_no),
-					&(upd_field->new_val)))) {
+		/* This treatment of column prefix indexes is loosely
+		based on row_build_index_entry(). */
 
-				return(TRUE);
+		if (UNIV_LIKELY(ind_field->prefix_len == 0)
+		    || dfield_is_null(dfield)) {
+			/* do nothing special */
+		} else if (UNIV_LIKELY_NULL(ext)) {
+			/* See if the column is stored externally. */
+			buf = row_ext_lookup(ext, col_no, &dfield_len);
+
+			ut_ad(col->ord_part);
+
+			if (UNIV_LIKELY_NULL(buf)) {
+				if (UNIV_UNLIKELY(buf == field_ref_zero)) {
+					/* This should never happen, but
+					we try to fail safe here. */
+					ut_ad(0);
+					return(TRUE);
+				}
+
+				goto copy_dfield;
 			}
+		} else if (dfield_is_ext(dfield)) {
+			dfield_len = dfield_get_len(dfield);
+			ut_a(dfield_len > BTR_EXTERN_FIELD_REF_SIZE);
+			dfield_len -= BTR_EXTERN_FIELD_REF_SIZE;
+			ut_a(dict_index_is_clust(index)
+			     || ind_field->prefix_len <= dfield_len);
+			buf = dfield_get_data(dfield);
+copy_dfield:
+			ut_a(dfield_len > 0);
+			dfield_copy(&dfield_ext, dfield);
+			dfield_set_data(&dfield_ext, buf, dfield_len);
+			dfield = &dfield_ext;
+		}
+
+		if (!dfield_datas_are_binary_equal(
+			    dfield, &upd_field->new_val,
+			    ind_field->prefix_len)) {
+
+			return(TRUE);
 		}
 	}
 
@@ -1329,7 +1369,7 @@ row_upd_changes_first_fields_binary(
 			if (col_pos == upd_field->field_no
 			    && !dfield_datas_are_binary_equal(
 				    dtuple_get_nth_field(entry, i),
-				    &(upd_field->new_val))) {
+				    &upd_field->new_val, 0)) {
 
 				return(TRUE);
 			}
@@ -1568,14 +1608,99 @@ row_upd_sec_step(
 	ut_ad(!dict_index_is_clust(node->index));
 
 	if (node->state == UPD_NODE_UPDATE_ALL_SEC
-	    || row_upd_changes_ord_field_binary(node->row, node->index,
-						node->update)) {
+	    || row_upd_changes_ord_field_binary(node->row, node->ext,
+						node->index, node->update)) {
 		return(row_upd_sec_index_entry(node, thr));
 	}
 
 	return(DB_SUCCESS);
 }
 
+#ifdef UNIV_DEBUG
+# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update)	\
+	row_upd_clust_rec_by_insert_inherit_func(rec,offsets,entry,update)
+#else /* UNIV_DEBUG */
+# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update)	\
+	row_upd_clust_rec_by_insert_inherit_func(entry,update)
+#endif /* UNIV_DEBUG */
+/*******************************************************************//**
+Mark non-updated off-page columns inherited when the primary key is
+updated. We must mark them as inherited in entry, so that they are not
+freed in a rollback. A limited version of this function used to be
+called btr_cur_mark_dtuple_inherited_extern().
+@return TRUE if any columns were inherited */
+static __attribute__((warn_unused_result))
+ibool
+row_upd_clust_rec_by_insert_inherit_func(
+/*=====================================*/
+#ifdef UNIV_DEBUG
+	const rec_t*	rec,	/*!< in: old record, or NULL */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec), or NULL */
+#endif /* UNIV_DEBUG */
+	dtuple_t*	entry,	/*!< in/out: updated entry to be
+				inserted into the clustered index */
+	const upd_t*	update)	/*!< in: update vector */
+{
+	ibool	inherit	= FALSE;
+	ulint	i;
+
+	ut_ad(!rec == !offsets);
+	ut_ad(!rec || rec_offs_any_extern(offsets));
+
+	for (i = 0; i < dtuple_get_n_fields(entry); i++) {
+		dfield_t*	dfield	= dtuple_get_nth_field(entry, i);
+		byte*		data;
+		ulint		len;
+
+		ut_ad(!offsets
+		      || !rec_offs_nth_extern(offsets, i)
+		      == !dfield_is_ext(dfield)
+		      || upd_get_field_by_field_no(update, i));
+		if (!dfield_is_ext(dfield)
+		    || upd_get_field_by_field_no(update, i)) {
+			continue;
+		}
+
+#ifdef UNIV_DEBUG
+		if (UNIV_LIKELY(rec != NULL)) {
+			const byte* rec_data
+				= rec_get_nth_field(rec, offsets, i, &len);
+			ut_ad(len == dfield_get_len(dfield));
+			ut_ad(len != UNIV_SQL_NULL);
+			ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+			rec_data += len - BTR_EXTERN_FIELD_REF_SIZE;
+
+			/* The pointer must not be zero. */
+			ut_ad(memcmp(rec_data, field_ref_zero,
+				     BTR_EXTERN_FIELD_REF_SIZE));
+			/* The BLOB must be owned. */
+			ut_ad(!(rec_data[BTR_EXTERN_LEN]
+				& BTR_EXTERN_OWNER_FLAG));
+		}
+#endif /* UNIV_DEBUG */
+
+		len = dfield_get_len(dfield);
+		ut_a(len != UNIV_SQL_NULL);
+		ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+		data = dfield_get_data(dfield) + len
+			- BTR_EXTERN_FIELD_REF_SIZE;
+		/* The pointer must not be zero. */
+		ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
+		/* The BLOB must be owned. */
+		ut_a(!(data[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
+
+		data[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG;
+		/* The BTR_EXTERN_INHERITED_FLAG only matters in
+		rollback. Purge will always free the extern fields of
+		a delete-marked row. */
+
+		inherit = TRUE;
+	}
+
+	return(inherit);
+}
+
 /***********************************************************//**
 Marks the clustered index record deleted and inserts the updated version
 of the record to the index. This function should be used when the ordering
@@ -1594,14 +1719,16 @@ row_upd_clust_rec_by_insert(
 				a foreign key constraint */
 	mtr_t*		mtr)	/*!< in/out: mtr; gets committed here */
 {
-	mem_heap_t*	heap	= NULL;
+	mem_heap_t*	heap;
 	btr_pcur_t*	pcur;
 	btr_cur_t*	btr_cur;
 	trx_t*		trx;
 	dict_table_t*	table;
 	dtuple_t*	entry;
 	ulint		err;
-	ibool		change_ownership = FALSE;
+	ibool		change_ownership	= FALSE;
+	rec_t*		rec;
+	ulint*		offsets			= NULL;
 
 	ut_ad(node);
 	ut_ad(dict_index_is_clust(index));
@@ -1611,77 +1738,112 @@ row_upd_clust_rec_by_insert(
 	pcur = node->pcur;
 	btr_cur	= btr_pcur_get_btr_cur(pcur);
 
-	if (node->state != UPD_NODE_INSERT_CLUSTERED) {
-		rec_t*		rec;
-		dict_index_t*	index;
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		ulint*		offsets;
-		rec_offs_init(offsets_);
+	heap = mem_heap_create(1000);
+
+	entry = row_build_index_entry(node->upd_row, node->upd_ext,
+				      index, heap);
+	ut_a(entry);
+
+	row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
+
+	switch (node->state) {
+	default:
+		ut_error;
+	case UPD_NODE_INSERT_BLOB:
+		/* A lock wait occurred in row_ins_index_entry() in
+		the previous invocation of this function. Mark the
+		off-page columns in the entry inherited. */
+
+		change_ownership = row_upd_clust_rec_by_insert_inherit(
+			NULL, NULL, entry, node->update);
+		ut_a(change_ownership);
+		/* fall through */
+	case UPD_NODE_INSERT_CLUSTERED:
+		/* A lock wait occurred in row_ins_index_entry() in
+		the previous invocation of this function. */
+		break;
+	case UPD_NODE_UPDATE_CLUSTERED:
+		/* This is the first invocation of the function where
+		we update the primary key.  Delete-mark the old record
+		in the clustered index and prepare to insert a new entry. */
+		rec = btr_cur_get_rec(btr_cur);
+		offsets = rec_get_offsets(rec, index, NULL,
+					  ULINT_UNDEFINED, &heap);
+		ut_ad(page_rec_is_user_rec(rec));
 
-		err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
-						     btr_cur, TRUE, thr, mtr);
+		err = btr_cur_del_mark_set_clust_rec(
+			BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur),
+			rec, index, offsets, TRUE, thr, mtr);
 		if (err != DB_SUCCESS) {
+err_exit:
 			mtr_commit(mtr);
+			mem_heap_free(heap);
 			return(err);
 		}
 
-		/* Mark as not-owned the externally stored fields which the new
-		row inherits from the delete marked record: purge should not
-		free those externally stored fields even if the delete marked
-		record is removed from the index tree, or updated. */
+		/* If the the new row inherits externally stored
+		fields (off-page columns a.k.a. BLOBs) from the
+		delete-marked old record, mark them disowned by the
+		old record and owned by the new entry. */
+
+		if (rec_offs_any_extern(offsets)) {
+			change_ownership = row_upd_clust_rec_by_insert_inherit(
+				rec, offsets, entry, node->update);
+
+			if (change_ownership) {
+				btr_pcur_store_position(pcur, mtr);
+			}
+		}
 
-		rec = btr_cur_get_rec(btr_cur);
-		index = dict_table_get_first_index(table);
-		offsets = rec_get_offsets(rec, index, offsets_,
-					  ULINT_UNDEFINED, &heap);
-		change_ownership = btr_cur_mark_extern_inherited_fields(
-			btr_cur_get_page_zip(btr_cur), rec, index, offsets,
-			node->update, mtr);
 		if (check_ref) {
 			/* NOTE that the following call loses
 			the position of pcur ! */
 			err = row_upd_check_references_constraints(
 				node, pcur, table, index, offsets, thr, mtr);
 			if (err != DB_SUCCESS) {
-				mtr_commit(mtr);
-				if (UNIV_LIKELY_NULL(heap)) {
-					mem_heap_free(heap);
-				}
-				return(err);
+				goto err_exit;
 			}
 		}
 	}
 
 	mtr_commit(mtr);
 
-	if (!heap) {
-		heap = mem_heap_create(500);
-	}
-	node->state = UPD_NODE_INSERT_CLUSTERED;
-
-	entry = row_build_index_entry(node->upd_row, node->upd_ext,
-				      index, heap);
-	ut_a(entry);
+	err = row_ins_index_entry(index, entry,
+				  node->upd_ext ? node->upd_ext->n_ext : 0,
+				  TRUE, thr);
+	node->state = change_ownership
+		? UPD_NODE_INSERT_BLOB
+		: UPD_NODE_INSERT_CLUSTERED;
+
+	if (err == DB_SUCCESS && change_ownership) {
+		/* Mark the non-updated fields disowned by the old record. */
+
+		/* NOTE: this transaction has an x-lock on the record
+		and therefore other transactions cannot modify the
+		record when we have no latch on the page. In addition,
+		we assume that other query threads of the same
+		transaction do not modify the record in the meantime.
+		Therefore we can assert that the restoration of the
+		cursor succeeds. */
 
-	row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
+		mtr_start(mtr);
 
-	if (change_ownership) {
-		/* If we return from a lock wait, for example, we may have
-		extern fields marked as not-owned in entry (marked in the
-		if-branch above). We must unmark them, take the ownership
-		back. */
+		if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr)) {
+			ut_error;
+		}
 
-		btr_cur_unmark_dtuple_extern_fields(entry);
+		rec = btr_cur_get_rec(btr_cur);
+		offsets = rec_get_offsets(rec, index, offsets,
+					  ULINT_UNDEFINED, &heap);
+		ut_ad(page_rec_is_user_rec(rec));
 
-		/* We must mark non-updated extern fields in entry as
-		inherited, so that a possible rollback will not free them. */
+		btr_cur_disown_inherited_fields(
+			btr_cur_get_page_zip(btr_cur),
+			rec, index, offsets, node->update, mtr);
 
-		btr_cur_mark_dtuple_inherited_extern(entry, node->update);
+		mtr_commit(mtr);
 	}
 
-	err = row_ins_index_entry(index, entry,
-				  node->upd_ext ? node->upd_ext->n_ext : 0,
-				  TRUE, thr);
 	mem_heap_free(heap);
 
 	return(err);
@@ -1825,8 +1987,9 @@ row_upd_del_mark_clust_rec(
 	/* Mark the clustered index record deleted; we do not have to check
 	locks, because we assume that we have an x-lock on the record */
 
-	err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
-					     btr_cur, TRUE, thr, mtr);
+	err = btr_cur_del_mark_set_clust_rec(
+		BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur),
+		btr_cur_get_rec(btr_cur), index, offsets, TRUE, thr, mtr);
 	if (err == DB_SUCCESS && check_ref) {
 		/* NOTE that the following call loses the position of pcur ! */
 
@@ -1973,7 +2136,8 @@ exit_func:
 
 	row_upd_store_row(node);
 
-	if (row_upd_changes_ord_field_binary(node->row, index, node->update)) {
+	if (row_upd_changes_ord_field_binary(node->row, node->ext, index,
+					     node->update)) {
 
 		/* Update causes an ordering field (ordering fields within
 		the B-tree) of the clustered index record to change: perform
@@ -2042,7 +2206,8 @@ row_upd(
 	}
 
 	if (node->state == UPD_NODE_UPDATE_CLUSTERED
-	    || node->state == UPD_NODE_INSERT_CLUSTERED) {
+	    || node->state == UPD_NODE_INSERT_CLUSTERED
+	    || node->state == UPD_NODE_INSERT_BLOB) {
 
 		log_free_check();
 		err = row_upd_clust_step(node, thr);

Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20101221112722-1yxxzzgqtem8bcm7.bundle
Thread
bzr push into mysql-5.1-innodb branch (marko.makela:3668 to 3670) Bug#55284marko.makela21 Dec