List:Commits« Previous MessageNext Message »
From:marko.makela Date:June 29 2010 1:13pm
Subject:bzr push into mysql-5.1-innodb branch (marko.makela:3528 to 3532) Bug#54408
View as plain text  
 3532 Marko Mäkelä	2010-06-29
      ChangeLog entry for Bug #54408

    modified:
      storage/innodb_plugin/ChangeLog
 3531 Marko Mäkelä	2010-06-29
      Bug#54408: txn rollback after recovery: row0umod.c:673
      dict_table_get_format(index->table)
      
      The REDUNDANT and COMPACT formats store a local 768-byte prefix of
      each externally stored column. No row_ext cache is needed, but we
      initialized one nevertheless. When the BLOB pointer was zero, we would
      ignore the locally stored prefix as well. This triggered an assertion
      failure in row_undo_mod_upd_exist_sec().
      
      row_build(): Allow ext==NULL when a REDUNDANT or COMPACT table
      contains externally stored columns.
      
      row_undo_search_clust_to_pcur(), row_upd_store_row(): Invoke
      row_build() with ext==NULL on REDUNDANT and COMPACT tables.
      
      rb://382 approved by Jimmy Yang

    modified:
      storage/innodb_plugin/row/row0row.c
      storage/innodb_plugin/row/row0undo.c
      storage/innodb_plugin/row/row0upd.c
 3530 Marko Mäkelä	2010-06-29
      ChangeLog entry for Bug #54358

    modified:
      storage/innodb_plugin/ChangeLog
 3529 Marko Mäkelä	2010-06-29
      Bug#54358: READ UNCOMMITTED access failure of off-page DYNAMIC or COMPRESSED
      columns
      
      When the server crashes after a record stub has been inserted and
      before all its off-page columns have been written, the record will
      contain incomplete off-page columns after crash recovery. Such records
      may only be accessed at the READ UNCOMMITTED isolation level or when
      rolling back a recovered transaction in recv_recovery_rollback_active().
      Skip these records at the READ UNCOMMITTED isolation level.
      
      TODO: Add assertions for checking the above assumptions hold when an
      incomplete BLOB is encountered.
      
      btr_rec_copy_externally_stored_field(): Return NULL if the field is
      incomplete.
      
      row_prebuilt_t::templ_contains_blob: Clarify what "BLOB" means in this
      context. Hint: MySQL BLOBs are not the same as InnoDB BLOBs.
      
      row_sel_store_mysql_rec(): Return FALSE if not all columns could be
      retrieved. Previously this function always returned TRUE.  Assert that
      the record is not delete-marked.
      
      row_sel_push_cache_row_for_mysql(): Return FALSE if not all columns
      could be retrieved.
      
      row_search_for_mysql(): Skip records containing incomplete off-page
      columns. Assert that the transaction isolation level is READ
      UNCOMMITTED.
      
      rb://380 approved by Jimmy Yang

    modified:
      storage/innodb_plugin/btr/btr0cur.c
      storage/innodb_plugin/include/btr0cur.h
      storage/innodb_plugin/include/row0mysql.h
      storage/innodb_plugin/row/row0merge.c
      storage/innodb_plugin/row/row0sel.c
 3528 Jimmy Yang	2010-06-28
      Check in fix for bug #53756: "ALTER TABLE ADD PRIMARY KEY affects
      crash recovery"
      
      rb://369 approved by Marko

    added:
      mysql-test/suite/innodb/r/innodb_bug53756.result
      mysql-test/suite/innodb/t/innodb_bug53756.test
    modified:
      storage/innobase/dict/dict0load.c
=== modified file 'storage/innodb_plugin/ChangeLog'
--- a/storage/innodb_plugin/ChangeLog	revid:jimmy.yang@stripped
+++ b/storage/innodb_plugin/ChangeLog	revid:marko.makela@oracle.com-20100629131219-pjbkpk5rsqztmw27
@@ -1,3 +1,15 @@
+2010-06-29	The InnoDB Team
+	* row/row0row.c, row/row0undo.c, row/row0upd.c:
+	Fix Bug#54408 txn rollback after recovery: row0umod.c:673
+	dict_table_get_format(index->table)
+
+2010-06-29	The InnoDB Team
+
+	* btr/btr0cur.c, include/btr0cur.h,
+	include/row0mysql.h, row/row0merge.c, row/row0sel.c:
+	Fix Bug#54358 READ UNCOMMITTED access failure of off-page DYNAMIC
+	or COMPRESSED columns
+
 2010-06-24	The InnoDB Team
 
 	* handler/ha_innodb.cc:

=== modified file 'storage/innodb_plugin/btr/btr0cur.c'
--- a/storage/innodb_plugin/btr/btr0cur.c	revid:jimmy.yang@oracle.com-20100629024137-690sacm5sogruzvb
+++ b/storage/innodb_plugin/btr/btr0cur.c	revid:marko.makela@stripped-20100629131219-pjbkpk5rsqztmw27
@@ -4814,7 +4814,7 @@ btr_copy_externally_stored_field(
 
 /*******************************************************************//**
 Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap */
+@return	the field copied to heap, or NULL if the field is incomplete */
 UNIV_INTERN
 byte*
 btr_rec_copy_externally_stored_field(
@@ -4844,6 +4844,28 @@ btr_rec_copy_externally_stored_field(
 
 	data = rec_get_nth_field(rec, offsets, no, &local_len);
 
+	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+	if (UNIV_UNLIKELY
+	    (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
+		     field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
+		/* The externally stored field was not written
+		yet. This is only a valid condition when the server
+		crashed after the time a record stub was freshly
+		inserted but before all its columns were written. This
+		record should only be seen by
+		recv_recovery_rollback_active() or any
+		TRX_ISO_READ_UNCOMMITTED transactions. */
+
+		/* TODO: assert that there is an owner_trx with
+		owner_trx->id == DB_TRX_ID and owner_trx->is_recovered */
+
+		/* TODO: assert that for the current transaction trx,
+		either (trx == owner_trx && trx_is_recv(trx)) or
+		trx->isolation_level == TRX_ISO_READ_UNCOMMITTED. */
+		return(NULL);
+	}
+
 	return(btr_copy_externally_stored_field(len, data,
 						zip_size, local_len, heap));
 }

=== modified file 'storage/innodb_plugin/include/btr0cur.h'
--- a/storage/innodb_plugin/include/btr0cur.h	revid:jimmy.yang@strippeduzvb
+++ b/storage/innodb_plugin/include/btr0cur.h	revid:marko.makela@stripped
@@ -570,7 +570,7 @@ btr_copy_externally_stored_field_prefix(
 	ulint		local_len);/*!< in: length of data, in bytes */
 /*******************************************************************//**
 Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap */
+@return	the field copied to heap, or NULL if the field is incomplete */
 UNIV_INTERN
 byte*
 btr_rec_copy_externally_stored_field(

=== modified file 'storage/innodb_plugin/include/row0mysql.h'
--- a/storage/innodb_plugin/include/row0mysql.h	revid:jimmy.yang@stripped137-690sacm5sogruzvb
+++ b/storage/innodb_plugin/include/row0mysql.h	revid:marko.makela@strippedpjbkpk5rsqztmw27
@@ -622,7 +622,11 @@ struct row_prebuilt_struct {
 					the secondary index, then this is
 					set to TRUE */
 	unsigned	templ_contains_blob:1;/*!< TRUE if the template contains
-					BLOB column(s) */
+					a column with DATA_BLOB ==
+					get_innobase_type_from_mysql_type();
+					not to be confused with InnoDB
+					externally stored columns
+					(VARCHAR can be off-page too) */
 	mysql_row_templ_t* mysql_template;/*!< template used to transform
 					rows fast between MySQL and Innobase
 					formats; memory for this template

=== modified file 'storage/innodb_plugin/row/row0merge.c'
--- a/storage/innodb_plugin/row/row0merge.c	revid:jimmy.yang@strippedm-20100629024137-690sacm5sogruzvb
+++ b/storage/innodb_plugin/row/row0merge.c	revid:marko.makela@stripped29131219-pjbkpk5rsqztmw27
@@ -1780,6 +1780,11 @@ row_merge_copy_blobs(
 		(below). */
 		data = btr_rec_copy_externally_stored_field(
 			mrec, offsets, zip_size, i, &len, heap);
+		/* Because we have locked the table, any records
+		written by incomplete transactions must have been
+		rolled back already. There must not be any incomplete
+		BLOB columns. */
+		ut_a(data);
 
 		dfield_set_data(field, data, len);
 	}

=== modified file 'storage/innodb_plugin/row/row0row.c'
--- a/storage/innodb_plugin/row/row0row.c	revid:jimmy.yang@strippedsogruzvb
+++ b/storage/innodb_plugin/row/row0row.c	revid:marko.makela@stripped
@@ -294,7 +294,13 @@ row_build(
 
 	ut_ad(dtuple_check_typed(row));
 
-	if (j) {
+	if (!ext) {
+		/* REDUNDANT and COMPACT formats store a local
+		768-byte prefix of each externally stored
+		column. No cache is needed. */
+		ut_ad(dict_table_get_format(index->table)
+		      < DICT_TF_FORMAT_ZIP);
+	} else if (j) {
 		*ext = row_ext_create(j, ext_cols, row,
 				      dict_table_zip_size(index->table),
 				      heap);

=== modified file 'storage/innodb_plugin/row/row0sel.c'
--- a/storage/innodb_plugin/row/row0sel.c	revid:jimmy.yang@strippedsacm5sogruzvb
+++ b/storage/innodb_plugin/row/row0sel.c	revid:marko.makela@strippedw27
@@ -416,7 +416,7 @@ row_sel_fetch_columns(
 							      field_no))) {
 
 				/* Copy an externally stored field to the
-				temporary heap */
+				temporary heap, if possible. */
 
 				heap = mem_heap_create(1);
 
@@ -425,6 +425,21 @@ row_sel_fetch_columns(
 					dict_table_zip_size(index->table),
 					field_no, &len, heap);
 
+				/* data == NULL means that the
+				externally stored field was not
+				written yet. This is only a valid
+				condition when the server crashed
+				after the time a record stub was
+				freshly inserted but before all its
+				columns were written. This record
+				should only be seen by
+				recv_recovery_rollback_active() or any
+				TRX_ISO_READ_UNCOMMITTED
+				transactions. The InnoDB SQL parser
+				(the sole caller of this function)
+				does not implement READ UNCOMMITTED,
+				and it is not involved during rollback. */
+				ut_a(data);
 				ut_a(len != UNIV_SQL_NULL);
 
 				needs_copy = TRUE;
@@ -926,6 +941,7 @@ row_sel_get_clust_rec(
 	when plan->clust_pcur was positioned.  The latch will not be
 	released until mtr_commit(mtr). */
 
+	ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
 	row_sel_fetch_columns(index, clust_rec, offsets,
 			      UT_LIST_GET_FIRST(plan->columns));
 	*out_rec = clust_rec;
@@ -1628,6 +1644,13 @@ skip_lock:
 				}
 
 				if (old_vers == NULL) {
+					/* The record does not exist
+					in our read view. Skip it, but
+					first attempt to determine
+					whether the index segment we
+					are searching through has been
+					exhausted. */
+
 					offsets = rec_get_offsets(
 						rec, index, offsets,
 						ULINT_UNDEFINED, &heap);
@@ -2647,9 +2670,8 @@ Convert a row in the Innobase format to
 Note that the template in prebuilt may advise us to copy only a few
 columns to mysql_rec, other columns are left blank. All columns may not
 be needed in the query.
-@return TRUE if success, FALSE if could not allocate memory for a BLOB
-(though we may also assert in that case) */
-static
+@return TRUE on success, FALSE if not all columns could be retrieved */
+static __attribute__((warn_unused_result))
 ibool
 row_sel_store_mysql_rec(
 /*====================*/
@@ -2672,6 +2694,7 @@ row_sel_store_mysql_rec(
 	ut_ad(prebuilt->mysql_template);
 	ut_ad(prebuilt->default_rec);
 	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
 
 	if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
 		mem_heap_free(prebuilt->blob_heap);
@@ -2719,6 +2742,26 @@ row_sel_store_mysql_rec(
 				dict_table_zip_size(prebuilt->table),
 				templ->rec_field_no, &len, heap);
 
+			if (UNIV_UNLIKELY(!data)) {
+				/* The externally stored field
+				was not written yet. This is
+				only a valid condition when
+				the server crashed after the
+				time a record stub was freshly
+				inserted but before all its
+				columns were written. This
+				record should only be seen by
+				recv_recovery_rollback_active()
+				or any TRX_ISO_READ_UNCOMMITTED
+				transactions. */
+
+				if (extern_field_heap) {
+					mem_heap_free(extern_field_heap);
+				}
+
+				return(FALSE);
+			}
+
 			ut_a(len != UNIV_SQL_NULL);
 		} else {
 			/* Field is stored in the row. */
@@ -3136,9 +3179,10 @@ row_sel_pop_cached_row_for_mysql(
 }
 
 /********************************************************************//**
-Pushes a row for MySQL to the fetch cache. */
-UNIV_INLINE
-void
+Pushes a row for MySQL to the fetch cache.
+@return TRUE on success, FALSE if the record contains incomplete BLOBs */
+UNIV_INLINE __attribute__((warn_unused_result))
+ibool
 row_sel_push_cache_row_for_mysql(
 /*=============================*/
 	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
@@ -3180,10 +3224,11 @@ row_sel_push_cache_row_for_mysql(
 				  prebuilt->fetch_cache[
 					  prebuilt->n_fetch_cached],
 				  prebuilt, rec, offsets))) {
-		ut_error;
+		return(FALSE);
 	}
 
 	prebuilt->n_fetch_cached++;
+	return(TRUE);
 }
 
 /*********************************************************************//**
@@ -3578,6 +3623,24 @@ row_search_for_mysql(
 
 				if (!row_sel_store_mysql_rec(buf, prebuilt,
 							     rec, offsets)) {
+					/* Only fresh inserts at
+					server crash time may contain
+					incomplete externally stored
+					columns. Pretend that such
+					records do not exist. Such
+					records may only be accessed
+					at the READ UNCOMMITTED
+					isolation level or when
+					rolling back a recovered
+					transaction. Rollback happens
+					at a lower level, not here. */
+					ut_a(trx->isolation_level
+					     == TRX_ISO_READ_UNCOMMITTED);
+					/* TODO: assert that there is
+					an owner_trx with
+					owner_trx->id == DB_TRX_ID and
+					owner_trx->is_recovered */
+
 					err = DB_TOO_BIG_RECORD;
 
 					/* We let the main loop to do the
@@ -4357,9 +4420,20 @@ requires_clust_rec:
 		not cache rows because there the cursor is a scrollable
 		cursor. */
 
-		row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
-						 offsets);
-		if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
+		if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
+						      offsets)) {
+			/* Only fresh inserts at server crash time may contain
+			incomplete externally stored columns. Pretend that
+			such records do not exist. Such records may only be
+			accessed at the READ UNCOMMITTED isolation level or
+			when rolling back a recovered transaction. Rollback
+			happens at a lower level, not here. */
+			ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
+			/* TODO: assert that there is an owner_trx
+			with owner_trx->id == DB_TRX_ID and
+			owner_trx->is_recovered */
+		} else if (prebuilt->n_fetch_cached
+			   == MYSQL_FETCH_CACHE_SIZE) {
 
 			goto got_row;
 		}
@@ -4375,9 +4449,20 @@ requires_clust_rec:
 		} else {
 			if (!row_sel_store_mysql_rec(buf, prebuilt,
 						     result_rec, offsets)) {
-				err = DB_TOO_BIG_RECORD;
-
-				goto lock_wait_or_error;
+				/* Only fresh inserts at server crash
+				time may contain incomplete externally
+				stored columns. Pretend that such
+				records do not exist. Such records may
+				only be accessed at the READ UNCOMMITTED
+				isolation level or when rolling back a
+				recovered transaction. Rollback happens
+				at a lower level, not here. */
+				ut_a(trx->isolation_level
+				     == TRX_ISO_READ_UNCOMMITTED);
+				/* TODO: assert that there is an owner_trx
+				with owner_trx->id == DB_TRX_ID and
+				owner_trx->is_recovered */
+				goto next_rec;
 			}
 		}
 

=== modified file 'storage/innodb_plugin/row/row0undo.c'
--- a/storage/innodb_plugin/row/row0undo.c	revid:jimmy.yang@stripped690sacm5sogruzvb
+++ b/storage/innodb_plugin/row/row0undo.c	revid:marko.makela@strippedqztmw27
@@ -199,8 +199,24 @@ row_undo_search_clust_to_pcur(
 
 		ret = FALSE;
 	} else {
+		row_ext_t**	ext;
+
+		if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) {
+			/* In DYNAMIC or COMPRESSED format, there is
+			no prefix of externally stored columns in the
+			clustered index record. Build a cache of
+			column prefixes. */
+			ext = &node->ext;
+		} else {
+			/* REDUNDANT and COMPACT formats store a local
+			768-byte prefix of each externally stored
+			column. No cache is needed. */
+			ext = NULL;
+			node->ext = NULL;
+		}
+
 		node->row = row_build(ROW_COPY_DATA, clust_index, rec,
-				      offsets, NULL, &node->ext, node->heap);
+				      offsets, NULL, ext, node->heap);
 		if (node->update) {
 			node->undo_row = dtuple_copy(node->row, node->heap);
 			row_upd_replace(node->undo_row, &node->undo_ext,

=== modified file 'storage/innodb_plugin/row/row0upd.c'
--- a/storage/innodb_plugin/row/row0upd.c	revid:jimmy.yang@stripped-20100629024137-690sacm5sogruzvb
+++ b/storage/innodb_plugin/row/row0upd.c	revid:marko.makela@stripped31219-pjbkpk5rsqztmw27
@@ -1398,6 +1398,7 @@ row_upd_store_row(
 	dict_index_t*	clust_index;
 	rec_t*		rec;
 	mem_heap_t*	heap		= NULL;
+	row_ext_t**	ext;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	const ulint*	offsets;
 	rec_offs_init(offsets_);
@@ -1414,8 +1415,22 @@ row_upd_store_row(
 
 	offsets = rec_get_offsets(rec, clust_index, offsets_,
 				  ULINT_UNDEFINED, &heap);
+
+	if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) {
+		/* In DYNAMIC or COMPRESSED format, there is no prefix
+		of externally stored columns in the clustered index
+		record. Build a cache of column prefixes. */
+		ext = &node->ext;
+	} else {
+		/* REDUNDANT and COMPACT formats store a local
+		768-byte prefix of each externally stored column.
+		No cache is needed. */
+		ext = NULL;
+		node->ext = NULL;
+	}
+
 	node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
-			      NULL, &node->ext, node->heap);
+			      NULL, ext, node->heap);
 	if (node->is_delete) {
 		node->upd_row = NULL;
 		node->upd_ext = NULL;

Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20100629131219-pjbkpk5rsqztmw27.bundle
Thread
bzr push into mysql-5.1-innodb branch (marko.makela:3528 to 3532) Bug#54408marko.makela29 Jun