List:Commits« Previous MessageNext Message »
From:marko.makela Date:June 29 2010 12:55pm
Subject:bzr commit into mysql-5.1-innodb branch (marko.makela:3529) Bug#54358
View as plain text  
#At file:///home/marko/innobase/dev/mysql2a/5.1-innodb/ based on revid:jimmy.yang@strippedm5sogruzvb

 3529 Marko Mäkelä	2010-06-29
      Bug#54358: READ UNCOMMITTED access failure of off-page DYNAMIC or COMPRESSED
      columns
      
      When the server crashes after a record stub has been inserted and
      before all its off-page columns have been written, the record will
      contain incomplete off-page columns after crash recovery. Such records
      may only be accessed at the READ UNCOMMITTED isolation level or when
      rolling back a recovered transaction in recv_recovery_rollback_active().
      Skip these records at the READ UNCOMMITTED isolation level.
      
      TODO: Add assertions for checking the above assumptions hold when an
      incomplete BLOB is encountered.
      
      btr_rec_copy_externally_stored_field(): Return NULL if the field is
      incomplete.
      
      row_prebuilt_t::templ_contains_blob: Clarify what "BLOB" means in this
      context. Hint: MySQL BLOBs are not the same as InnoDB BLOBs.
      
      row_sel_store_mysql_rec(): Return FALSE if not all columns could be
      retrieved. Previously this function always returned TRUE.  Assert that
      the record is not delete-marked.
      
      row_sel_push_cache_row_for_mysql(): Return FALSE if not all columns
      could be retrieved.
      
      row_search_for_mysql(): Skip records containing incomplete off-page
      columns. Assert that the transaction isolation level is READ
      UNCOMMITTED.
      
      rb://380 approved by Jimmy Yang

    modified:
      storage/innodb_plugin/btr/btr0cur.c
      storage/innodb_plugin/include/btr0cur.h
      storage/innodb_plugin/include/row0mysql.h
      storage/innodb_plugin/row/row0merge.c
      storage/innodb_plugin/row/row0sel.c
=== modified file 'storage/innodb_plugin/btr/btr0cur.c'
--- a/storage/innodb_plugin/btr/btr0cur.c	revid:jimmy.yang@stripped
+++ b/storage/innodb_plugin/btr/btr0cur.c	revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j
@@ -4814,7 +4814,7 @@ btr_copy_externally_stored_field(
 
 /*******************************************************************//**
 Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap */
+@return	the field copied to heap, or NULL if the field is incomplete */
 UNIV_INTERN
 byte*
 btr_rec_copy_externally_stored_field(
@@ -4844,6 +4844,28 @@ btr_rec_copy_externally_stored_field(
 
 	data = rec_get_nth_field(rec, offsets, no, &local_len);
 
+	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+	if (UNIV_UNLIKELY
+	    (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
+		     field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
+		/* The externally stored field was not written
+		yet. This is only a valid condition when the server
+		crashed after the time a record stub was freshly
+		inserted but before all its columns were written. This
+		record should only be seen by
+		recv_recovery_rollback_active() or any
+		TRX_ISO_READ_UNCOMMITTED transactions. */
+
+		/* TODO: assert that there is an owner_trx with
+		owner_trx->id == DB_TRX_ID and owner_trx->is_recovered */
+
+		/* TODO: assert that for the current transaction trx,
+		either (trx == owner_trx && trx_is_recv(trx)) or
+		trx->isolation_level == TRX_ISO_READ_UNCOMMITTED. */
+		return(NULL);
+	}
+
 	return(btr_copy_externally_stored_field(len, data,
 						zip_size, local_len, heap));
 }

=== modified file 'storage/innodb_plugin/include/btr0cur.h'
--- a/storage/innodb_plugin/include/btr0cur.h	revid:jimmy.yang@strippedsogruzvb
+++ b/storage/innodb_plugin/include/btr0cur.h	revid:marko.makela@stripped0j
@@ -570,7 +570,7 @@ btr_copy_externally_stored_field_prefix(
 	ulint		local_len);/*!< in: length of data, in bytes */
 /*******************************************************************//**
 Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap */
+@return	the field copied to heap, or NULL if the field is incomplete */
 UNIV_INTERN
 byte*
 btr_rec_copy_externally_stored_field(

=== modified file 'storage/innodb_plugin/include/row0mysql.h'
--- a/storage/innodb_plugin/include/row0mysql.h	revid:jimmy.yang@stripped9024137-690sacm5sogruzvb
+++ b/storage/innodb_plugin/include/row0mysql.h	revid:marko.makela@stripped518-m3am4ia1ffjr0d0j
@@ -622,7 +622,11 @@ struct row_prebuilt_struct {
 					the secondary index, then this is
 					set to TRUE */
 	unsigned	templ_contains_blob:1;/*!< TRUE if the template contains
-					BLOB column(s) */
+					a column with DATA_BLOB ==
+					get_innobase_type_from_mysql_type();
+					not to be confused with InnoDB
+					externally stored columns
+					(VARCHAR can be off-page too) */
 	mysql_row_templ_t* mysql_template;/*!< template used to transform
 					rows fast between MySQL and Innobase
 					formats; memory for this template

=== modified file 'storage/innodb_plugin/row/row0merge.c'
--- a/storage/innodb_plugin/row/row0merge.c	revid:jimmy.yang@oracle.com-20100629024137-690sacm5sogruzvb
+++ b/storage/innodb_plugin/row/row0merge.c	revid:marko.makela@stripped100629125518-m3am4ia1ffjr0d0j
@@ -1780,6 +1780,11 @@ row_merge_copy_blobs(
 		(below). */
 		data = btr_rec_copy_externally_stored_field(
 			mrec, offsets, zip_size, i, &len, heap);
+		/* Because we have locked the table, any records
+		written by incomplete transactions must have been
+		rolled back already. There must not be any incomplete
+		BLOB columns. */
+		ut_a(data);
 
 		dfield_set_data(field, data, len);
 	}

=== modified file 'storage/innodb_plugin/row/row0sel.c'
--- a/storage/innodb_plugin/row/row0sel.c	revid:jimmy.yang@strippedacm5sogruzvb
+++ b/storage/innodb_plugin/row/row0sel.c	revid:marko.makela@stripped0j
@@ -416,7 +416,7 @@ row_sel_fetch_columns(
 							      field_no))) {
 
 				/* Copy an externally stored field to the
-				temporary heap */
+				temporary heap, if possible. */
 
 				heap = mem_heap_create(1);
 
@@ -425,6 +425,21 @@ row_sel_fetch_columns(
 					dict_table_zip_size(index->table),
 					field_no, &len, heap);
 
+				/* data == NULL means that the
+				externally stored field was not
+				written yet. This is only a valid
+				condition when the server crashed
+				after the time a record stub was
+				freshly inserted but before all its
+				columns were written. This record
+				should only be seen by
+				recv_recovery_rollback_active() or any
+				TRX_ISO_READ_UNCOMMITTED
+				transactions. The InnoDB SQL parser
+				(the sole caller of this function)
+				does not implement READ UNCOMMITTED,
+				and it is not involved during rollback. */
+				ut_a(data);
 				ut_a(len != UNIV_SQL_NULL);
 
 				needs_copy = TRUE;
@@ -926,6 +941,7 @@ row_sel_get_clust_rec(
 	when plan->clust_pcur was positioned.  The latch will not be
 	released until mtr_commit(mtr). */
 
+	ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
 	row_sel_fetch_columns(index, clust_rec, offsets,
 			      UT_LIST_GET_FIRST(plan->columns));
 	*out_rec = clust_rec;
@@ -1628,6 +1644,13 @@ skip_lock:
 				}
 
 				if (old_vers == NULL) {
+					/* The record does not exist
+					in our read view. Skip it, but
+					first attempt to determine
+					whether the index segment we
+					are searching through has been
+					exhausted. */
+
 					offsets = rec_get_offsets(
 						rec, index, offsets,
 						ULINT_UNDEFINED, &heap);
@@ -2647,9 +2670,8 @@ Convert a row in the Innobase format to
 Note that the template in prebuilt may advise us to copy only a few
 columns to mysql_rec, other columns are left blank. All columns may not
 be needed in the query.
-@return TRUE if success, FALSE if could not allocate memory for a BLOB
-(though we may also assert in that case) */
-static
+@return TRUE on success, FALSE if not all columns could be retrieved */
+static __attribute__((warn_unused_result))
 ibool
 row_sel_store_mysql_rec(
 /*====================*/
@@ -2672,6 +2694,7 @@ row_sel_store_mysql_rec(
 	ut_ad(prebuilt->mysql_template);
 	ut_ad(prebuilt->default_rec);
 	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
 
 	if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
 		mem_heap_free(prebuilt->blob_heap);
@@ -2719,6 +2742,26 @@ row_sel_store_mysql_rec(
 				dict_table_zip_size(prebuilt->table),
 				templ->rec_field_no, &len, heap);
 
+			if (UNIV_UNLIKELY(!data)) {
+				/* The externally stored field
+				was not written yet. This is
+				only a valid condition when
+				the server crashed after the
+				time a record stub was freshly
+				inserted but before all its
+				columns were written. This
+				record should only be seen by
+				recv_recovery_rollback_active()
+				or any TRX_ISO_READ_UNCOMMITTED
+				transactions. */
+
+				if (extern_field_heap) {
+					mem_heap_free(extern_field_heap);
+				}
+
+				return(FALSE);
+			}
+
 			ut_a(len != UNIV_SQL_NULL);
 		} else {
 			/* Field is stored in the row. */
@@ -3136,9 +3179,10 @@ row_sel_pop_cached_row_for_mysql(
 }
 
 /********************************************************************//**
-Pushes a row for MySQL to the fetch cache. */
-UNIV_INLINE
-void
+Pushes a row for MySQL to the fetch cache.
+@return TRUE on success, FALSE if the record contains incomplete BLOBs */
+UNIV_INLINE __attribute__((warn_unused_result))
+ibool
 row_sel_push_cache_row_for_mysql(
 /*=============================*/
 	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
@@ -3180,10 +3224,11 @@ row_sel_push_cache_row_for_mysql(
 				  prebuilt->fetch_cache[
 					  prebuilt->n_fetch_cached],
 				  prebuilt, rec, offsets))) {
-		ut_error;
+		return(FALSE);
 	}
 
 	prebuilt->n_fetch_cached++;
+	return(TRUE);
 }
 
 /*********************************************************************//**
@@ -3578,6 +3623,24 @@ row_search_for_mysql(
 
 				if (!row_sel_store_mysql_rec(buf, prebuilt,
 							     rec, offsets)) {
+					/* Only fresh inserts at
+					server crash time may contain
+					incomplete externally stored
+					columns. Pretend that such
+					records do not exist. Such
+					records may only be accessed
+					at the READ UNCOMMITTED
+					isolation level or when
+					rolling back a recovered
+					transaction. Rollback happens
+					at a lower level, not here. */
+					ut_a(trx->isolation_level
+					     == TRX_ISO_READ_UNCOMMITTED);
+					/* TODO: assert that there is
+					an owner_trx with
+					owner_trx->id == DB_TRX_ID and
+					owner_trx->is_recovered */
+
 					err = DB_TOO_BIG_RECORD;
 
 					/* We let the main loop to do the
@@ -4357,9 +4420,20 @@ requires_clust_rec:
 		not cache rows because there the cursor is a scrollable
 		cursor. */
 
-		row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
-						 offsets);
-		if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
+		if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
+						      offsets)) {
+			/* Only fresh inserts at server crash time may contain
+			incomplete externally stored columns. Pretend that
+			such records do not exist. Such records may only be
+			accessed at the READ UNCOMMITTED isolation level or
+			when rolling back a recovered transaction. Rollback
+			happens at a lower level, not here. */
+			ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
+			/* TODO: assert that there is an owner_trx
+			with owner_trx->id == DB_TRX_ID and
+			owner_trx->is_recovered */
+		} else if (prebuilt->n_fetch_cached
+			   == MYSQL_FETCH_CACHE_SIZE) {
 
 			goto got_row;
 		}
@@ -4375,9 +4449,20 @@ requires_clust_rec:
 		} else {
 			if (!row_sel_store_mysql_rec(buf, prebuilt,
 						     result_rec, offsets)) {
-				err = DB_TOO_BIG_RECORD;
-
-				goto lock_wait_or_error;
+				/* Only fresh inserts at server crash
+				time may contain incomplete externally
+				stored columns. Pretend that such
+				records do not exist. Such records may
+				only be accessed at the READ UNCOMMITTED
+				isolation level or when rolling back a
+				recovered transaction. Rollback happens
+				at a lower level, not here. */
+				ut_a(trx->isolation_level
+				     == TRX_ISO_READ_UNCOMMITTED);
+				/* TODO: assert that there is an owner_trx
+				with owner_trx->id == DB_TRX_ID and
+				owner_trx->is_recovered */
+				goto next_rec;
 			}
 		}
 

Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j.bundle
Thread
bzr commit into mysql-5.1-innodb branch (marko.makela:3529) Bug#54358marko.makela29 Jun