List:Commits« Previous MessageNext Message »
From:marko.makela Date:June 29 2010 1:19pm
Subject:bzr commit into mysql-trunk-innodb branch (marko.makela:3127) Bug#54358
View as plain text  
#At file:///home/marko/innobase/dev/mysql2a/5.5-innodb/ based on revid:marko.makela@stripped8lnzr44z94gg

 3127 Marko Mäkelä	2010-06-29
      Merge Bug#54358 fix from mysql-5.1-innodb:
      ------------------------------------------------------------
      revno: 3529
      revision-id: marko.makela@stripped629125518-m3am4ia1ffjr0d0j
      parent: jimmy.yang@oracle.com-20100629024137-690sacm5sogruzvb
      committer: Marko Mäkelä <marko.makela@stripped>
      branch nick: 5.1-innodb
      timestamp: Tue 2010-06-29 15:55:18 +0300
      message:
        Bug#54358: READ UNCOMMITTED access failure of off-page DYNAMIC or COMPRESSED
        columns
      
        When the server crashes after a record stub has been inserted and
        before all its off-page columns have been written, the record will
        contain incomplete off-page columns after crash recovery. Such records
        may only be accessed at the READ UNCOMMITTED isolation level or when
        rolling back a recovered transaction in recv_recovery_rollback_active().
        Skip these records at the READ UNCOMMITTED isolation level.
      
        TODO: Add assertions for checking the above assumptions hold when an
        incomplete BLOB is encountered.
      
        btr_rec_copy_externally_stored_field(): Return NULL if the field is
        incomplete.
      
        row_prebuilt_t::templ_contains_blob: Clarify what "BLOB" means in this
        context. Hint: MySQL BLOBs are not the same as InnoDB BLOBs.
      
        row_sel_store_mysql_rec(): Return FALSE if not all columns could be
        retrieved. Previously this function always returned TRUE.  Assert that
        the record is not delete-marked.
      
        row_sel_push_cache_row_for_mysql(): Return FALSE if not all columns
        could be retrieved.
      
        row_search_for_mysql(): Skip records containing incomplete off-page
        columns. Assert that the transaction isolation level is READ
        UNCOMMITTED.
      
        rb://380 approved by Jimmy Yang

    modified:
      storage/innobase/btr/btr0cur.c
      storage/innobase/include/btr0cur.h
      storage/innobase/include/row0mysql.h
      storage/innobase/row/row0merge.c
      storage/innobase/row/row0sel.c
=== modified file 'storage/innobase/btr/btr0cur.c'
--- a/storage/innobase/btr/btr0cur.c	revid:marko.makela@stripped-20100629113248-fvl48lnzr44z94gg
+++ b/storage/innobase/btr/btr0cur.c	revid:marko.makela@stripped-epjs6h2rv457h7qv
@@ -4934,7 +4934,7 @@ btr_copy_externally_stored_field(
 
 /*******************************************************************//**
 Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap */
+@return	the field copied to heap, or NULL if the field is incomplete */
 UNIV_INTERN
 byte*
 btr_rec_copy_externally_stored_field(
@@ -4964,6 +4964,28 @@ btr_rec_copy_externally_stored_field(
 
 	data = rec_get_nth_field(rec, offsets, no, &local_len);
 
+	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+	if (UNIV_UNLIKELY
+	    (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
+		     field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
+		/* The externally stored field was not written
+		yet. This is only a valid condition when the server
+		crashed after the time a record stub was freshly
+		inserted but before all its columns were written. This
+		record should only be seen by
+		recv_recovery_rollback_active() or any
+		TRX_ISO_READ_UNCOMMITTED transactions. */
+
+		/* TODO: assert that there is an owner_trx with
+		owner_trx->id == DB_TRX_ID and owner_trx->is_recovered */
+
+		/* TODO: assert that for the current transaction trx,
+		either (trx == owner_trx && trx_is_recv(trx)) or
+		trx->isolation_level == TRX_ISO_READ_UNCOMMITTED. */
+		return(NULL);
+	}
+
 	return(btr_copy_externally_stored_field(len, data,
 						zip_size, local_len, heap));
 }

=== modified file 'storage/innobase/include/btr0cur.h'
--- a/storage/innobase/include/btr0cur.h	revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg
+++ b/storage/innobase/include/btr0cur.h	revid:marko.makela@stripped0629131907-epjs6h2rv457h7qv
@@ -558,7 +558,7 @@ btr_copy_externally_stored_field_prefix(
 	ulint		local_len);/*!< in: length of data, in bytes */
 /*******************************************************************//**
 Copies an externally stored field of a record to mem heap.
-@return	the field copied to heap */
+@return	the field copied to heap, or NULL if the field is incomplete */
 UNIV_INTERN
 byte*
 btr_rec_copy_externally_stored_field(

=== modified file 'storage/innobase/include/row0mysql.h'
--- a/storage/innobase/include/row0mysql.h	revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg
+++ b/storage/innobase/include/row0mysql.h	revid:marko.makela@stripped-20100629131907-epjs6h2rv457h7qv
@@ -633,7 +633,11 @@ struct row_prebuilt_struct {
 					the secondary index, then this is
 					set to TRUE */
 	unsigned	templ_contains_blob:1;/*!< TRUE if the template contains
-					BLOB column(s) */
+					a column with DATA_BLOB ==
+					get_innobase_type_from_mysql_type();
+					not to be confused with InnoDB
+					externally stored columns
+					(VARCHAR can be off-page too) */
 	mysql_row_templ_t* mysql_template;/*!< template used to transform
 					rows fast between MySQL and Innobase
 					formats; memory for this template

=== modified file 'storage/innobase/row/row0merge.c'
--- a/storage/innobase/row/row0merge.c	revid:marko.makela@oracle.com-20100629113248-fvl48lnzr44z94gg
+++ b/storage/innobase/row/row0merge.c	revid:marko.makela@stripped00629131907-epjs6h2rv457h7qv
@@ -1779,6 +1779,11 @@ row_merge_copy_blobs(
 		(below). */
 		data = btr_rec_copy_externally_stored_field(
 			mrec, offsets, zip_size, i, &len, heap);
+		/* Because we have locked the table, any records
+		written by incomplete transactions must have been
+		rolled back already. There must not be any incomplete
+		BLOB columns. */
+		ut_a(data);
 
 		dfield_set_data(field, data, len);
 	}

=== modified file 'storage/innobase/row/row0sel.c'
--- a/storage/innobase/row/row0sel.c	revid:marko.makela@stripped4gg
+++ b/storage/innobase/row/row0sel.c	revid:marko.makela@stripped
@@ -414,7 +414,7 @@ row_sel_fetch_columns(
 							      field_no))) {
 
 				/* Copy an externally stored field to the
-				temporary heap */
+				temporary heap, if possible. */
 
 				heap = mem_heap_create(1);
 
@@ -423,6 +423,21 @@ row_sel_fetch_columns(
 					dict_table_zip_size(index->table),
 					field_no, &len, heap);
 
+				/* data == NULL means that the
+				externally stored field was not
+				written yet. This is only a valid
+				condition when the server crashed
+				after the time a record stub was
+				freshly inserted but before all its
+				columns were written. This record
+				should only be seen by
+				recv_recovery_rollback_active() or any
+				TRX_ISO_READ_UNCOMMITTED
+				transactions. The InnoDB SQL parser
+				(the sole caller of this function)
+				does not implement READ UNCOMMITTED,
+				and it is not involved during rollback. */
+				ut_a(data);
 				ut_a(len != UNIV_SQL_NULL);
 
 				needs_copy = TRUE;
@@ -924,6 +939,7 @@ row_sel_get_clust_rec(
 	when plan->clust_pcur was positioned.  The latch will not be
 	released until mtr_commit(mtr). */
 
+	ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
 	row_sel_fetch_columns(index, clust_rec, offsets,
 			      UT_LIST_GET_FIRST(plan->columns));
 	*out_rec = clust_rec;
@@ -1626,6 +1642,13 @@ skip_lock:
 				}
 
 				if (old_vers == NULL) {
+					/* The record does not exist
+					in our read view. Skip it, but
+					first attempt to determine
+					whether the index segment we
+					are searching through has been
+					exhausted. */
+
 					offsets = rec_get_offsets(
 						rec, index, offsets,
 						ULINT_UNDEFINED, &heap);
@@ -2639,9 +2662,8 @@ Convert a row in the Innobase format to
 Note that the template in prebuilt may advise us to copy only a few
 columns to mysql_rec, other columns are left blank. All columns may not
 be needed in the query.
-@return TRUE if success, FALSE if could not allocate memory for a BLOB
-(though we may also assert in that case) */
-static
+@return TRUE on success, FALSE if not all columns could be retrieved */
+static __attribute__((warn_unused_result))
 ibool
 row_sel_store_mysql_rec(
 /*====================*/
@@ -2664,6 +2686,7 @@ row_sel_store_mysql_rec(
 	ut_ad(prebuilt->mysql_template);
 	ut_ad(prebuilt->default_rec);
 	ut_ad(rec_offs_validate(rec, NULL, offsets));
+	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
 
 	if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
 		mem_heap_free(prebuilt->blob_heap);
@@ -2711,6 +2734,26 @@ row_sel_store_mysql_rec(
 				dict_table_zip_size(prebuilt->table),
 				templ->rec_field_no, &len, heap);
 
+			if (UNIV_UNLIKELY(!data)) {
+				/* The externally stored field
+				was not written yet. This is
+				only a valid condition when
+				the server crashed after the
+				time a record stub was freshly
+				inserted but before all its
+				columns were written. This
+				record should only be seen by
+				recv_recovery_rollback_active()
+				or any TRX_ISO_READ_UNCOMMITTED
+				transactions. */
+
+				if (extern_field_heap) {
+					mem_heap_free(extern_field_heap);
+				}
+
+				return(FALSE);
+			}
+
 			ut_a(len != UNIV_SQL_NULL);
 		} else {
 			/* Field is stored in the row. */
@@ -3128,9 +3171,10 @@ row_sel_pop_cached_row_for_mysql(
 }
 
 /********************************************************************//**
-Pushes a row for MySQL to the fetch cache. */
-UNIV_INLINE
-void
+Pushes a row for MySQL to the fetch cache.
+@return TRUE on success, FALSE if the record contains incomplete BLOBs */
+UNIV_INLINE __attribute__((warn_unused_result))
+ibool
 row_sel_push_cache_row_for_mysql(
 /*=============================*/
 	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
@@ -3172,10 +3216,11 @@ row_sel_push_cache_row_for_mysql(
 				  prebuilt->fetch_cache[
 					  prebuilt->n_fetch_cached],
 				  prebuilt, rec, offsets))) {
-		ut_error;
+		return(FALSE);
 	}
 
 	prebuilt->n_fetch_cached++;
+	return(TRUE);
 }
 
 /*********************************************************************//**
@@ -3570,6 +3615,24 @@ row_search_for_mysql(
 
 				if (!row_sel_store_mysql_rec(buf, prebuilt,
 							     rec, offsets)) {
+					/* Only fresh inserts at
+					server crash time may contain
+					incomplete externally stored
+					columns. Pretend that such
+					records do not exist. Such
+					records may only be accessed
+					at the READ UNCOMMITTED
+					isolation level or when
+					rolling back a recovered
+					transaction. Rollback happens
+					at a lower level, not here. */
+					ut_a(trx->isolation_level
+					     == TRX_ISO_READ_UNCOMMITTED);
+					/* TODO: assert that there is
+					an owner_trx with
+					owner_trx->id == DB_TRX_ID and
+					owner_trx->is_recovered */
+
 					err = DB_TOO_BIG_RECORD;
 
 					/* We let the main loop to do the
@@ -4349,9 +4412,20 @@ requires_clust_rec:
 		not cache rows because there the cursor is a scrollable
 		cursor. */
 
-		row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
-						 offsets);
-		if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
+		if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
+						      offsets)) {
+			/* Only fresh inserts at server crash time may contain
+			incomplete externally stored columns. Pretend that
+			such records do not exist. Such records may only be
+			accessed at the READ UNCOMMITTED isolation level or
+			when rolling back a recovered transaction. Rollback
+			happens at a lower level, not here. */
+			ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
+			/* TODO: assert that there is an owner_trx
+			with owner_trx->id == DB_TRX_ID and
+			owner_trx->is_recovered */
+		} else if (prebuilt->n_fetch_cached
+			   == MYSQL_FETCH_CACHE_SIZE) {
 
 			goto got_row;
 		}
@@ -4367,9 +4441,20 @@ requires_clust_rec:
 		} else {
 			if (!row_sel_store_mysql_rec(buf, prebuilt,
 						     result_rec, offsets)) {
-				err = DB_TOO_BIG_RECORD;
-
-				goto lock_wait_or_error;
+				/* Only fresh inserts at server crash
+				time may contain incomplete externally
+				stored columns. Pretend that such
+				records do not exist. Such records may
+				only be accessed at the READ UNCOMMITTED
+				isolation level or when rolling back a
+				recovered transaction. Rollback happens
+				at a lower level, not here. */
+				ut_a(trx->isolation_level
+				     == TRX_ISO_READ_UNCOMMITTED);
+				/* TODO: assert that there is an owner_trx
+				with owner_trx->id == DB_TRX_ID and
+				owner_trx->is_recovered */
+				goto next_rec;
 			}
 		}
 

Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20100629131907-epjs6h2rv457h7qv.bundle
Thread
bzr commit into mysql-trunk-innodb branch (marko.makela:3127) Bug#54358marko.makela29 Jun