#At file:///home/marko/innobase/dev/mysql2a/5.1-innodb/ based on revid:jimmy.yang@strippedm5sogruzvb
3529 Marko Mäkelä 2010-06-29
Bug#54358: READ UNCOMMITTED access failure of off-page DYNAMIC or COMPRESSED
columns
When the server crashes after a record stub has been inserted and
before all its off-page columns have been written, the record will
contain incomplete off-page columns after crash recovery. Such records
may only be accessed at the READ UNCOMMITTED isolation level or when
rolling back a recovered transaction in recv_recovery_rollback_active().
Skip these records at the READ UNCOMMITTED isolation level.
TODO: Add assertions for checking the above assumptions hold when an
incomplete BLOB is encountered.
btr_rec_copy_externally_stored_field(): Return NULL if the field is
incomplete.
row_prebuilt_t::templ_contains_blob: Clarify what "BLOB" means in this
context. Hint: MySQL BLOBs are not the same as InnoDB BLOBs.
row_sel_store_mysql_rec(): Return FALSE if not all columns could be
retrieved. Previously this function always returned TRUE. Assert that
the record is not delete-marked.
row_sel_push_cache_row_for_mysql(): Return FALSE if not all columns
could be retrieved.
row_search_for_mysql(): Skip records containing incomplete off-page
columns. Assert that the transaction isolation level is READ
UNCOMMITTED.
rb://380 approved by Jimmy Yang
modified:
storage/innodb_plugin/btr/btr0cur.c
storage/innodb_plugin/include/btr0cur.h
storage/innodb_plugin/include/row0mysql.h
storage/innodb_plugin/row/row0merge.c
storage/innodb_plugin/row/row0sel.c
=== modified file 'storage/innodb_plugin/btr/btr0cur.c'
--- a/storage/innodb_plugin/btr/btr0cur.c revid:jimmy.yang@stripped
+++ b/storage/innodb_plugin/btr/btr0cur.c revid:marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j
@@ -4814,7 +4814,7 @@ btr_copy_externally_stored_field(
/*******************************************************************//**
Copies an externally stored field of a record to mem heap.
-@return the field copied to heap */
+@return the field copied to heap, or NULL if the field is incomplete */
UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
@@ -4844,6 +4844,28 @@ btr_rec_copy_externally_stored_field(
data = rec_get_nth_field(rec, offsets, no, &local_len);
+ ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ if (UNIV_UNLIKELY
+ (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
+ /* The externally stored field was not written
+ yet. This is only a valid condition when the server
+ crashed after the time a record stub was freshly
+ inserted but before all its columns were written. This
+ record should only be seen by
+ recv_recovery_rollback_active() or any
+ TRX_ISO_READ_UNCOMMITTED transactions. */
+
+ /* TODO: assert that there is an owner_trx with
+ owner_trx->id == DB_TRX_ID and owner_trx->is_recovered */
+
+ /* TODO: assert that for the current transaction trx,
+ either (trx == owner_trx && trx_is_recv(trx)) or
+ trx->isolation_level == TRX_ISO_READ_UNCOMMITTED. */
+ return(NULL);
+ }
+
return(btr_copy_externally_stored_field(len, data,
zip_size, local_len, heap));
}
=== modified file 'storage/innodb_plugin/include/btr0cur.h'
--- a/storage/innodb_plugin/include/btr0cur.h revid:jimmy.yang@strippedsogruzvb
+++ b/storage/innodb_plugin/include/btr0cur.h revid:marko.makela@stripped0j
@@ -570,7 +570,7 @@ btr_copy_externally_stored_field_prefix(
ulint local_len);/*!< in: length of data, in bytes */
/*******************************************************************//**
Copies an externally stored field of a record to mem heap.
-@return the field copied to heap */
+@return the field copied to heap, or NULL if the field is incomplete */
UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
=== modified file 'storage/innodb_plugin/include/row0mysql.h'
--- a/storage/innodb_plugin/include/row0mysql.h revid:jimmy.yang@stripped9024137-690sacm5sogruzvb
+++ b/storage/innodb_plugin/include/row0mysql.h revid:marko.makela@stripped518-m3am4ia1ffjr0d0j
@@ -622,7 +622,11 @@ struct row_prebuilt_struct {
the secondary index, then this is
set to TRUE */
unsigned templ_contains_blob:1;/*!< TRUE if the template contains
- BLOB column(s) */
+ a column with DATA_BLOB ==
+ get_innobase_type_from_mysql_type();
+ not to be confused with InnoDB
+ externally stored columns
+ (VARCHAR can be off-page too) */
mysql_row_templ_t* mysql_template;/*!< template used to transform
rows fast between MySQL and Innobase
formats; memory for this template
=== modified file 'storage/innodb_plugin/row/row0merge.c'
--- a/storage/innodb_plugin/row/row0merge.c revid:jimmy.yang@oracle.com-20100629024137-690sacm5sogruzvb
+++ b/storage/innodb_plugin/row/row0merge.c revid:marko.makela@stripped100629125518-m3am4ia1ffjr0d0j
@@ -1780,6 +1780,11 @@ row_merge_copy_blobs(
(below). */
data = btr_rec_copy_externally_stored_field(
mrec, offsets, zip_size, i, &len, heap);
+ /* Because we have locked the table, any records
+ written by incomplete transactions must have been
+ rolled back already. There must not be any incomplete
+ BLOB columns. */
+ ut_a(data);
dfield_set_data(field, data, len);
}
=== modified file 'storage/innodb_plugin/row/row0sel.c'
--- a/storage/innodb_plugin/row/row0sel.c revid:jimmy.yang@strippedacm5sogruzvb
+++ b/storage/innodb_plugin/row/row0sel.c revid:marko.makela@stripped0j
@@ -416,7 +416,7 @@ row_sel_fetch_columns(
field_no))) {
/* Copy an externally stored field to the
- temporary heap */
+ temporary heap, if possible. */
heap = mem_heap_create(1);
@@ -425,6 +425,21 @@ row_sel_fetch_columns(
dict_table_zip_size(index->table),
field_no, &len, heap);
+ /* data == NULL means that the
+ externally stored field was not
+ written yet. This is only a valid
+ condition when the server crashed
+ after the time a record stub was
+ freshly inserted but before all its
+ columns were written. This record
+ should only be seen by
+ recv_recovery_rollback_active() or any
+ TRX_ISO_READ_UNCOMMITTED
+ transactions. The InnoDB SQL parser
+ (the sole caller of this function)
+ does not implement READ UNCOMMITTED,
+ and it is not involved during rollback. */
+ ut_a(data);
ut_a(len != UNIV_SQL_NULL);
needs_copy = TRUE;
@@ -926,6 +941,7 @@ row_sel_get_clust_rec(
when plan->clust_pcur was positioned. The latch will not be
released until mtr_commit(mtr). */
+ ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
row_sel_fetch_columns(index, clust_rec, offsets,
UT_LIST_GET_FIRST(plan->columns));
*out_rec = clust_rec;
@@ -1628,6 +1644,13 @@ skip_lock:
}
if (old_vers == NULL) {
+ /* The record does not exist
+ in our read view. Skip it, but
+ first attempt to determine
+ whether the index segment we
+ are searching through has been
+ exhausted. */
+
offsets = rec_get_offsets(
rec, index, offsets,
ULINT_UNDEFINED, &heap);
@@ -2647,9 +2670,8 @@ Convert a row in the Innobase format to
Note that the template in prebuilt may advise us to copy only a few
columns to mysql_rec, other columns are left blank. All columns may not
be needed in the query.
-@return TRUE if success, FALSE if could not allocate memory for a BLOB
-(though we may also assert in that case) */
-static
+@return TRUE on success, FALSE if not all columns could be retrieved */
+static __attribute__((warn_unused_result))
ibool
row_sel_store_mysql_rec(
/*====================*/
@@ -2672,6 +2694,7 @@ row_sel_store_mysql_rec(
ut_ad(prebuilt->mysql_template);
ut_ad(prebuilt->default_rec);
ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
mem_heap_free(prebuilt->blob_heap);
@@ -2719,6 +2742,26 @@ row_sel_store_mysql_rec(
dict_table_zip_size(prebuilt->table),
templ->rec_field_no, &len, heap);
+ if (UNIV_UNLIKELY(!data)) {
+ /* The externally stored field
+ was not written yet. This is
+ only a valid condition when
+ the server crashed after the
+ time a record stub was freshly
+ inserted but before all its
+ columns were written. This
+ record should only be seen by
+ recv_recovery_rollback_active()
+ or any TRX_ISO_READ_UNCOMMITTED
+ transactions. */
+
+ if (extern_field_heap) {
+ mem_heap_free(extern_field_heap);
+ }
+
+ return(FALSE);
+ }
+
ut_a(len != UNIV_SQL_NULL);
} else {
/* Field is stored in the row. */
@@ -3136,9 +3179,10 @@ row_sel_pop_cached_row_for_mysql(
}
/********************************************************************//**
-Pushes a row for MySQL to the fetch cache. */
-UNIV_INLINE
-void
+Pushes a row for MySQL to the fetch cache.
+@return TRUE on success, FALSE if the record contains incomplete BLOBs */
+UNIV_INLINE __attribute__((warn_unused_result))
+ibool
row_sel_push_cache_row_for_mysql(
/*=============================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
@@ -3180,10 +3224,11 @@ row_sel_push_cache_row_for_mysql(
prebuilt->fetch_cache[
prebuilt->n_fetch_cached],
prebuilt, rec, offsets))) {
- ut_error;
+ return(FALSE);
}
prebuilt->n_fetch_cached++;
+ return(TRUE);
}
/*********************************************************************//**
@@ -3578,6 +3623,24 @@ row_search_for_mysql(
if (!row_sel_store_mysql_rec(buf, prebuilt,
rec, offsets)) {
+ /* Only fresh inserts at
+ server crash time may contain
+ incomplete externally stored
+ columns. Pretend that such
+ records do not exist. Such
+ records may only be accessed
+ at the READ UNCOMMITTED
+ isolation level or when
+ rolling back a recovered
+ transaction. Rollback happens
+ at a lower level, not here. */
+ ut_a(trx->isolation_level
+ == TRX_ISO_READ_UNCOMMITTED);
+ /* TODO: assert that there is
+ an owner_trx with
+ owner_trx->id == DB_TRX_ID and
+ owner_trx->is_recovered */
+
err = DB_TOO_BIG_RECORD;
/* We let the main loop to do the
@@ -4357,9 +4420,20 @@ requires_clust_rec:
not cache rows because there the cursor is a scrollable
cursor. */
- row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
- offsets);
- if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
+ if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
+ offsets)) {
+ /* Only fresh inserts at server crash time may contain
+ incomplete externally stored columns. Pretend that
+ such records do not exist. Such records may only be
+ accessed at the READ UNCOMMITTED isolation level or
+ when rolling back a recovered transaction. Rollback
+ happens at a lower level, not here. */
+ ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
+ /* TODO: assert that there is an owner_trx
+ with owner_trx->id == DB_TRX_ID and
+ owner_trx->is_recovered */
+ } else if (prebuilt->n_fetch_cached
+ == MYSQL_FETCH_CACHE_SIZE) {
goto got_row;
}
@@ -4375,9 +4449,20 @@ requires_clust_rec:
} else {
if (!row_sel_store_mysql_rec(buf, prebuilt,
result_rec, offsets)) {
- err = DB_TOO_BIG_RECORD;
-
- goto lock_wait_or_error;
+ /* Only fresh inserts at server crash
+ time may contain incomplete externally
+ stored columns. Pretend that such
+ records do not exist. Such records may
+ only be accessed at the READ UNCOMMITTED
+ isolation level or when rolling back a
+ recovered transaction. Rollback happens
+ at a lower level, not here. */
+ ut_a(trx->isolation_level
+ == TRX_ISO_READ_UNCOMMITTED);
+ /* TODO: assert that there is an owner_trx
+ with owner_trx->id == DB_TRX_ID and
+ owner_trx->is_recovered */
+ goto next_rec;
}
}
Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20100629125518-m3am4ia1ffjr0d0j.bundle
| Thread |
|---|
| • bzr commit into mysql-5.1-innodb branch (marko.makela:3529) Bug#54358 | marko.makela | 29 Jun |