List: Commits  « Previous Message | Next Message »
From: marko.makela  Date: May 10 2012 1:11pm
Subject: bzr push into mysql-trunk-wl5854 branch (marko.makela:3766 to 3767) WL#5854 WL#6255
View as plain text
 3767 Marko Mäkelä	2012-05-10
      WL#5854 (broken patch, abandoning this tree for WL#6255)
      
      Start implementing ALTER TABLE t MODIFY c NULL
      for tables that are not ROW_FORMAT=REDUNDANT.
      
      This adds a multi-versioned data dictionary for the clustered index and the
      table. The patch is incomplete and broken. This approach could work, but
      we will try something simpler to have a better hope of making the deadline.

    modified:
      mysql-test/r/alter_table.result
      mysql-test/suite/innodb/r/innodb-alter-nullable.result
      mysql-test/suite/innodb/t/innodb-alter-nullable.test
      storage/innobase/btr/btr0btr.cc
      storage/innobase/btr/btr0cur.cc
      storage/innobase/dict/dict0crea.cc
      storage/innobase/dict/dict0dict.cc
      storage/innobase/dict/dict0mem.cc
      storage/innobase/handler/handler0alter.cc
      storage/innobase/include/btr0btr.h
      storage/innobase/include/btr0pcur.h
      storage/innobase/include/btr0sea.ic
      storage/innobase/include/dict0dict.h
      storage/innobase/include/dict0dict.ic
      storage/innobase/include/dict0mem.h
      storage/innobase/include/page0page.h
      storage/innobase/include/page0zip.h
      storage/innobase/include/rem0rec.h
      storage/innobase/include/rem0rec.ic
      storage/innobase/include/row0merge.h
      storage/innobase/include/row0row.ic
      storage/innobase/include/univ.i
      storage/innobase/lock/lock0lock.cc
      storage/innobase/page/page0page.cc
      storage/innobase/page/page0zip.cc
      storage/innobase/row/row0merge.cc
      storage/innobase/row/row0mysql.cc
      storage/innobase/row/row0sel.cc
 3766 Marko Mäkelä	2012-05-10
      WL#5854: Allocate index->lock and index->blobs_mutex separately
      from the rest of the dict_index_t, so that when there are multiple
      versions of a clustered index (and a table definition), these
      data structures will be shared among all of them.

    modified:
      storage/innobase/btr/btr0btr.cc
      storage/innobase/dict/dict0dict.cc
      storage/innobase/dict/dict0mem.cc
      storage/innobase/handler/handler0alter.cc
      storage/innobase/include/dict0dict.ic
      storage/innobase/include/dict0mem.h
=== modified file 'mysql-test/r/alter_table.result'
--- a/mysql-test/r/alter_table.result	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/mysql-test/r/alter_table.result	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -1748,8 +1748,8 @@ ALTER TABLE tm1 MODIFY COLUMN c INT NOT
 affected rows: 2
 info: Records: 2  Duplicates: 0  Warnings: 0
 ALTER TABLE ti1 MODIFY COLUMN c INT NULL;
-affected rows: 2
-info: Records: 2  Duplicates: 0  Warnings: 0
+affected rows: 0
+info: Records: 0  Duplicates: 0  Warnings: 0
 ALTER TABLE tm1 MODIFY COLUMN c INT NULL;
 affected rows: 2
 info: Records: 2  Duplicates: 0  Warnings: 0

=== modified file 'mysql-test/suite/innodb/r/innodb-alter-nullable.result'
--- a/mysql-test/suite/innodb/r/innodb-alter-nullable.result	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/mysql-test/suite/innodb/r/innodb-alter-nullable.result	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -39,11 +39,9 @@ c1	c2	c3
 7	8	9
 ALTER TABLE tr MODIFY c2 INT, ALGORITHM=INPLACE;
 ALTER TABLE tc MODIFY c2 INT, ALGORITHM=INPLACE;
-ERROR 42000: This version of MySQL doesn't yet support 'ALTER TABLE tc MODIFY c2 INT, ALGORITHM=INPLACE'
 BEGIN;
 UPDATE tr SET c2=NULL;
 UPDATE tc SET c2=NULL;
-ERROR 23000: Column 'c2' cannot be null
 SELECT * FROM tr;
 c1	c2	c3
 1	NULL	3
@@ -51,9 +49,9 @@ c1	c2	c3
 7	NULL	9
 SELECT * FROM tc;
 c1	c2	c3
-1	2	3
-4	5	6
-7	8	9
+1	NULL	3
+4	NULL	6
+7	NULL	9
 ROLLBACK;
 SELECT * FROM tr;
 c1	c2	c3
@@ -67,7 +65,6 @@ c1	c2	c3
 7	8	9
 ALTER TABLE tr MODIFY c2 INT NULL, ALGORITHM=INPLACE;
 ALTER TABLE tc MODIFY c2 INT NULL, ALGORITHM=INPLACE;
-ERROR 42000: This version of MySQL doesn't yet support 'ALTER TABLE tc MODIFY c2 INT NULL, ALGORITHM=INPLACE'
 SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES
 WHERE NAME LIKE 'test/t[rc]';
 TABLE_ID	NAME	FLAG	N_COLS	SPACE

=== modified file 'mysql-test/suite/innodb/t/innodb-alter-nullable.test'
--- a/mysql-test/suite/innodb/t/innodb-alter-nullable.test	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/mysql-test/suite/innodb/t/innodb-alter-nullable.test	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -63,13 +63,11 @@ connection default;
 
 # These should change the column to NULL.
 ALTER TABLE tr MODIFY c2 INT, ALGORITHM=INPLACE;
---error ER_NOT_SUPPORTED_YET
 ALTER TABLE tc MODIFY c2 INT, ALGORITHM=INPLACE;
 
 connection con1;
 BEGIN;
 UPDATE tr SET c2=NULL;
---error ER_BAD_NULL_ERROR
 UPDATE tc SET c2=NULL;
 SELECT * FROM tr;
 SELECT * FROM tc;
@@ -82,7 +80,6 @@ connection default;
 
 # These should be no-ops.
 ALTER TABLE tr MODIFY c2 INT NULL, ALGORITHM=INPLACE;
---error ER_NOT_SUPPORTED_YET
 ALTER TABLE tc MODIFY c2 INT NULL, ALGORITHM=INPLACE;
 
 SELECT * FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES

=== modified file 'storage/innobase/btr/btr0btr.cc'
--- a/storage/innobase/btr/btr0btr.cc	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/btr/btr0btr.cc	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -1608,18 +1608,20 @@ btr_free_root(
 #endif /* !UNIV_HOTBACKUP */
 
 /*************************************************************//**
-Reorganizes an index page. */
-static
-ibool
+Reorganizes an index page.
+@return whether the operation succeeded */
+static __attribute__((nonnull))
+bool
 btr_page_reorganize_low(
 /*====================*/
-	ibool		recovery,/*!< in: TRUE if called in recovery:
+	bool		recovery,/*!< in: true if called in recovery:
 				locks should not be updated, i.e.,
 				there cannot exist locks on the
 				page, and a hash index should not be
 				dropped: it cannot exist */
 	buf_block_t*	block,	/*!< in: page to be reorganized */
-	dict_index_t*	index,	/*!< in: record descriptor */
+	dict_index_t*	page_index,/*!< in: index definition of the page */
+	dict_index_t*	index,	/*!< in: new index definition of the page */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 #ifndef UNIV_HOTBACKUP
@@ -1634,23 +1636,19 @@ btr_page_reorganize_low(
 	ulint		data_size2;
 	ulint		max_ins_size1;
 	ulint		max_ins_size2;
-	ibool		success		= FALSE;
+	bool		success		= false;
 
 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-	btr_assert_not_corrupted(block, index);
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+	btr_assert_not_corrupted(block, page_index);
+
 #ifdef UNIV_ZIP_DEBUG
 	ut_a(!page_zip || page_zip_validate(page_zip, page));
 #endif /* UNIV_ZIP_DEBUG */
 	data_size1 = page_get_data_size(page);
 	max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
 
-#ifndef UNIV_HOTBACKUP
-	/* Write the log record */
-	mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
-				  ? MLOG_COMP_PAGE_REORGANIZE
-				  : MLOG_PAGE_REORGANIZE, 0);
-#endif /* !UNIV_HOTBACKUP */
-
 	/* Turn logging off */
 	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
 
@@ -1668,23 +1666,45 @@ btr_page_reorganize_low(
 #ifndef UNIV_HOTBACKUP
 	if (!recovery) {
 		btr_search_drop_page_hash_index(block);
+
+		/* In change buffer merge (on secondary indexes), we
+		use a dummy index with bogus index->id. */
+		ut_ad(page_index->id == btr_page_get_index_id(page)
+		      || mtr->inside_ibuf);
 	}
 
 	block->check_index_page_at_flush = TRUE;
 #endif /* !UNIV_HOTBACKUP */
-	btr_blob_dbg_remove(page, index, "btr_page_reorganize");
+	btr_blob_dbg_remove(page, page_index, "btr_page_reorganize");
 
 	/* Recreate the page: note that global data on page (possible
 	segment headers, next page-field, etc.) is preserved intact */
 
 	page_create(block, mtr, dict_table_is_comp(index->table));
 
+#ifndef UNIV_HOTBACKUP
+	if (page_index != index) {
+		ut_ad(!recovery);
+		if (page_zip) {
+			mach_write_to_8(PAGE_HEADER + PAGE_INDEX_ID + page,
+					index->id);
+		} else {
+			mlog_write_ull(PAGE_HEADER + PAGE_INDEX_ID + page,
+				       index->id, mtr);
+		}
+	}
+#endif /* !UNIV_HOTBACKUP */
+
 	/* Copy the records from the temporary space to the recreated page;
 	do not copy the lock bits yet */
 
-	page_copy_rec_list_end_no_locks(block, temp_block,
-					page_get_infimum_rec(temp_page),
-					index, mtr);
+	if (!page_copy_rec_list_end_no_locks(
+		    block, temp_block,
+		    page_get_infimum_rec(temp_page),
+		    page_index, index, mtr)) {
+		ut_ad(page_index != index);
+		goto err_exit;
+	}
 
 	if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
 		/* Copy max trx id to recreated page */
@@ -1696,11 +1716,20 @@ btr_page_reorganize_low(
 		ut_ad(max_trx_id != 0 || recovery);
 	}
 
+	ut_ad(!memcmp(page, temp_page, FIL_PAGE_DATA));
+	ut_ad(!memcmp(PAGE_N_RECS + PAGE_HEADER + page,
+		      PAGE_N_RECS + PAGE_HEADER + temp_page,
+		      PAGE_INDEX_ID - PAGE_N_RECS));
+	ut_ad(!memcmp(PAGE_BTR_SEG_LEAF + PAGE_HEADER + page,
+		      PAGE_BTR_SEG_LEAF + PAGE_HEADER + temp_page,
+		      PAGE_DATA - PAGE_BTR_SEG_LEAF - PAGE_HEADER));
+
 	if (page_zip && !page_zip_compress(page_zip, page, index, NULL)) {
 
+err_exit:
 		/* Restore the old page and exit. */
-		btr_blob_dbg_restore(page, temp_page, index,
-				     "btr_page_reorganize_compress_fail");
+		btr_blob_dbg_restore(page, temp_page, page_index,
+				     "btr_page_reorganize_fail");
 
 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
 		/* Check that the bytes that we skip are identical. */
@@ -1715,6 +1744,8 @@ btr_page_reorganize_low(
 
 		memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
 		       PAGE_N_RECS - PAGE_N_DIR_SLOTS);
+		memcpy(PAGE_HEADER + PAGE_INDEX_ID + page,
+		       PAGE_HEADER + PAGE_INDEX_ID + temp_page, 8);
 		memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
 		       UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
 
@@ -1736,6 +1767,8 @@ btr_page_reorganize_low(
 	max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
 
 	if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
+		/* TODO: suppress this if page_index != index
+		and UT_BITS_IN_BYTES(index->n_nullable) changed */
 		buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
 		buf_page_print(temp_page, 0, BUF_PAGE_PRINT_NO_CRASH);
 
@@ -1750,8 +1783,9 @@ btr_page_reorganize_low(
 			(unsigned long) max_ins_size1,
 			(unsigned long) max_ins_size2);
 		ut_ad(0);
+		goto err_exit;
 	} else {
-		success = TRUE;
+		success = true;
 	}
 
 func_exit:
@@ -1775,16 +1809,75 @@ IMPORTANT: if btr_page_reorganize() is i
 page of a non-clustered index, the caller must update the insert
 buffer free bits in the same mini-transaction in such a way that the
 modification will be redo-logged.
-@return	TRUE on success, FALSE on failure */
+@return	true on success, false on failure */
 UNIV_INTERN
-ibool
+bool
 btr_page_reorganize(
 /*================*/
 	buf_block_t*	block,	/*!< in: page to be reorganized */
 	dict_index_t*	index,	/*!< in: record descriptor */
-	mtr_t*		mtr)	/*!< in: mtr */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
 {
-	return(btr_page_reorganize_low(FALSE, block, index, mtr));
+	bool	success = btr_page_reorganize_low(
+		false, block, index, index, mtr);
+
+	if (success) {
+		/* Write the log record. */
+		mlog_open_and_write_index(mtr, block->frame, index,
+					  page_is_comp(block->frame)
+					  ? MLOG_COMP_PAGE_REORGANIZE
+					  : MLOG_PAGE_REORGANIZE, 0);
+	}
+
+	return(success);
+}
+
+/*************************************************************//**
+Converts a clustered index leaf page by reorganizing it.
+IMPORTANT: when invoked on a compressed page, the caller must update
+the insert buffer free bits in the same mini-transaction in such a
+way that the modification will be redo-logged.
+@return	true on success, false on failure */
+UNIV_INTERN
+bool
+btr_page_convert(
+/*=============*/
+	buf_block_t*	block,		/*!< in: page to be converted */
+	index_id_t	index_id,	/*!< in: PAGE_INDEX_ID of the page */
+	dict_index_t*	index,		/*!< in: current clustered index */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+{
+	dict_index_t*	page_index = dict_index_get_version(index, index_id);
+
+	ut_ad(page_index);
+	if (!page_index) {
+		return(false);
+	}
+
+	if (!btr_page_reorganize_low(
+		    false, block, page_index, index, mtr)) {
+		return(false);
+	}
+
+	/* Write the new page image to the redo log. */
+	page_zip_des_t* page_zip = buf_block_get_page_zip(block);
+
+	if (page_zip) {
+		page_zip_compress_write_log(
+			page_zip, block->frame, index, mtr);
+	} else {
+		ut_ad(mach_read_from_8(PAGE_INDEX_ID + PAGE_HEADER
+				       + block->frame) == index->id);
+		mlog_log_string(PAGE_HEADER + block->frame,
+				PAGE_N_RECS, mtr);
+		mlog_write_ull(PAGE_INDEX_ID + PAGE_HEADER
+			       + block->frame, index->id, mtr);
+		mlog_log_string(PAGE_DATA + block->frame,
+				UNIV_PAGE_SIZE - PAGE_DATA
+				- FIL_PAGE_DATA_END, mtr);
+	}
+
+	return(true);
 }
 #endif /* !UNIV_HOTBACKUP */
 
@@ -1807,7 +1900,11 @@ btr_parse_page_reorganize(
 	/* The record is empty, except for the record initial part */
 
 	if (block != NULL) {
-		btr_page_reorganize_low(TRUE, block, index, mtr);
+		if (!btr_page_reorganize_low(true, block, index, index, mtr)) {
+			/* We should not write any redo log entry
+			for a failed reorganization. */
+			ut_ad(0);
+		}
 	}
 
 	return(ptr);

=== modified file 'storage/innobase/btr/btr0cur.cc'
--- a/storage/innobase/btr/btr0cur.cc	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/btr/btr0cur.cc	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -714,6 +714,18 @@ retry_page_get:
 	}
 
 	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+
+	{
+		index_id_t	page_index_id = btr_page_get_index_id(page);
+
+		if (index->id != page_index_id) {
+			index = dict_index_get_version(index, page_index_id);
+			/* A corresponding table definition must be found. */
+			ut_a(index);
+		}
+	}
+
+
 	ut_ad(index->id == btr_page_get_index_id(page));
 
 	if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
@@ -722,6 +734,7 @@ retry_page_get:
 		height = btr_page_get_level(page, mtr);
 		root_height = height;
 		cursor->tree_height = root_height + 1;
+		ut_ad(level <= height);
 
 #ifdef BTR_CUR_ADAPT
 		if (block != guess && info) {
@@ -773,11 +786,39 @@ retry_page_get:
 		const rec_t*	node_ptr;
 		ut_ad(height > 0);
 
+		if (index != cursor->index && index->table->newer) {
+			ut_ad(dict_index_is_clust(index));
+
+			/* Update PAGE_INDEX_ID on node pointer pages
+			to the newest definition if index->lock is
+			exclusively locked. */
+
+			switch (latch_mode) {
+			default:
+				break;
+			case BTR_MODIFY_TREE:
+			case BTR_CONT_MODIFY_TREE:
+				while (index->table->newer) {
+					index = dict_table_get_first_index(
+						index->table->newer);
+				}
+
+				btr_page_set_index_id(
+					page, buf_block_get_page_zip(block),
+					index->id, mtr);
+			}
+		}
+
 		height--;
 		guess = NULL;
 
 		node_ptr = page_cur_get_rec(page_cursor);
 
+		/* We may have page_index_id mismatch here,
+		but because table definition changes are only allowed
+		when the primary key columns are not affected, the
+		node pointer records can be interpreted using any clustered
+		index definition of the table. */
 		offsets = rec_get_offsets(
 			node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
 
@@ -913,7 +954,9 @@ btr_cur_open_at_index_side_func(
 					 file, line, mtr);
 		page = buf_block_get_frame(block);
 		ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-		ut_ad(index->id == btr_page_get_index_id(page));
+		ut_ad(index->id == btr_page_get_index_id(page)
+		      || (dict_index_is_clust(index)
+			  && (index->table->newer || index->table->older)));
 
 		block->check_index_page_at_flush = TRUE;
 
@@ -1043,7 +1086,9 @@ btr_cur_open_at_rnd_pos_func(
 					 file, line, mtr);
 		page = buf_block_get_frame(block);
 		ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-		ut_ad(index->id == btr_page_get_index_id(page));
+		ut_ad(index->id == btr_page_get_index_id(page)
+		      || (dict_index_is_clust(index)
+			  && (index->table->newer || index->table->older)));
 
 		if (height == ULINT_UNDEFINED) {
 			/* We are in the root node */
@@ -2068,6 +2113,17 @@ btr_cur_optimistic_update(
 	ut_ad(thr || flags == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
 			       | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
 	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+
+	{
+		index_id_t page_index_id = btr_page_get_index_id(page);
+
+		if (UNIV_UNLIKELY
+		    (page_index_id != index->id
+		     && !btr_page_convert(block, page_index_id, index, mtr))) {
+			return(DB_OVERFLOW);
+		}
+	}
+
 	ut_ad(btr_page_get_index_id(page) == index->id);
 
 	heap = mem_heap_create(1024);
@@ -2386,6 +2442,8 @@ btr_cur_pessimistic_update(
 		}
 	}
 
+	ut_ad(btr_page_get_index_id(page) == index->id);
+
 	if (!*heap) {
 		*heap = mem_heap_create(1024);
 	}
@@ -3024,6 +3082,8 @@ btr_cur_optimistic_delete_func(
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
 	ibool		no_compress_needed;
+	dict_index_t*	index		= cursor->index;
+	index_id_t	page_index_id;
 	rec_offs_init(offsets_);
 
 	ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
@@ -3033,12 +3093,26 @@ btr_cur_optimistic_delete_func(
 
 	block = btr_cur_get_block(cursor);
 
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(fil_page_get_type(buf_block_get_frame(block)) == FIL_PAGE_INDEX);
 	ut_ad(page_is_leaf(buf_block_get_frame(block)));
 	ut_ad(!dict_index_is_online_ddl(cursor->index)
 	      || (flags & BTR_CREATE_FLAG));
 
+	page_index_id = btr_page_get_index_id(buf_block_get_frame(block));
+
+	if (page_index_id != index->id) {
+		index = dict_index_get_version(index, page_index_id);
+		/* A corresponding table definition must be found. */
+		ut_ad(index);
+		/* Add some fault tolerance. */
+		if (!index) {
+			return(FALSE);
+		}
+	}
+
 	rec = btr_cur_get_rec(cursor);
-	offsets = rec_get_offsets(rec, cursor->index, offsets,
+	offsets = rec_get_offsets(rec, index, offsets,
 				  ULINT_UNDEFINED, &heap);
 
 	no_compress_needed = !rec_offs_any_extern(offsets)
@@ -3063,13 +3137,13 @@ btr_cur_optimistic_delete_func(
 		ut_a(!page_zip || page_zip_validate(page_zip, page));
 #endif /* UNIV_ZIP_DEBUG */
 		page_cur_delete_rec(btr_cur_get_page_cur(cursor),
-				    cursor->index, offsets, mtr);
+				    index, offsets, mtr);
 #ifdef UNIV_ZIP_DEBUG
 		ut_a(!page_zip || page_zip_validate(page_zip, page));
 #endif /* UNIV_ZIP_DEBUG */
 
-		if (dict_index_is_clust(cursor->index)
-		    || dict_index_is_ibuf(cursor->index)
+		if (dict_index_is_clust(index)
+		    || dict_index_is_ibuf(index)
 		    || !page_is_leaf(page)) {
 			/* The insert buffer does not handle
 			inserts to clustered indexes, to
@@ -3121,6 +3195,7 @@ btr_cur_pessimistic_delete(
 	buf_block_t*	block;
 	page_t*		page;
 	page_zip_des_t*	page_zip;
+	index_id_t	page_index_id;
 	dict_index_t*	index;
 	rec_t*		rec;
 	dtuple_t*	node_ptr;
@@ -3141,6 +3216,22 @@ btr_cur_pessimistic_delete(
 	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
 				MTR_MEMO_X_LOCK));
 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	ut_ad(fil_page_get_type(buf_block_get_frame(block)) == FIL_PAGE_INDEX);
+
+	page_index_id = btr_page_get_index_id(buf_block_get_frame(block));
+
+	if (page_index_id != index->id) {
+		index = dict_index_get_version(index, page_index_id);
+		/* A corresponding table definition must be found. */
+		ut_ad(index);
+		/* Add some fault tolerance. */
+		if (!index) {
+			*err = DB_INDEX_CORRUPT;
+			return(FALSE);
+		}
+	}
+
 	if (!has_reserved_extents) {
 		/* First reserve enough free space for the file segments
 		of the index tree, so that the node pointer updates will

=== modified file 'storage/innobase/dict/dict0crea.cc'
--- a/storage/innobase/dict/dict0crea.cc	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/dict/dict0crea.cc	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -265,9 +265,11 @@ dict_build_table_def_step(
 	table = node->table;
 	use_tablespace = !!(table->flags2 & DICT_TF2_USE_TABLESPACE);
 
-	dict_hdr_get_new_id(&table->id, NULL, NULL);
-
-	thr_get_trx(thr)->table_id = table->id;
+	if (table->id == 0) {
+		dict_hdr_get_new_id(&table->id, NULL, NULL);
+	} else {
+		goto func_exit;
+	}
 
 	if (use_tablespace) {
 		/* This table will not use the system tablespace.
@@ -325,6 +327,8 @@ dict_build_table_def_step(
 		table->flags &= DICT_TF_COMPACT;
 	}
 
+func_exit:
+	thr_get_trx(thr)->table_id = table->id;
 	row = dict_create_sys_tables_tuple(table, node->heap);
 
 	ins_node_set_new_row(node->tab_def, row);
@@ -438,7 +442,7 @@ dict_create_sys_indexes_tuple(
 		entry, DICT_COL__SYS_INDEXES__PAGE_NO);
 
 	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
-	mach_write_to_4(ptr, FIL_NULL);
+	mach_write_to_4(ptr, index->page);
 
 	dfield_set_data(dfield, ptr, 4);
 
@@ -608,7 +612,7 @@ dict_build_index_def_step(
 	table in the same tablespace */
 
 	index->space = table->space;
-	node->page_no = FIL_NULL;
+	node->page_no = index->page;
 	row = dict_create_sys_indexes_tuple(index, node->heap);
 	node->ind_row = row;
 
@@ -660,8 +664,9 @@ dict_create_index_tree_step(
 
 	sys_indexes = dict_sys->sys_indexes;
 
-	if (index->type == DICT_FTS) {
-		/* FTS index does not need an index tree */
+	if (index->type == DICT_FTS || node->page_no != FIL_NULL) {
+		/* FTS index does not need an index tree,
+		and neither do online table definition changes. */
 		return(DB_SUCCESS);
 	}
 

=== modified file 'storage/innobase/dict/dict0dict.cc'
--- a/storage/innobase/dict/dict0dict.cc	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/dict/dict0dict.cc	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -203,9 +203,10 @@ dict_index_remove_from_cache_low(
 	ibool		lru_evict);	/*!< in: TRUE if page being evicted
 					to make room in the table LRU list */
 /**********************************************************************//**
-Removes a table object from the dictionary cache. */
-static
-void
+Removes a table object from the dictionary cache.
+@return previous table in the LRU list, or NULL */
+static __attribute__((nonnull))
+dict_table_t*
 dict_table_remove_from_cache_low(
 /*=============================*/
 	dict_table_t*	table,		/*!< in, own: table */
@@ -1247,18 +1248,12 @@ dict_make_room_in_cache(
 	     && (len - n_evicted) > max_tables;
 	     --i) {
 
-		dict_table_t*	prev_table;
-
-	        prev_table = UT_LIST_GET_PREV(table_LRU, table);
-
 		if (dict_table_can_be_evicted(table)) {
 
-			dict_table_remove_from_cache_low(table, TRUE);
+			table = dict_table_remove_from_cache_low(table, TRUE);
 
 			++n_evicted;
 		}
-
-		table = prev_table;
 	}
 
 	return(n_evicted);
@@ -1638,9 +1633,10 @@ dict_table_change_id_in_cache(
 }
 
 /**********************************************************************//**
-Removes a table object from the dictionary cache. */
-static
-void
+Removes a table object from the dictionary cache.
+@return previous table in the LRU list, or NULL */
+static __attribute__((nonnull))
+dict_table_t*
 dict_table_remove_from_cache_low(
 /*=============================*/
 	dict_table_t*	table,		/*!< in, own: table */
@@ -1651,18 +1647,16 @@ dict_table_remove_from_cache_low(
 	dict_index_t*	index;
 	ulint		size;
 
-	ut_ad(table);
 	ut_ad(dict_lru_validate());
 	ut_a(table->n_ref_count == 0);
 	ut_a(table->n_rec_locks == 0);
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
 
-#if 0
-	fputs("Removing table ", stderr);
-	ut_print_name(stderr, table->name, ULINT_UNDEFINED);
-	fputs(" from dictionary cache\n", stderr);
-#endif
+	/* Get the newest table definition. */
+	while (table->newer) {
+		table = table->newer;
+	}
 
 	/* Remove the foreign constraints from the cache */
 
@@ -1683,6 +1677,17 @@ dict_table_remove_from_cache_low(
 		foreign->referenced_index = NULL;
 	}
 
+	/* Remove table from the hash tables of tables */
+
+	HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
+		    ut_fold_string(table->name), table);
+
+free_older_prev:
+	dict_table_t*	prev_table = UT_LIST_GET_PREV(table_LRU, table);
+free_older:
+	HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
+		    ut_fold_ull(table->id), table);
+
 	/* Remove the indexes from the cache */
 
 	for (index = UT_LIST_GET_LAST(table->indexes);
@@ -1692,14 +1697,6 @@ dict_table_remove_from_cache_low(
 		dict_index_remove_from_cache_low(table, index, lru_evict);
 	}
 
-	/* Remove table from the hash tables of tables */
-
-	HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
-		    ut_fold_string(table->name), table);
-
-	HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
-		    ut_fold_ull(table->id), table);
-
 	/* Remove table from LRU or non-LRU list. */
 	if (table->can_be_evicted) {
 		ut_ad(dict_lru_find_table(table));
@@ -1740,7 +1737,21 @@ dict_table_remove_from_cache_low(
 
 	dict_sys->size -= size;
 
+	dict_table_t*	old_table = table->older;
+
 	dict_mem_table_free(table);
+
+	table = old_table;
+
+	if (!table) {
+		return(prev_table);
+	}
+
+	if (table == prev_table) {
+		goto free_older_prev;
+	} else {
+		goto free_older;
+	}
 }
 
 /**********************************************************************//**
@@ -5916,20 +5927,18 @@ dict_close(void)
 
 	/* Free the hash elements. We don't remove them from the table
 	because we are going to destroy the table anyway. */
-	for (i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) {
+	for (i = 0; i < hash_get_n_cells(dict_sys->table_id_hash); i++) {
 		dict_table_t*	table;
 
 		table = static_cast<dict_table_t*>(
-			HASH_GET_FIRST(dict_sys->table_hash, i));
+			HASH_GET_FIRST(dict_sys->table_id_hash, i));
 
 		while (table) {
 			dict_table_t*	prev_table = table;
 
 			table = static_cast<dict_table_t*>(
-				HASH_GET_NEXT(name_hash, prev_table));
-#ifdef UNIV_DEBUG
-			ut_a(prev_table->magic_n == DICT_TABLE_MAGIC_N);
-#endif
+				HASH_GET_NEXT(id_hash, prev_table));
+			ut_ad(prev_table->magic_n == DICT_TABLE_MAGIC_N);
 			/* Acquire only because it's a pre-condition. */
 			mutex_enter(&dict_sys->mutex);
 
@@ -5939,11 +5948,11 @@ dict_close(void)
 		}
 	}
 
-	hash_table_free(dict_sys->table_hash);
+	hash_table_free(dict_sys->table_id_hash);
 
-	/* The elements are the same instance as in dict_sys->table_hash,
+	/* The elements are a subset of dict_sys->table_id_hash,
 	therefore we don't delete the individual elements. */
-	hash_table_free(dict_sys->table_id_hash);
+	hash_table_free(dict_sys->table_hash);
 
 	dict_ind_free();
 

=== modified file 'storage/innobase/dict/dict0mem.cc'
--- a/storage/innobase/dict/dict0mem.cc	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/dict/dict0mem.cc	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -137,7 +137,8 @@ dict_mem_table_free(
         if (dict_table_has_fts_index(table)
             || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
             || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
-		if (table->fts) {
+		if (table->fts && !table->newer) {
+			/* TODO: each table->fts is tied to table->id */
 			fts_free(table);
 		}
 

=== modified file 'storage/innobase/handler/handler0alter.cc'
--- a/storage/innobase/handler/handler0alter.cc	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/handler/handler0alter.cc	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -29,6 +29,7 @@ Smart ALTER TABLE
 #include <sql_alter.h>
 #include <sql_class.h>
 
+#include "dict0boot.h"
 #include "dict0stats.h"
 #include "log0log.h"
 #include "row0log.h"
@@ -205,14 +206,6 @@ ha_innobase::check_if_supported_inplace_
 		DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
 	}
 
-	if ((ha_alter_info->handler_flags
-	     & Alter_inplace_info::ALTER_COLUMN_NULLABLE)
-	    && dict_table_is_comp(prebuilt->table)) {
-		/* For now, we only support changing a column from
-		NOT NULL to NULL when ROW_FORMAT=REDUNDANT. */
-		DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
-	}
-
 	if (!(ha_alter_info->handler_flags &
 	      ~(INNOBASE_INPLACE_REFORMAT
 		| Alter_inplace_info::ALTER_COLUMN_NULLABLE))) {
@@ -2876,6 +2869,283 @@ innobase_set_nullable(
 	DBUG_RETURN(false);
 }
 
+/** Redefine a table (online record format changes)
+@param table_share	the TABLE_SHARE
+@param prebuilt		the prebuilt struct
+@param trx		data dictionary transaction
+@param altered_table	new table definition
+@retval true		Failure
+@retval false		Success */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_table_redefine(
+/*====================*/
+	const TABLE_SHARE*	table_share,
+	row_prebuilt_t*		prebuilt,
+	trx_t*			trx,
+	const TABLE*		altered_table)
+{
+	pars_info_t*	info;
+	dberr_t		error;
+
+	DBUG_ENTER("innobase_table_redefine");
+	DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+	DBUG_ASSERT(!prebuilt->table->newer);
+
+	ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+	ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Rename the current table in the data dictionary and
+	in the cache. Create a new table by the original name. */
+
+	mem_heap_t*	heap = mem_heap_create(1000);
+
+	dict_table_t*	new_table
+		= dict_mem_table_create(
+			prebuilt->table->name, prebuilt->table->space,
+			altered_table->s->fields
+			+ !!(DICT_TF2_FLAG_IS_SET(
+				     prebuilt->table,
+				     DICT_TF2_FTS_HAS_DOC_ID)),
+			prebuilt->table->flags,
+			prebuilt->table->flags2);
+
+	for (uint i = 0; i < altered_table->s->fields; i++) {
+		const Field*	field = altered_table->field[i];
+		ulint		is_unsigned;
+		ulint		field_type = (ulint) field->type();
+		ulint		col_type = get_innobase_type_from_mysql_type(
+				&is_unsigned, field);
+		ulint		charset_no;
+		ulint		col_len;
+
+		ut_a(col_type);
+		/* we assume in dtype_form_prtype() that this
+		fits in two bytes */
+		ut_a(field_type <= MAX_CHAR_COLL_NUM);
+
+		if (!field->null_ptr) {
+			field_type |= DATA_NOT_NULL;
+		}
+
+		if (field->binary()) {
+			field_type |= DATA_BINARY_TYPE;
+		}
+
+		if (is_unsigned) {
+			field_type |= DATA_UNSIGNED;
+		}
+
+		if (dtype_is_string_type(col_type)) {
+			charset_no = (ulint) field->charset()->number;
+
+			ut_a(charset_no <= MAX_CHAR_COLL_NUM);
+		} else {
+			charset_no = 0;
+		}
+
+		col_len = field->pack_length();
+
+		/* The MySQL pack length contains 1 or 2 bytes
+		length field for a true VARCHAR. Let us
+		subtract that, so that the InnoDB column
+		length in the InnoDB data dictionary is the
+		real maximum byte length of the actual data. */
+
+		if (field->type() == MYSQL_TYPE_VARCHAR) {
+			uint32	length_bytes
+				= static_cast<const Field_varstring*>(
+					field)->length_bytes;
+
+			col_len -= length_bytes;
+
+			if (length_bytes == 2) {
+				field_type |= DATA_LONG_TRUE_VARCHAR;
+			}
+		}
+
+		dict_mem_table_add_col(
+			new_table, heap,
+			field->field_name,
+			col_type,
+			dtype_form_prtype(field_type, charset_no),
+			col_len);
+	}
+
+	if (DICT_TF2_FLAG_IS_SET(new_table, DICT_TF2_FTS_HAS_DOC_ID)) {
+		fts_add_doc_id_column(new_table, heap);
+		new_table->fts->doc_col = altered_table->s->fields;
+	}
+
+	table_id_t	new_table_id;
+
+	dict_hdr_get_new_id(&new_table_id, NULL, NULL);
+
+	/* Rename the old table in the data dictionary cache. */
+
+	trx->op_info = "renaming old table definition";
+
+	char* name = static_cast<char*>(
+		mem_heap_alloc(heap, 16 + sizeof "OLD_"));
+	ut_snprintf(name, 16 + sizeof "OLD_", "OLD_" IB_ID_FMT0X,
+		    new_table_id);
+
+	info = pars_info_create();
+
+	pars_info_add_ull_literal(info, "id", prebuilt->table->id);
+	pars_info_add_str_literal(info, "name", name);
+
+	error = que_eval_sql(
+		info,
+		"PROCEDURE RENAME_SYS_TABLES_PROC () IS\n"
+		"BEGIN\n"
+		"UPDATE SYS_TABLES SET NAME=:name\n"
+		"WHERE ID=:id;\n"
+		"END;\n",
+		FALSE, trx);
+
+	DBUG_EXECUTE_IF("ib_table_redefine_error",
+			error = DB_OUT_OF_FILE_SPACE;);
+
+	trx->op_info = "";
+	char* old_name = prebuilt->table->name;
+	/* Temporarily prevent name collisions. */
+	prebuilt->table->name = const_cast<char*>("");
+
+	if (error == DB_SUCCESS) {
+		new_table->id = new_table_id;
+		error = row_create_table_for_mysql(new_table, trx);
+	}
+
+	if (error == DB_SUCCESS) {
+		/* Only a table that was really created can be opened. */
+		new_table = dict_table_open_on_id(new_table_id, TRUE, FALSE);
+		ut_a(new_table);
+	}
+
+	mem_heap_free(heap);
+
+	if (error == DB_SUCCESS) {
+		/* Create the clustered index. */
+		const dict_index_t*	old_clust_index
+			= dict_table_get_first_index(prebuilt->table);
+		ut_ad(dict_index_is_clust(old_clust_index));
+		ut_ad(old_clust_index->page != FIL_NULL);
+		dict_index_t*		new_clust_index
+			= dict_mem_index_create(
+				new_table->name, old_clust_index->name,
+				new_table->space, old_clust_index->type,
+				old_clust_index->n_user_defined_cols);
+
+		new_clust_index->page = old_clust_index->page;
+
+		/* Copy the primary key fields from the old definition. */
+		for (unsigned i = 0; i < old_clust_index->n_user_defined_cols;
+		     i++) {
+			const dict_field_t* ifield
+				= dict_index_get_nth_field(old_clust_index, i);
+
+			dict_mem_index_add_field(
+				new_clust_index,
+				ifield->name, ifield->prefix_len);
+		}
+
+		/* Create the index in SYS_INDEXES, SYS_FIELDS and in
+		the data dictionary cache. */
+		ut_ad(!dict_table_get_first_index(new_table));
+
+		error = row_merge_create_index_graph(
+			trx, new_table, new_clust_index);
+
+		if (error != DB_SUCCESS) {
+			dict_table_close(new_table, TRUE, FALSE);
+			dict_table_remove_from_cache(new_table);
+		}
+	}
+
+	prebuilt->table->name = old_name;
+
+	if (error == DB_SUCCESS) {
+		ut_ad(!prebuilt->table->newer);
+		ut_ad(dict_table_get_first_index(new_table));
+		ut_ad(!new_table->newer);
+		ut_ad(!new_table->older);
+		prebuilt->table->newer = new_table;
+		new_table->older = prebuilt->table;
+
+		ulint	fold = ut_fold_string(prebuilt->table->name);
+
+		/* Remove the old table definition from dict_sys->table_hash,
+		so that a lookup by name will find new_table. */
+		HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
+			    fold, prebuilt->table);
+
+#ifdef UNIV_DEBUG
+		const dict_table_t*	cached;
+		HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
+			    dict_table_t*, cached,
+			    ut_ad(cached->cached),
+			    !ut_strcmp(cached->name, prebuilt->table->name));
+		ut_ad(cached == new_table);
+#endif /* UNIV_DEBUG */
+
+		/* Move secondary indexes to new_table. */
+		dict_index_t* index = dict_table_get_next_index(
+			dict_table_get_first_index(prebuilt->table));
+		while (index) {
+			dict_index_t* next_index = dict_table_get_next_index(
+				index);
+
+			index->table_name = new_table->name;
+			index->table = new_table;
+			UT_LIST_REMOVE(indexes, prebuilt->table->indexes,
+				       index);
+			UT_LIST_ADD_LAST(indexes, new_table->indexes, index);
+
+			index = next_index;
+		}
+
+		/* TODO: each table->fts is tied to table->id */
+		new_table->fts = prebuilt->table->fts;
+
+		/* Move foreign key definitions to new_table. */
+		new_table->foreign_list = prebuilt->table->foreign_list;
+		new_table->referenced_list = prebuilt->table->referenced_list;
+		UT_LIST_INIT(prebuilt->table->foreign_list);
+		UT_LIST_INIT(prebuilt->table->referenced_list);
+
+		for (dict_foreign_t* f = UT_LIST_GET_FIRST(
+			     new_table->foreign_list);
+		     f; f = UT_LIST_GET_NEXT(foreign_list, f)) {
+			DBUG_ASSERT(f->foreign_table == prebuilt->table);
+			f->foreign_table = new_table;
+		}
+
+		for (dict_foreign_t* f = UT_LIST_GET_FIRST(
+			     new_table->referenced_list);
+		     f; f = UT_LIST_GET_NEXT(referenced_list, f)) {
+			DBUG_ASSERT(f->referenced_table == prebuilt->table);
+			f->referenced_table = new_table;
+		}
+
+		DBUG_ASSERT(new_table->n_ref_count == 1);
+		dict_table_close(prebuilt->table, TRUE, FALSE);
+
+		prebuilt->table = new_table;
+	}
+
+	if (error != DB_SUCCESS) {
+		my_error_innodb(error, table_share->table_name.str, 0);
+		trx->error_state = DB_SUCCESS;
+		DBUG_RETURN(true);
+	}
+
+	DBUG_RETURN(false);
+}
+
 /** Commit or rollback the changes made during
 prepare_inplace_alter_table() and inplace_alter_table() inside
 the storage engine. Note that the allowed level of concurrency
@@ -3095,9 +3365,13 @@ processed_field:
 	}
 
 	if (err == 0 && !new_clustered
-	    && (ha_alter_info->handler_flags
-		& Alter_inplace_info::ALTER_COLUMN_NULLABLE)) {
-		DBUG_ASSERT(!dict_table_is_comp(prebuilt->table));
+	    && ((ha_alter_info->handler_flags
+		 & (INNOBASE_INPLACE_REFORMAT
+		    | Alter_inplace_info::ALTER_COLUMN_NULLABLE))
+		== Alter_inplace_info::ALTER_COLUMN_NULLABLE)
+	    && !dict_table_is_comp(prebuilt->table)) {
+		/* For ROW_FORMAT=REDUNDANT, MODIFY...NULL
+		does not require a new data dictionary format. */
 
 		List_iterator_fast<Create_field> cf_it;
 		ulint				col = 0;
@@ -3120,11 +3394,14 @@ processed_field:
 				}
 			}
 		}
-	}
-
-	if (err == 0 && !new_clustered
-	    && (ha_alter_info->handler_flags & INNOBASE_INPLACE_REFORMAT)) {
-		ut_error;
+	} else if (err == 0 && !new_clustered
+		   && (ha_alter_info->handler_flags
+		       & (INNOBASE_INPLACE_REFORMAT
+			  | Alter_inplace_info::ALTER_COLUMN_NULLABLE))) {
+		if (innobase_table_redefine(table_share, prebuilt, trx,
+					    altered_table)) {
+			err = -1;
+		}
 	}
 
 	if (err == 0) {

=== modified file 'storage/innobase/include/btr0btr.h'
--- a/storage/innobase/include/btr0btr.h	revid:marko.makela@strippedz
+++ b/storage/innobase/include/btr0btr.h	revid:marko.makela@stripped
@@ -437,14 +437,30 @@ IMPORTANT: if btr_page_reorganize() is i
 page of a non-clustered index, the caller must update the insert
 buffer free bits in the same mini-transaction in such a way that the
 modification will be redo-logged.
-@return	TRUE on success, FALSE on failure */
+@return	true on success, false on failure */
 UNIV_INTERN
-ibool
+bool
 btr_page_reorganize(
 /*================*/
 	buf_block_t*	block,	/*!< in: page to be reorganized */
 	dict_index_t*	index,	/*!< in: record descriptor */
-	mtr_t*		mtr);	/*!< in: mtr */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull));
+/*************************************************************//**
+Converts a clustered index leaf page by reorganizing it.
+IMPORTANT: when invoked on a compressed page, the caller must update
+the insert buffer free bits in the same mini-transaction in such a
+way that the modification will be redo-logged.
+@return	true on success, false on failure */
+UNIV_INTERN
+bool
+btr_page_convert(
+/*=============*/
+	buf_block_t*	block,	/*!< in: page to be converted */
+	index_id_t	index_id,/*!< in: PAGE_INDEX_ID of the page */
+	dict_index_t*	index,	/*!< in: current clustered index */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+	__attribute__((nonnull, warn_unused_result));
 /*************************************************************//**
 Decides if the page should be split at the convergence point of
 inserts converging to left.

=== modified file 'storage/innobase/include/btr0pcur.h'
--- a/storage/innobase/include/btr0pcur.h	revid:marko.makela@stripped5au30h69jlo94a8z
+++ b/storage/innobase/include/btr0pcur.h	revid:marko.makela@strippedsv11x1
@@ -401,6 +401,7 @@ btr_pcur_get_rec(
 # define btr_pcur_get_block(cursor) ((cursor)->btr_cur.page_cur.block)
 # define btr_pcur_get_rec(cursor) ((cursor)->btr_cur.page_cur.rec)
 #endif /* UNIV_DEBUG */
+#define btr_pcur_get_index(c) (btr_cur_get_index(btr_pcur_get_btr_cur(c)))
 /*********************************************************//**
 Checks if the persistent cursor is on a user record. */
 UNIV_INLINE

=== modified file 'storage/innobase/include/btr0sea.ic'
--- a/storage/innobase/include/btr0sea.ic	revid:marko.makela@stripped69jlo94a8z
+++ b/storage/innobase/include/btr0sea.ic	revid:marko.makela@stripped
@@ -66,6 +66,15 @@ btr_search_info_update(
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
 	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+	if (index != cursor->index) {
+		ut_ad(dict_index_is_clust(index));
+		ut_ad(dict_index_is_clust(cursor->index));
+		ut_ad(index->table->newer || index->table->older);
+		ut_ad(cursor->index->table->newer
+		      || cursor->index->table->older);
+	}
+#endif /* UNIV_DEBUG */
 
 	info = btr_search_get_info(index);
 

=== modified file 'storage/innobase/include/dict0dict.h'
--- a/storage/innobase/include/dict0dict.h	revid:marko.makela@stripped8z
+++ b/storage/innobase/include/dict0dict.h	revid:marko.makela@stripped
@@ -669,6 +669,19 @@ dict_index_is_sec_or_ibuf(
 	const dict_index_t*	index)	/*!< in: index */
 	__attribute__((nonnull, pure, warn_unused_result));
 
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Check whether two indexes are equivalent.
+@return true if equivalent */
+UNIV_INLINE
+bool
+dict_index_is_equivalent(
+/*=====================*/
+	const dict_index_t*	rec_index,	/*!< in: index of a record */
+	const dict_index_t*	dict_index)	/*!< in: index of a table */
+	__attribute__((nonnull, pure, warn_unused_result));
+#endif /* UNIV_DEBUG */
+
 /************************************************************************
 Gets the all the FTS indexes for the table. NOTE: must not be called for
 tables which do not have an FTS-index. */
@@ -1017,6 +1030,17 @@ dict_index_get_n_ordering_defined_by_use
 	const dict_index_t*	index)	/*!< in: an internal representation
 					of index (in the dictionary cache) */
 	__attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the clustered index definition corresponding to an index_id.
+@return	clustered index corresponding to index_id, or NULL if not found */
+UNIV_INLINE
+dict_index_t*
+dict_index_get_version(
+/*===================*/
+	dict_index_t*	index,		/*!< in: clustered index */
+	index_id_t	index_id)	/*!< in: PAGE_INDEX_ID */
+	__attribute__((nonnull, warn_unused_result));
+
 #ifdef UNIV_DEBUG
 /********************************************************************//**
 Gets the nth field of an index.

=== modified file 'storage/innobase/include/dict0dict.ic'
--- a/storage/innobase/include/dict0dict.ic	revid:marko.makela@strippedom-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/include/dict0dict.ic	revid:marko.makela@stripped510130933-7li0w21gdgsv11x1
@@ -318,6 +318,38 @@ dict_index_is_sec_or_ibuf(
 	return(!(type & DICT_CLUSTERED) || (type & DICT_IBUF));
 }
 
+#ifdef UNIV_DEBUG
+/********************************************************************//**
+Debug check: tests whether rec_index is dict_index itself, or the first
+index of dict_index->table or of one of its older definitions.
+@return true if equivalent */
+UNIV_INLINE
+bool
+dict_index_is_equivalent(
+/*=====================*/
+	const dict_index_t*	rec_index,	/*!< in: index of a record */
+	const dict_index_t*	dict_index)	/*!< in: index of a table */
+{
+	if (rec_index != dict_index) {
+		ut_ad(dict_index_is_clust(rec_index));
+		ut_ad(dict_index_is_clust(dict_index));
+		ut_ad(rec_index->table->newer);
+		ut_ad(dict_index->table->older);
+
+		const dict_table_t*	def = dict_index->table;
+
+		while (dict_table_get_first_index(def) != rec_index) {
+			def = def->older;
+			if (def == NULL) {
+				return(false);
+			}
+		}
+	}
+
+	return(true);
+}
+#endif /* UNIV_DEBUG */
+
 /********************************************************************//**
 Gets the number of user-defined columns in a table in the dictionary
 cache.
@@ -923,6 +955,36 @@ dict_index_get_n_ordering_defined_by_use
 	return(index->n_user_defined_cols);
 }
 
+/********************************************************************//**
+Finds the version of a clustered index that has the given index id,
+walking from the newest table definition towards the oldest.
+@return	clustered index corresponding to index_id, or NULL if not found */
+UNIV_INLINE
+dict_index_t*
+dict_index_get_version(
+/*===================*/
+	dict_index_t*	index,		/*!< in: clustered index */
+	index_id_t	index_id)	/*!< in: PAGE_INDEX_ID */
+{
+	ut_ad(dict_index_is_clust(index));
+
+	dict_table_t*	def = index->table;
+	while (def->newer) {
+		def = def->newer;
+	}
+
+	for (; def; def = def->older) {
+		dict_index_t*	clust = dict_table_get_first_index(def);
+		if (clust->id == index_id) {
+			return(clust);
+		}
+		ut_ad(def->space == index->space);
+		ut_ad(clust->space == index->space);
+		ut_ad(clust->page == index->page);
+	}
+	return(NULL);
+}
+
 #ifdef UNIV_DEBUG
 /********************************************************************//**
 Gets the nth field of an index.

=== modified file 'storage/innobase/include/dict0mem.h'
--- a/storage/innobase/include/dict0mem.h	revid:marko.makela@strippedz
+++ b/storage/innobase/include/dict0mem.h	revid:marko.makela@stripped
@@ -628,6 +628,10 @@ struct dict_table_struct{
 	table_id_t	id;	/*!< id of the table */
 	mem_heap_t*	heap;	/*!< memory heap */
 	char*		name;	/*!< table name */
+	dict_table_t*	newer;	/*!< pointer to a newer table definition,
+				or NULL if this is the newest */
+	dict_table_t*	older;	/*!< pointer to an older table definition,
+				or NULL if this is the oldest */
 	const char*	dir_path_of_temp_table;/*!< NULL or the directory path
 				where a TEMPORARY table that was explicitly
 				created by a user should be placed if

=== modified file 'storage/innobase/include/page0page.h'
--- a/storage/innobase/include/page0page.h	revid:marko.makela@stripped
+++ b/storage/innobase/include/page0page.h	revid:marko.makela@stripped
@@ -768,16 +768,19 @@ page_create_zip(
 
 /*************************************************************//**
 Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page. */
+touch the lock table and max trx id on page or compress the page.
+@return true if the operation succeeded */
 UNIV_INTERN
-void
+bool
 page_copy_rec_list_end_no_locks(
 /*============================*/
 	buf_block_t*	new_block,	/*!< in: index page to copy to */
 	buf_block_t*	block,		/*!< in: index page of rec */
 	rec_t*		rec,		/*!< in: record on page */
 	dict_index_t*	index,		/*!< in: record descriptor */
-	mtr_t*		mtr);		/*!< in: mtr */
+	dict_index_t*	new_index,	/*!< in: record descriptor */
+	mtr_t*		mtr)		/*!< in: mtr */
+	__attribute__((nonnull));
 /*************************************************************//**
 Copies records from page to new_page, from the given record onward,
 including that record. Infimum and supremum records are not copied.

=== modified file 'storage/innobase/include/page0zip.h'
--- a/storage/innobase/include/page0zip.h	revid:marko.makela@stripped0120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/include/page0zip.h	revid:marko.makela@stripped933-7li0w21gdgsv11x1
@@ -94,6 +94,17 @@ page_zip_des_init(
 					descriptor */
 
 /**********************************************************************//**
+Write a log record of compressing an index page. */
+UNIV_INTERN
+void
+page_zip_compress_write_log(
+/*========================*/
+	const page_zip_des_t*	page_zip,/*!< in: compressed page */
+	const page_t*		page,	/*!< in: uncompressed page */
+	dict_index_t*		index,	/*!< in: index of the B-tree node */
+	mtr_t*			mtr)	/*!< in: mini-transaction */
+	__attribute__((nonnull));
+/**********************************************************************//**
 Configure the zlib allocator to use the given memory heap. */
 UNIV_INTERN
 void

=== modified file 'storage/innobase/include/rem0rec.h'
--- a/storage/innobase/include/rem0rec.h	revid:marko.makela@stripped
+++ b/storage/innobase/include/rem0rec.h	revid:marko.makela@stripped
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -399,6 +399,7 @@ rec_get_offsets_reverse(
 	ulint*			offsets);/*!< in/out: array consisting of
 					offsets[0] allocated elements */
 
+#ifdef UNIV_DEBUG
 /************************************************************//**
 Validates offsets returned by rec_get_offsets().
 @return	TRUE if valid */
@@ -408,9 +409,23 @@ rec_offs_validate(
 /*==============*/
 	const rec_t*		rec,	/*!< in: record or NULL */
 	const dict_index_t*	index,	/*!< in: record descriptor or NULL */
-	const ulint*		offsets);/*!< in: array returned by
+	const ulint*		offsets)/*!< in: array returned by
 					rec_get_offsets() */
-#ifdef UNIV_DEBUG
+	__attribute__((nonnull(3), warn_unused_result));
+
+/************************************************************//**
+Validates offsets returned by rec_get_offsets().
+@return	TRUE if valid, allowing a mismatch of a clustered index when
+the record is for a different table definition version */
+UNIV_INLINE
+ibool
+rec_offs_validate_relaxed(
+/*======================*/
+	const rec_t*		rec,	/*!< in: record or NULL */
+	const dict_index_t*	index,	/*!< in: record descriptor or NULL */
+	const ulint*		offsets)/*!< in: array returned by
+					rec_get_offsets() */
+	__attribute__((nonnull(3), warn_unused_result));
 /************************************************************//**
 Updates debug data in offsets, in order to avoid bogus
 rec_offs_validate() failures. */
@@ -420,8 +435,9 @@ rec_offs_make_valid(
 /*================*/
 	const rec_t*		rec,	/*!< in: record */
 	const dict_index_t*	index,	/*!< in: record descriptor */
-	ulint*			offsets);/*!< in: array returned by
+	ulint*			offsets)/*!< in: array returned by
 					rec_get_offsets() */
+	__attribute__((nonnull));
 #else
 # define rec_offs_make_valid(rec, index, offsets) ((void) 0)
 #endif /* UNIV_DEBUG */

=== modified file 'storage/innobase/include/rem0rec.ic'
--- a/storage/innobase/include/rem0rec.ic	revid:marko.makela@strippedom-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/include/rem0rec.ic	revid:marko.makela@stripped0130933-7li0w21gdgsv11x1
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -942,6 +942,7 @@ rec_offs_n_fields(
 	return(n_fields);
 }
 
+#ifdef UNIV_DEBUG
 /************************************************************//**
 Validates offsets returned by rec_get_offsets().
 @return	TRUE if valid */
@@ -997,7 +998,43 @@ rec_offs_validate(
 	}
 	return(TRUE);
 }
-#ifdef UNIV_DEBUG
+
+/************************************************************//**
+Validates offsets returned by rec_get_offsets().
+@return	TRUE if valid, allowing a mismatch of a clustered index when
+the record is for a different table definition version */
+UNIV_INLINE
+ibool
+rec_offs_validate_relaxed(
+/*======================*/
+	const rec_t*		rec,	/*!< in: record or NULL */
+	const dict_index_t*	index,	/*!< in: record descriptor or NULL */
+	const ulint*		offsets)/*!< in: array returned by
+					rec_get_offsets() */
+{
+	if (index && (ulint) index != offsets[3]) {
+		dict_table_t*	table = index->table;
+		/* A NULL index is left to rec_offs_validate() below. */
+		ut_a(dict_index_is_clust(index));
+		ut_a(table->newer || table->older);
+		/* Rewind to the oldest table definition. */
+		while (table->older) {
+			table = table->older;
+		}
+		/* Search towards the newest for the index in offsets[3]. */
+		do {
+			index = dict_table_get_first_index(table);
+			if ((ulint) index == offsets[3]) {
+				break;
+			}
+		} while ((table = table->newer) != NULL);
+
+		ut_a(table);
+	}
+
+	return(rec_offs_validate(rec, index, offsets));
+}
+
 /************************************************************//**
 Updates debug data in offsets, in order to avoid bogus
 rec_offs_validate() failures. */

=== modified file 'storage/innobase/include/row0merge.h'
--- a/storage/innobase/include/row0merge.h	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/include/row0merge.h	revid:marko.makela@strippedm-20120510130933-7li0w21gdgsv11x1
@@ -241,6 +241,17 @@ row_merge_rename_index_to_drop(
 	index_id_t	index_id)	/*!< in: index identifier */
 	__attribute__((nonnull));
 /*********************************************************************//**
+Create and execute a query graph for creating an index.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_merge_create_index_graph(
+/*=========================*/
+	trx_t*		trx,		/*!< in: trx */
+	dict_table_t*	table,		/*!< in: table */
+	dict_index_t*	index)		/*!< in: index */
+	__attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
 Create the index and load in to the dictionary.
 @return	index, or NULL on error */
 UNIV_INTERN

=== modified file 'storage/innobase/include/row0row.ic'
--- a/storage/innobase/include/row0row.ic	revid:marko.makela@strippedlo94a8z
+++ b/storage/innobase/include/row0row.ic	revid:marko.makela@stripped
@@ -43,7 +43,7 @@ row_get_trx_id_offset(
 	ulint	len;
 
 	ut_ad(dict_index_is_clust(index));
-	ut_ad(rec_offs_validate(NULL, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(NULL, index, offsets));
 
 	pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
 
@@ -68,7 +68,7 @@ row_get_rec_trx_id(
 	ulint	offset;
 
 	ut_ad(dict_index_is_clust(index));
-	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, index, offsets));
 
 	offset = index->trx_id_offset;
 
@@ -93,7 +93,7 @@ row_get_rec_roll_ptr(
 	ulint	offset;
 
 	ut_ad(dict_index_is_clust(index));
-	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, index, offsets));
 
 	offset = index->trx_id_offset;
 

=== modified file 'storage/innobase/include/univ.i'
--- a/storage/innobase/include/univ.i	revid:marko.makela@stripped0120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/include/univ.i	revid:marko.makela@stripped7li0w21gdgsv11x1
@@ -422,6 +422,7 @@ macro ULINTPF. */
 # define UINT32PF	"%I32u"
 # define INT64PF	"%I64d"
 # define UINT64PF	"%I64u"
+# define UINT64P0XF	"%016I64X"
 typedef __int64 ib_int64_t;
 typedef unsigned __int64 ib_uint64_t;
 typedef unsigned __int32 ib_uint32_t;
@@ -430,12 +431,14 @@ typedef unsigned __int32 ib_uint32_t;
 # define UINT32PF	"%"PRIu32
 # define INT64PF	"%"PRId64
 # define UINT64PF	"%"PRIu64
+# define UINT64P0XF	"%016"PRIX64
 typedef int64_t ib_int64_t;
 typedef uint64_t ib_uint64_t;
 typedef uint32_t ib_uint32_t;
 # endif /* __WIN__ */
 
 # define IB_ID_FMT	UINT64PF
+# define IB_ID_FMT0X	UINT64P0XF
 
 #ifdef _WIN64
 typedef unsigned __int64	ulint;

=== modified file 'storage/innobase/lock/lock0lock.cc'
--- a/storage/innobase/lock/lock0lock.cc	revid:marko.makela@oracle.com-20120510112547-5au30h69jlo94a8z
+++ b/storage/innobase/lock/lock0lock.cc	revid:marko.makela@stripped20120510130933-7li0w21gdgsv11x1
@@ -501,7 +501,7 @@ lock_check_trx_id_sanity(
 	ibool		is_ok;
 	trx_id_t	max_trx_id;
 
-	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, index, offsets));
 
 	max_trx_id = trx_sys_get_max_trx_id();
 	is_ok = trx_id < max_trx_id;
@@ -533,7 +533,7 @@ lock_clust_rec_cons_read_sees(
 	ut_ad(dict_index_is_clust(index));
 	ut_ad(!dict_index_is_online_ddl(index));
 	ut_ad(page_rec_is_user_rec(rec));
-	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, index, offsets));
 
 	/* NOTE that we call this function while holding the search
 	system latch. */
@@ -1413,12 +1413,12 @@ lock_rec_get_prev(
 Checks if a transaction has the specified table lock, or stronger. This
 function should only be called by the thread that owns the transaction.
 @return	lock or NULL */
-UNIV_INLINE
+UNIV_INLINE __attribute__((nonnull, warn_unused_result))
 const lock_t*
-lock_table_has(
-/*===========*/
+lock_table_has_low(
+/*===============*/
 	const trx_t*		trx,	/*!< in: transaction */
-	const dict_table_t*	table,	/*!< in: table */
+	const dict_table_t*	table,	/*!< in: table definition */
 	enum lock_mode		mode)	/*!< in: lock mode */
 {
 	lint			i;
@@ -1458,6 +1458,28 @@ lock_table_has(
 	return(NULL);
 }
 
+/*********************************************************************//**
+Checks if a transaction holds the specified table lock, or a stronger one,
+on the given table definition or on any older definition of the table.
+Only the thread that owns the transaction should call this function.
+@return	lock or NULL */
+UNIV_INLINE __attribute__((nonnull, warn_unused_result))
+const lock_t*
+lock_table_has(
+/*===========*/
+	const trx_t*		trx,	/*!< in: transaction */
+	const dict_table_t*	table,	/*!< in: table */
+	enum lock_mode		mode)	/*!< in: lock mode */
+{
+	const lock_t*	found = lock_table_has_low(trx, table, mode);
+
+	while (!found && (table = table->older) != NULL) {
+		found = lock_table_has_low(trx, table, mode);
+	}
+
+	return(found);
+}
+
 /*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
 
 /*********************************************************************//**
@@ -5414,7 +5436,7 @@ lock_rec_queue_validate(
 
 	ut_a(rec);
 	ut_a(block->frame == page_align(rec));
-	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, index, offsets));
 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
 	ut_ad(lock_mutex_own() == locked_lock_trx_sys);
 	ut_ad(!index || !dict_index_is_online_ddl(index));
@@ -5919,7 +5941,7 @@ lock_rec_convert_impl_to_expl(
 
 	ut_ad(!lock_mutex_own());
 	ut_ad(page_rec_is_user_rec(rec));
-	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, index, offsets));
 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
 	ut_ad(!dict_index_is_online_ddl(index));
 
@@ -5986,7 +6008,7 @@ lock_clust_rec_modify_check_and_lock(
 	dberr_t	err;
 	ulint	heap_no;
 
-	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, index, offsets));
 	ut_ad(dict_index_is_clust(index));
 	ut_ad(!dict_index_is_online_ddl(index));
 	ut_ad(block->frame == page_align(rec));
@@ -6223,7 +6245,7 @@ lock_clust_rec_read_check_and_lock(
 	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
 	ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
 	      || gap_mode == LOCK_REC_NOT_GAP);
-	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, index, offsets));
 
 	if (flags & BTR_NO_LOCKING_FLAG) {
 
@@ -6239,10 +6261,23 @@ lock_clust_rec_read_check_and_lock(
 
 	lock_mutex_enter();
 
-	ut_ad(mode != LOCK_X
-	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-	ut_ad(mode != LOCK_S
-	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
+#ifdef UNIV_DEBUG
+	const trx_t*	trx = thr_get_trx(thr);
+	enum lock_mode	tlock_mode;
+
+	switch (mode) {
+	default:
+		ut_error;
+	case LOCK_X:
+		tlock_mode = LOCK_IX;
+		break;
+	case LOCK_S:
+		tlock_mode = LOCK_IS;
+		break;
+	}
+
+	ut_ad(lock_table_has(trx, index->table, tlock_mode));
+#endif /* UNIV_DEBUG */
 
 	err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr);
 

=== modified file 'storage/innobase/page/page0page.cc'
--- a/storage/innobase/page/page0page.cc	revid:marko.makela@strippedu30h69jlo94a8z
+++ b/storage/innobase/page/page0page.cc	revid:marko.makela@stripped1x1
@@ -523,15 +523,17 @@ page_create_zip(
 
 /*************************************************************//**
 Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page. */
+touch the lock table and max trx id on page or compress the page.
+@return true if the operation succeeded */
 UNIV_INTERN
-void
+bool
 page_copy_rec_list_end_no_locks(
 /*============================*/
 	buf_block_t*	new_block,	/*!< in: index page to copy to */
 	buf_block_t*	block,		/*!< in: index page of rec */
 	rec_t*		rec,		/*!< in: record on page */
 	dict_index_t*	index,		/*!< in: record descriptor */
+	dict_index_t*	new_index,	/*!< in: record descriptor */
 	mtr_t*		mtr)		/*!< in: mtr */
 {
 	page_t*		new_page	= buf_block_get_frame(new_block);
@@ -549,7 +551,7 @@ page_copy_rec_list_end_no_locks(
 		page_cur_move_to_next(&cur1);
 	}
 
-	btr_assert_not_corrupted(new_block, index);
+	btr_assert_not_corrupted(new_block, new_index);
 	ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
 	ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
 	     (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
@@ -558,39 +560,132 @@ page_copy_rec_list_end_no_locks(
 
 	/* Copy records from the original page to the new page */
 
-	while (!page_cur_is_after_last(&cur1)) {
-		rec_t*	cur1_rec = page_cur_get_rec(&cur1);
-		rec_t*	ins_rec;
-		offsets = rec_get_offsets(cur1_rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		ins_rec = page_cur_insert_rec_low(cur2, index,
-						  cur1_rec, offsets, mtr);
-		if (UNIV_UNLIKELY(!ins_rec)) {
-			/* Track an assertion failure reported on the mailing
-			list on June 18th, 2003 */
-
-			buf_page_print(new_page, 0,
-				       BUF_PAGE_PRINT_NO_CRASH);
-			buf_page_print(page_align(rec), 0,
-				       BUF_PAGE_PRINT_NO_CRASH);
-			ut_print_timestamp(stderr);
-
-			fprintf(stderr,
-				"InnoDB: rec offset %lu, cur1 offset %lu,"
-				" cur2 offset %lu\n",
-				(ulong) page_offset(rec),
-				(ulong) page_offset(page_cur_get_rec(&cur1)),
-				(ulong) page_offset(cur2));
-			ut_error;
+	if (index != new_index) {
+		/* convert cur1_rec to new_index */
+		ut_ad(dict_index_is_clust(index));
+		ut_ad(dict_index_is_clust(new_index));
+		ut_ad(page_is_leaf(block->frame));
+		/* TODO: remove these when implementing
+		ADD COLUMN, DROP COLUMN, reordering of columns */
+		ut_ad(page_is_comp(block->frame));
+		ut_ad(new_index->n_nullable > index->n_nullable);
+		ut_ad(new_index->n_fields == index->n_fields);
+
+		ulint	size = (1 + REC_OFFS_HEADER_SIZE) + 1
+			+ dict_index_get_n_fields(new_index);
+
+		if (!heap) {
+			heap = mem_heap_create(size * sizeof *offsets * 2);
 		}
 
-		page_cur_move_to_next(&cur1);
-		cur2 = ins_rec;
+		ulint*	new_offsets = static_cast<ulint*>(
+			mem_heap_alloc(heap, size * sizeof *new_offsets));
+		rec_offs_set_n_alloc(new_offsets, size);
+		new_offsets[1] = dict_index_get_n_fields(new_index);
+
+		mem_heap_t*	rec_heap = mem_heap_create(UNIV_PAGE_SIZE / 2);
+
+		while (!page_cur_is_after_last(&cur1)) {
+			rec_t*	cur1_rec = page_cur_get_rec(&cur1);
+			rec_t*	ins_rec;
+			offsets = rec_get_offsets(cur1_rec, index, offsets,
+						  ULINT_UNDEFINED, &heap);
+
+			/* Convert the record. For now, mimic
+			rec_init_offsets_comp_ordinary().
+			TODO: add/drop/shuffle columns */
+
+			int	extra_size_diff
+				= UT_BITS_IN_BYTES(new_index->n_nullable)
+				- UT_BITS_IN_BYTES(index->n_nullable);
+
+			byte*	buf = static_cast<byte*>(
+				mem_heap_alloc(
+					rec_heap,
+					rec_offs_size(offsets)
+					+ extra_size_diff));
+			byte*	new_rec = buf + rec_offs_extra_size(offsets)
+				+ extra_size_diff;
+
+			/* mimic rec_convert_dtuple_to_rec_comp() */
+			byte*	nulls = new_rec
+				- (REC_N_NEW_EXTRA_BYTES + 1);
+			byte*	lens = nulls - UT_BITS_IN_BYTES(
+				new_index->n_nullable);
+
+			/* Copy the lengths. */
+			memcpy(buf, cur1_rec - rec_offs_extra_size(offsets),
+			       (lens + 1) - buf);
+			/* Clear the SQL-null flags. */
+			memset(lens + 1, 0, nulls - lens);
+			/* Copy the header and data. */
+			memcpy(new_rec - REC_N_NEW_EXTRA_BYTES,
+			       cur1_rec - REC_N_NEW_EXTRA_BYTES,
+			       rec_offs_data_size(offsets)
+			       + REC_N_NEW_EXTRA_BYTES);
+			/* Copy the SQL-null flags. */
+			for (uint i = 0, null_mask = 1;
+			     i < dict_index_get_n_fields(new_index); i++) {
+				const dict_field_t*	field
+					= dict_index_get_nth_field(
+						new_index, i);
+				if (field->col->prtype & DATA_NOT_NULL) {
+					ut_ad(!rec_offs_nth_sql_null(
+						      offsets, i));
+					continue;
+				}
+
+				if (!(byte) null_mask) {
+					nulls--;
+					null_mask = 1;
+				}
+
+				if (rec_offs_nth_sql_null(offsets, i)) {
+					*nulls |= null_mask;
+				}
+
+				null_mask <<= 1;
+			}
+
+			rec_init_offsets_comp_ordinary(
+				new_rec, REC_N_NEW_EXTRA_BYTES,
+				new_index, new_index->n_nullable,
+				new_offsets);
+
+			ins_rec = page_cur_insert_rec_low(
+				cur2, new_index, new_rec, new_offsets, mtr);
+
+			if (!ins_rec) {
+				mem_heap_free(rec_heap);
+				mem_heap_free(heap);
+				return(false);
+			}
+
+			page_cur_move_to_next(&cur1);
+			cur2 = ins_rec;
+		}
+
+		mem_heap_free(rec_heap);
+	} else {
+		while (!page_cur_is_after_last(&cur1)) {
+			rec_t*	cur1_rec = page_cur_get_rec(&cur1);
+			rec_t*	ins_rec;
+			offsets = rec_get_offsets(cur1_rec, index, offsets,
+						  ULINT_UNDEFINED, &heap);
+			ins_rec = page_cur_insert_rec_low(
+				cur2, new_index, cur1_rec, offsets, mtr);
+			ut_a(ins_rec);
+
+			page_cur_move_to_next(&cur1);
+			cur2 = ins_rec;
+		}
 	}
 
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
+
+	return(true);
 }
 
 #ifndef UNIV_HOTBACKUP
@@ -644,7 +739,7 @@ page_copy_rec_list_end(
 						       index, mtr);
 	} else {
 		page_copy_rec_list_end_no_locks(new_block, block, rec,
-						index, mtr);
+						index, index, mtr);
 	}
 
 	/* Update PAGE_MAX_TRX_ID on the uncompressed page.

=== modified file 'storage/innobase/page/page0zip.cc'
--- a/storage/innobase/page/page0zip.cc	revid:marko.makela@stripped30h69jlo94a8z
+++ b/storage/innobase/page/page0zip.cc	revid:marko.makela@stripped1
@@ -312,7 +312,7 @@ page_zip_dir_get(
 #ifndef UNIV_HOTBACKUP
 /**********************************************************************//**
 Write a log record of compressing an index page. */
-static
+UNIV_INTERN
 void
 page_zip_compress_write_log(
 /*========================*/
@@ -4498,7 +4498,7 @@ page_zip_reorganize(
 
 	page_copy_rec_list_end_no_locks(block, temp_block,
 					page_get_infimum_rec(temp_page),
-					index, mtr);
+					index, index, mtr);
 
 	if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
 		/* Copy max trx id to recreated page */

=== modified file 'storage/innobase/row/row0merge.cc'
--- a/storage/innobase/row/row0merge.cc	revid:marko.makela@stripped
+++ b/storage/innobase/row/row0merge.cc	revid:marko.makela@stripped-20120510130933-7li0w21gdgsv11x1
@@ -3001,7 +3001,7 @@ err_exit:
 /*********************************************************************//**
 Create and execute a query graph for creating an index.
 @return	DB_SUCCESS or error code */
-static __attribute__((nonnull, warn_unused_result))
+UNIV_INTERN
 dberr_t
 row_merge_create_index_graph(
 /*=========================*/
@@ -3014,10 +3014,6 @@ row_merge_create_index_graph(
 	que_thr_t*	thr;		/*!< Query thread */
 	dberr_t		err;
 
-	ut_ad(trx);
-	ut_ad(table);
-	ut_ad(index);
-
 	heap = mem_heap_create(512);
 
 	index->table = table;

=== modified file 'storage/innobase/row/row0mysql.cc'
--- a/storage/innobase/row/row0mysql.cc	revid:marko.makela@strippeda8z
+++ b/storage/innobase/row/row0mysql.cc	revid:marko.makela@stripped
@@ -2185,12 +2185,8 @@ err_exit:
 	case TRX_DICT_OP_NONE:
 		trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
 	case TRX_DICT_OP_TABLE:
-		break;
 	case TRX_DICT_OP_INDEX:
-		/* If the transaction was previously flagged as
-		TRX_DICT_OP_INDEX, we should be creating auxiliary
-		tables for full-text indexes. */
-		ut_ad(strstr(table->name, "/FTS_") != NULL);
+		break;
 	}
 
 	node = tab_create_graph_create(table, heap);

=== modified file 'storage/innobase/row/row0sel.cc'
--- a/storage/innobase/row/row0sel.cc	revid:marko.makela@stripped
+++ b/storage/innobase/row/row0sel.cc	revid:marko.makela@oracle.com-20120510130933-7li0w21gdgsv11x1
@@ -2818,7 +2818,8 @@ row_sel_store_mysql_field_func(
 	ut_ad(field_no == templ->clust_rec_field_no
 	      || field_no == templ->rec_field_no
 	      || field_no == templ->icp_rec_field_no);
-	ut_ad(rec_offs_validate(rec, index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, index, offsets));
+	/* TODO: ADD COLUMN, DROP COLUMN, reorder columns */
 
 	if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) {
 
@@ -2957,7 +2958,7 @@ row_sel_store_mysql_rec(
 {
 	ulint	i;
 
-	ut_ad(rec_clust || index == prebuilt->index);
+	ut_ad(rec_clust || dict_index_is_equivalent(index, prebuilt->index));
 	ut_ad(!rec_clust || dict_index_is_clust(index));
 
 	if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
@@ -3569,7 +3570,7 @@ row_search_idx_cond_check(
 	enum icp_result result;
 	ulint		i;
 
-	ut_ad(rec_offs_validate(rec, prebuilt->index, offsets));
+	ut_ad(rec_offs_validate_relaxed(rec, prebuilt->index, offsets));
 
 	if (!prebuilt->idx_cond) {
 		return(ICP_MATCH);
@@ -4331,13 +4332,28 @@ wrong_offs:
 	/* Calculate the 'offsets' associated with 'rec' */
 
 	ut_ad(fil_page_get_type(btr_pcur_get_page(pcur)) == FIL_PAGE_INDEX);
-	ut_ad(btr_page_get_index_id(btr_pcur_get_page(pcur)) == index->id);
+
+	{
+		index_id_t page_index_id = btr_page_get_index_id(
+			page_align(rec));
+
+		if (index->id != page_index_id) {
+			ut_ad(index == clust_index);
+			index = dict_index_get_version(index, page_index_id);
+			if (!index) {
+				/* The index definition must be found. */
+				ut_a(srv_force_recovery > 0);
+				goto skip_rec;
+			}
+		}
+	}
 
 	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
 
 	if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
 		if (!rec_validate(rec, offsets)
 		    || !btr_index_rec_validate(rec, index, FALSE)) {
+skip_rec:
 			fprintf(stderr,
 				"InnoDB: Index corruption: rec offs %lu"
 				" next offs %lu, page no %lu,\n"
@@ -4482,7 +4498,7 @@ wrong_offs:
 		col1 >= 100, and we find a record where col1 = 100, then no
 		need to lock the gap before that record. */
 
-		if (index == clust_index
+		if (dict_index_is_clust(index)
 		    && mode == PAGE_CUR_GE
 		    && direction == 0
 		    && dtuple_get_n_fields_cmp(search_tuple)
@@ -4493,7 +4509,7 @@ no_gap_lock:
 		}
 
 		err = sel_set_rec_lock(btr_pcur_get_block(pcur),
-				       rec, index, offsets,
+				       rec, btr_pcur_get_index(pcur), offsets,
 				       prebuilt->select_lock_type,
 				       lock_type, thr);
 
@@ -4517,7 +4533,7 @@ no_gap_lock:
 			if (UNIV_LIKELY(prebuilt->row_read_type
 					!= ROW_READ_TRY_SEMI_CONSISTENT)
 			    || unique_search
-			    || index != clust_index) {
+			    || !dict_index_is_clust(index)) {
 
 				goto lock_wait_or_error;
 			}
@@ -4580,7 +4596,7 @@ no_gap_lock:
 			/* Do nothing: we let a non-locking SELECT read the
 			latest version of the record */
 
-		} else if (index == clust_index) {
+		} else if (dict_index_is_clust(index)) {
 
 			/* Fetch a previous version of the row if the current
 			one is not visible in the snapshot; if we have a very
@@ -4677,7 +4693,7 @@ locks_ok:
 		applicable to unique secondary indexes. Current behaviour is
 		to widen the scope of a lock on an already delete marked record
 		if the same record is deleted twice by the same transaction */
-		if (index == clust_index && unique_search) {
+		if (dict_index_is_clust(index) && unique_search) {
 			err = DB_RECORD_NOT_FOUND;
 
 			goto normal_return;
@@ -4703,10 +4719,11 @@ locks_ok:
 	/* Get the clustered index record if needed, if we did not do the
 	search using the clustered index. */
 
-	if (index != clust_index && prebuilt->need_to_access_clustered) {
+	if (!dict_index_is_clust(index)
+	    && prebuilt->need_to_access_clustered) {
 
 requires_clust_rec:
-		ut_ad(index != clust_index);
+		ut_ad(!dict_index_is_clust(index));
 		/* We use a 'goto' to the preceding label if a consistent
 		read of a secondary index record requires us to look up old
 		versions of the associated clustered index record. */

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-trunk-wl5854 branch (marko.makela:3766 to 3767) WL#5854 WL#6255 - marko.makela - 22 May