List:Commits« Previous MessageNext Message »
From:marko.makela Date:May 28 2012 2:14pm
Subject:bzr push into mysql-trunk-wl6255 branch (marko.makela:3881 to 3886) WL#6255
View as plain text  
 3886 Marko Mäkelä	2012-05-28
      WL#6255: Logging of rolled-back transactions.
      
      To avoid dereferencing freed BLOB pages during online table rebuild,
      we must keep track of rollbacks of inserts to the table that is
      being rebuilt, and skip any records that were inserted by such a
      transaction. As an optimization, we also keep track of rolled-back updates,
      so that we can copy less data during online table rebuild.
      
      row_log_table_rollback(): Report a rolled-back transaction. Called by
      row_undo_ins() and row_undo_mod().
      
      row_log_table_is_rollback(): Determine if a given transaction was
      rolled back after it modified the table that is being rebuilt.

    modified:
      storage/innobase/include/row0log.h
      storage/innobase/row/row0log.cc
      storage/innobase/row/row0uins.cc
      storage/innobase/row/row0umod.cc
 3885 Marko Mäkelä	2012-05-28
      [... start of commit message lost in the archive ...] fields for online table rebuild.
      
      row_log_allocate(): Add the parameters table, same_pk.
      
      row_log_table_get_error(): Accessor to index->online_log->error.
      
      row_log_t: Online table rebuild log record types

    modified:
      storage/innobase/handler/handler0alter.cc
      storage/innobase/include/row0log.h
      storage/innobase/row/row0log.cc
 3884 Marko Mäkelä	2012-05-28
      [... commit message lost in the archive ...]

    modified:
      storage/innobase/row/row0log.cc
 3883 Marko Mäkelä	2012-05-28
      WL#6255 preparation: Add dummy query graph to ha_innobase_inplace_ctx,
      for applying the online table rebuild log.

    modified:
      storage/innobase/handler/handler0alter.cc
 3882 Marko Mäkelä	2012-05-28
      [... start of commit message lost in the archive ...] INNOBASE_ONLINE_OPERATIONS.

    modified:
      storage/innobase/handler/handler0alter.cc
 3881 Marko Mäkelä	2012-05-28
      WL#6255 preparation: Simplify the index->online_status for rebuilding table.
      
      dict_index_is_online_ddl(): On clustered index, assert that online_status
      is either ONLINE_INDEX_CREATION or ONLINE_INDEX_COMPLETE.
      
      dict_table_is_online_rebuild(): Invoke dict_index_is_online_ddl().

    modified:
      storage/innobase/include/dict0dict.h
      storage/innobase/include/dict0dict.ic
      storage/innobase/include/dict0mem.h
=== modified file 'storage/innobase/handler/handler0alter.cc'
--- a/storage/innobase/handler/handler0alter.cc	revid:marko.makela@strippedz3qs070
+++ b/storage/innobase/handler/handler0alter.cc	revid:marko.makela@strippeddcz
@@ -40,6 +40,7 @@ Smart ALTER TABLE
 #include "handler0alter.h"
 #include "srv0mon.h"
 #include "fts0priv.h"
+#include "pars0pars.h"
 
 #include "ha_innodb.h"
 
@@ -78,9 +79,8 @@ static const Alter_inplace_info::HA_ALTE
 /** Operations that InnoDB can perform online */
 static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_OPERATIONS
 	= INNOBASE_INPLACE_IGNORE
-	| Alter_inplace_info::ADD_INDEX
+	| INNOBASE_ONLINE_CREATE
 	| Alter_inplace_info::DROP_INDEX
-	| Alter_inplace_info::ADD_UNIQUE_INDEX
 	| Alter_inplace_info::DROP_UNIQUE_INDEX
 	| Alter_inplace_info::DROP_INDEX
 	| Alter_inplace_info::DROP_FOREIGN_KEY
@@ -1168,6 +1168,8 @@ innobase_create_temporary_tablename(
 class ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
 {
 public:
+	/** Dummy query graph */
+	que_thr_t*	thr;
 	/** InnoDB indexes being created */
 	dict_index_t**	add;
 	/** MySQL key numbers for the InnoDB indexes that are being created */
@@ -1190,7 +1192,8 @@ public:
 	trx_t*		trx;
 	/** table where the indexes are being created or dropped */
 	dict_table_t*	indexed_table;
-	ha_innobase_inplace_ctx(dict_index_t** add_arg,
+	ha_innobase_inplace_ctx(trx_t* user_trx,
+				dict_index_t** add_arg,
 				const ulint* add_key_numbers_arg,
 				ulint num_to_add_arg,
 				dict_index_t** drop_arg,
@@ -1216,6 +1219,8 @@ public:
 			ut_ad(drop[i]->to_be_dropped);
 		}
 #endif /* UNIV_DEBUG */
+
+		thr = pars_complete_graph_for_exec(NULL, user_trx, heap);
 	}
 	~ha_innobase_inplace_ctx() {
 		mem_heap_free(heap);
@@ -1712,7 +1717,8 @@ col_fail:
 					error = DB_OUT_OF_MEMORY;
 					goto error_handling;);
 			rw_lock_x_lock(&add_index[num_created]->lock);
-			bool ok = row_log_allocate(add_index[num_created]);
+			bool ok = row_log_allocate(add_index[num_created],
+						   NULL, true);
 			rw_lock_x_unlock(&add_index[num_created]->lock);
 
 			if (!ok) {
@@ -1808,7 +1814,7 @@ error_handling:
 			     user_table, CHECK_PARTIAL_OK));
 		ut_d(mutex_exit(&dict_sys->mutex));
 		ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx(
-			add_index, add_key_nums, n_add_index,
+			user_trx, add_index, add_key_nums, n_add_index,
 			drop_index, n_drop_index,
 			drop_foreign, n_drop_foreign,
 			!locked && !new_clustered && !num_fts_index,
@@ -2305,7 +2311,7 @@ index_needed:
 				== Alter_info::ALTER_TABLE_LOCK_SHARED;
 			ha_alter_info->handler_ctx
 				= new ha_innobase_inplace_ctx(
-					NULL, NULL, 0,
+					prebuilt->trx, NULL, NULL, 0,
 					drop_index, n_drop_index,
 					drop_fk, n_drop_fk, !locked,
 					heap, NULL, indexed_table);

=== modified file 'storage/innobase/include/row0log.h'
--- a/storage/innobase/include/row0log.h	revid:marko.makela@oracle.com-20120528133824-t71zvj9ttz3qs070
+++ b/storage/innobase/include/row0log.h	revid:marko.makela@stripped120528140900-k1hub8ffw2u2ldcz
@@ -18,7 +18,7 @@ this program; if not, write to the Free
 
 /**************************************************//**
 @file include/row0log.h
-Modification log for online index creation
+Modification log for online index creation and online table rebuild
 
 Created 2011-05-26 Marko Makela
 *******************************************************/
@@ -28,9 +28,11 @@ Created 2011-05-26 Marko Makela
 
 #include "univ.i"
 #include "row0types.h"
+#include "rem0types.h"
 #include "data0types.h"
 #include "dict0types.h"
 #include "trx0types.h"
+#include "que0types.h"
 
 /******************************************************//**
 Allocate the row log for an index and flag the index
@@ -40,8 +42,12 @@ UNIV_INTERN
 bool
 row_log_allocate(
 /*=============*/
-	dict_index_t*	index)	/*!< in/out: index */
-	__attribute__((nonnull));
+	dict_index_t*	index,	/*!< in/out: index */
+	dict_table_t*	table,	/*!< in/out: new table being rebuilt,
+				or NULL when creating a secondary index */
+	bool		same_pk)/*!< in: whether the definition of the
+				PRIMARY KEY has remained the same */
+	__attribute__((nonnull(1), warn_unused_result));
 /******************************************************//**
 Free the row log for an index on which online creation was aborted. */
 UNIV_INTERN
@@ -64,6 +70,38 @@ row_log_online_op(
 	UNIV_COLD __attribute__((nonnull));
 
 /******************************************************//**
+Gets the error status of the online index rebuild log.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_log_table_get_error(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: clustered index of a table
+					that is being rebuilt online */
+	__attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Notes that a transaction is being rolled back. */
+UNIV_INTERN
+void
+row_log_table_rollback(
+/*===================*/
+	dict_index_t*	index,	/*!< in/out: clustered index */
+	trx_id_t	trx_id)	/*!< in: transaction being rolled back */
+	UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Check if a transaction rollback has been initiated.
+@return true if inserts of this transaction were rolled back */
+UNIV_INTERN
+bool
+row_log_table_is_rollback(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: clustered index */
+	trx_id_t		trx_id)	/*!< in: transaction id */
+	__attribute__((nonnull));
+
+/******************************************************//**
 Get the latest transaction ID that has invoked row_log_online_op()
 during online creation.
 @return latest transaction ID, or 0 if nothing was logged */

=== modified file 'storage/innobase/row/row0log.cc'
--- a/storage/innobase/row/row0log.cc	revid:marko.makela@strippedj9ttz3qs070
+++ b/storage/innobase/row/row0log.cc	revid:marko.makela@stripped
@@ -18,7 +18,7 @@ this program; if not, write to the Free
 
 /**************************************************//**
 @file row/row0log.cc
-Modification log for online index creation
+Modification log for online index creation and online table rebuild
 
 Created 2011-05-26 Marko Makela
 *******************************************************/
@@ -29,6 +29,20 @@ Created 2011-05-26 Marko Makela
 #include "row0upd.h"
 #include "row0merge.h"
 #include "data0data.h"
+#include "que0que.h"
+
+#include<set>
+
+/** Table row modification operations during online table rebuild.
+Delete-marked records are not copied to the rebuilt table. */
+enum row_tab_op {
+	/** Insert a record */
+	ROW_T_INSERT,
+	/** Update a record in place */
+	ROW_T_UPDATE,
+	/** Delete (purge) a record */
+	ROW_T_DELETE
+};
 
 #ifdef UNIV_DEBUG
 /** Write information about the applied record to the error log */
@@ -44,7 +58,7 @@ static bool row_log_apply_print;
 #define ROW_LOG_HEADER_SIZE 2/*op, extra_size*/
 
 /** Log block for modifications during online index creation */
-struct row_log_buf_struct {
+struct row_log_buf_t {
 	byte*		block;	/*!< file block buffer */
 	mrec_buf_t	buf;	/*!< buffer for accessing a record
 				that spans two blocks */
@@ -52,20 +66,36 @@ struct row_log_buf_struct {
 	ulint		bytes;	/*!< current position within buf */
 };
 
-/** Log block for modifications during online index creation */
-typedef struct row_log_buf_struct row_log_buf_t;
+/** Set of transactions that rolled back inserts of BLOBs during
+online table rebuild */
+typedef std::set<trx_id_t> trx_id_set;
 
 /** @brief Buffer for logging modifications during online index creation
 
 All modifications to an index that is being created will be logged by
 row_log_online_op() to this buffer.
 
+All modifications to a table that is being rebuilt will be logged by
+row_log_table_delete(), row_log_table_update(), row_log_table_insert()
+to this buffer.
+
 When head.blocks == tail.blocks, the reader will access tail.block
 directly. When also head.bytes == tail.bytes, both counts will be
 reset to 0 and the file will be truncated. */
 struct row_log_t {
 	int		fd;	/*!< file descriptor */
-	mutex_t		mutex;	/*!< mutex protecting max_trx and tail */
+	mutex_t		mutex;	/*!< mutex protecting trx_rb, error,
+				max_trx and tail */
+	trx_id_set*	trx_rb;	/*!< set of transactions that rolled back
+				inserts of BLOBs during online table rebuild;
+				protected by mutex */
+	dict_table_t*	table;	/*!< table that is being rebuilt,
+				or NULL when this is a secondary
+				index that is being created online */
+	bool		same_pk;/*!< whether the definition of the PRIMARY KEY
+				has remained the same */
+	dberr_t		error;	/*!< error that occurred during online
+				table rebuild */
 	trx_id_t	max_trx;/*!< biggest observed trx_id in
 				row_log_online_op();
 				protected by mutex and index->lock S-latch,
@@ -236,6 +266,84 @@ write_failed:
 }
 
 /******************************************************//**
+Gets the error status of the online index rebuild log.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_log_table_get_error(
+/*====================*/
+	const dict_index_t*	index)	/*!< in: clustered index of a table
+					that is being rebuilt online */
+{
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(dict_index_is_online_ddl(index));
+	return(index->online_log->error);
+}
+
+/******************************************************//**
+Notes that a transaction is being rolled back. */
+UNIV_INTERN
+void
+row_log_table_rollback(
+/*===================*/
+	dict_index_t*	index,	/*!< in/out: clustered index */
+	trx_id_t	trx_id)	/*!< in: transaction being rolled back */
+{
+	ut_ad(dict_index_is_clust(index));
+#ifdef UNIV_DEBUG
+	ibool	corrupt	= FALSE;
+	ut_ad(trx_rw_is_active(trx_id, &corrupt));
+	ut_ad(!corrupt);
+#endif /* UNIV_DEBUG */
+
+	/* Protect transitions of index->online_status and access to
+	index->online_log. */
+	rw_lock_s_lock(&index->lock);
+
+	if (dict_index_is_online_ddl(index)) {
+		ut_ad(index->online_log);
+		ut_ad(index->online_log->table);
+		mutex_enter(&index->online_log->mutex);
+		trx_id_set*	trxs = index->online_log->trx_rb;
+
+		if (!trxs) {
+			index->online_log->trx_rb = trxs = new trx_id_set();
+		}
+
+		trxs->insert(trx_id);
+
+		mutex_exit(&index->online_log->mutex);
+	}
+
+	rw_lock_s_unlock(&index->lock);
+}
+
+/******************************************************//**
+Check if a transaction rollback has been initiated.
+@return true if inserts of this transaction were rolled back */
+UNIV_INTERN
+bool
+row_log_table_is_rollback(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: clustered index */
+	trx_id_t		trx_id)	/*!< in: transaction id */
+{
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(dict_index_is_online_ddl(index));
+	ut_ad(index->online_log);
+
+	if (const trx_id_set* trxs = index->online_log->trx_rb) {
+		mutex_enter(&index->online_log->mutex);
+		bool is_rollback = trxs->find(trx_id) != trxs->end();
+		mutex_exit(&index->online_log->mutex);
+
+		return(is_rollback);
+	}
+
+	return(false);
+}
+
+/******************************************************//**
 Allocate the row log for an index and flag the index
 for online creation.
 @retval true if success, false if not */
@@ -243,13 +351,20 @@ UNIV_INTERN
 bool
 row_log_allocate(
 /*=============*/
-	dict_index_t*	index)	/*!< in/out: index */
+	dict_index_t*	index,	/*!< in/out: index */
+	dict_table_t*	table,	/*!< in/out: new table being rebuilt,
+				or NULL when creating a secondary index */
+	bool		same_pk)/*!< in: whether the definition of the
+				PRIMARY KEY has remained the same */
 {
 	byte*		buf;
 	row_log_t*	log;
 	ulint		size;
 
 	ut_ad(!dict_index_is_online_ddl(index));
+	ut_ad(dict_index_is_clust(index) == !!table);
+	ut_ad(!table || index->table != table);
+	ut_ad(same_pk || table);
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
@@ -264,6 +379,9 @@ row_log_allocate(
 	log->fd = row_merge_file_create_low();
 	mutex_create(index_online_log_key, &log->mutex,
 		     SYNC_INDEX_ONLINE_LOG);
+	log->table = table;
+	log->same_pk = same_pk;
+	log->error = DB_SUCCESS;
 	log->max_trx = 0;
 	log->head.block = buf;
 	log->tail.block = buf + srv_sort_buf_size;
@@ -290,6 +408,7 @@ row_log_free_low(
 {
 	MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX);
 
+	delete log->trx_rb;
 	row_merge_file_destroy_low(log->fd);
 	mutex_free(&log->mutex);
 	os_mem_free_large(log->head.block, log->size);
@@ -344,7 +463,7 @@ row_log_apply_op_low(
 					allocating offsets; can be emptied */
 	mem_heap_t*	heap,		/*!< in/out: memory heap for
 					allocating data tuples */
-	ibool		has_index_lock, /*!< in: TRUE if holding index->lock
+	bool		has_index_lock, /*!< in: true if holding index->lock
 					in exclusive mode */
 	enum row_op	op,		/*!< in: operation being applied */
 	trx_id_t	trx_id,		/*!< in: transaction identifier */
@@ -636,7 +755,7 @@ row_log_apply_op(
 					allocating offsets; can be emptied */
 	mem_heap_t*	heap,		/*!< in/out: memory heap for
 					allocating data tuples */
-	ibool		has_index_lock, /*!< in: TRUE if holding index->lock
+	bool		has_index_lock, /*!< in: true if holding index->lock
 					in exclusive mode */
 	const mrec_t*	mrec,		/*!< in: merge record */
 	const mrec_t*	mrec_end,	/*!< in: end of buffer */
@@ -774,7 +893,7 @@ row_log_apply_ops(
 	mem_heap_t*	offsets_heap;
 	mem_heap_t*	heap;
 	ulint*		offsets;
-	ibool		has_index_lock;
+	bool		has_index_lock;
 	const ulint	i	= 1 + REC_OFFS_HEADER_SIZE
 		+ dict_index_get_n_fields(index);
 
@@ -792,7 +911,7 @@ row_log_apply_ops(
 
 	offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
 	heap = mem_heap_create(UNIV_PAGE_SIZE);
-	has_index_lock = TRUE;
+	has_index_lock = true;
 
 next_block:
 	ut_ad(has_index_lock);
@@ -851,7 +970,7 @@ all_done:
 			* srv_sort_buf_size;
 
 		if (has_index_lock) {
-			has_index_lock = FALSE;
+			has_index_lock = false;
 			rw_lock_x_unlock(dict_index_get_lock(index));
 		}
 
@@ -1005,7 +1124,7 @@ all_done:
 			mrec = NULL;
 process_next_block:
 			rw_lock_x_lock(dict_index_get_lock(index));
-			has_index_lock = TRUE;
+			has_index_lock = true;
 
 			index->online_log->head.bytes = 0;
 			index->online_log->head.blocks++;

=== modified file 'storage/innobase/row/row0uins.cc'
--- a/storage/innobase/row/row0uins.cc	revid:marko.makela@stripped
+++ b/storage/innobase/row/row0uins.cc	revid:marko.makela@stripped
@@ -38,6 +38,7 @@ Created 2/25/1997 Heikki Tuuri
 #include "mach0data.h"
 #include "row0undo.h"
 #include "row0vers.h"
+#include "row0log.h"
 #include "trx0trx.h"
 #include "trx0rec.h"
 #include "row0row.h"
@@ -408,9 +409,18 @@ row_undo_ins(
 
 	/* Iterate over all the indexes and undo the insert.*/
 
+	node->index = dict_table_get_first_index(node->table);
+	ut_ad(dict_index_is_clust(node->index));
+
+	if (dict_index_is_online_ddl(node->index)) {
+		/* Note that we are rolling back this transaction, so
+		that all inserts and updates with this DB_TRX_ID can
+		be skipped. */
+		row_log_table_rollback(node->index, node->trx->id);
+	}
+
 	/* Skip the clustered index (the first index) */
-	node->index = dict_table_get_next_index(
-		dict_table_get_first_index(node->table));
+	node->index = dict_table_get_next_index(node->index);
 
 	dict_table_skip_corrupt_index(node->index);
 

=== modified file 'storage/innobase/row/row0umod.cc'
--- a/storage/innobase/row/row0umod.cc	revid:marko.makela@strippedqs070
+++ b/storage/innobase/row/row0umod.cc	revid:marko.makela@stripped
@@ -37,6 +37,7 @@ Created 2/27/1997 Heikki Tuuri
 #include "mach0data.h"
 #include "row0undo.h"
 #include "row0vers.h"
+#include "row0log.h"
 #include "trx0trx.h"
 #include "trx0rec.h"
 #include "row0row.h"
@@ -940,8 +941,18 @@ row_undo_mod(
 		return(DB_SUCCESS);
 	}
 
-	node->index = dict_table_get_next_index(
-		dict_table_get_first_index(node->table));
+	node->index = dict_table_get_first_index(node->table);
+	ut_ad(dict_index_is_clust(node->index));
+
+	if (dict_index_is_online_ddl(node->index)) {
+		/* Note that we are rolling back this transaction, so
+		that all inserts and updates with this DB_TRX_ID can
+		be skipped. */
+		row_log_table_rollback(node->index, node->trx->id);
+	}
+
+	/* Skip the clustered index (the first index) */
+	node->index = dict_table_get_next_index(node->index);
 
 	/* Skip all corrupted secondary index */
 	dict_table_skip_corrupt_index(node->index);

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-trunk-wl6255 branch (marko.makela:3881 to 3886) WL#6255marko.makela29 May