List:Commits« Previous MessageNext Message »
From:marko.makela Date:January 24 2011 10:28am
Subject:bzr commit into mysql-trunk-innodb branch (marko.makela:3453)
View as plain text  
#At file:///home/marko/innobase/dev/mysql2a/5.6-innodb/ based on revid:marko.makela@strippedr26rvg36hev7

 3453 Marko Mäkelä	2011-01-24
      Instead of checking if trx->state != some_expected_value, check
      trx->state against all allowed values, to detect corruption.
      
      trx_state_eq(trx, state): Test if trx->state == state. In debug builds,
      check trx->state, trx->in_mysql_trx_list, and trx->in_trx_list for corruption.
      
      trx_assert_started(): A debug predicate. Assert if trx is not in one
      of the active states.
      
      trx_in_trx_list(): Assert trx_was_started(). This allows us to remove
      the trx->state assertions in the caller.
      
      lock_table_queue_validate(), lock_rec_queue_validate(),
      lock_rec_validate_page(): Remove unnecessary acquisition of trx->mutex.
      Use trx_state_eq(), trx_was_started(), or trx_in_trx_list() for the
      trx->state checks.
      
      trx_is_active_low(), trx_recover_for_mysql(), trx_get_trx_by_xid(),
      read_view_open_now_low(), read_cursor_view_create_for_mysql(),
      row_vers_build_for_semi_consistent_read():
      Use trx_state_eq().
      
      trx_rollback_for_mysql(), trx_rollback_last_sql_stat_for_mysql(),
      trx_rollback_to_savepoint_for_mysql(), trx_rollback_resurrected(),
      trx_commit_or_rollback_prepare(), trx_mark_sql_stat_end():
      Check trx->state against all allowed values. Committed transactions
      must not be rolled back or committed. Partial rollback of prepared
      transactions is forbidden. Committed or prepared transactions must not
      execute statements.
      
      trx_rollback_to_savepoint_low(): Refactored from
      trx_rollback_to_savepoint_for_mysql(). Roll back to a savepoint
      identified by trx_named_savept_t.
      
      trx_release_savepoint_for_mysql(): Add debug assertions.
      
      trx_sys_init_at_db_start(): Use trx_state_eq(trx, TRX_STATE_ACTIVE).
      This might correct a bug where rows_to_undo would include the rows of
      recovered committed transactions.
      
      trx_list_insert_ordered(), trx_start_low(), trx_commit():
      Add debug assertions.
      
      rb://579 approved by Jimmy Yang

    modified:
      storage/innobase/include/trx0roll.h
      storage/innobase/include/trx0sys.h
      storage/innobase/include/trx0sys.ic
      storage/innobase/include/trx0trx.h
      storage/innobase/include/trx0trx.ic
      storage/innobase/lock/lock0lock.c
      storage/innobase/read/read0read.c
      storage/innobase/row/row0vers.c
      storage/innobase/trx/trx0roll.c
      storage/innobase/trx/trx0sys.c
      storage/innobase/trx/trx0trx.c
=== modified file 'storage/innobase/include/trx0roll.h'
--- a/storage/innobase/include/trx0roll.h	revid:marko.makela@stripped
+++ b/storage/innobase/include/trx0roll.h	revid:marko.makela@oracle.com-20110124102828-1o7l37nibd18jwja
@@ -149,7 +149,7 @@ UNIV_INTERN
 int
 trx_rollback_for_mysql(
 /*===================*/
-	trx_t*	trx);	/*!< in: transaction handle */
+	trx_t*	trx);	/*!< in/out: transaction */
 /*******************************************************************//**
 Rollback the latest SQL statement for MySQL.
 @return	error code or DB_SUCCESS */
@@ -157,7 +157,7 @@ UNIV_INTERN
 int
 trx_rollback_last_sql_stat_for_mysql(
 /*=================================*/
-	trx_t*	trx);	/*!< in: transaction handle */
+	trx_t*	trx);	/*!< in/out: transaction */
 /*******************************************************************//**
 Rollback a transaction used in MySQL.
 @return	error code or DB_SUCCESS */

=== modified file 'storage/innobase/include/trx0sys.h'
--- a/storage/innobase/include/trx0sys.h	revid:marko.makela@stripped110124082539-jxezr26rvg36hev7
+++ b/storage/innobase/include/trx0sys.h	revid:marko.makela@stripped8-1o7l37nibd18jwja
@@ -331,7 +331,7 @@ UNIV_INTERN
 ibool
 trx_in_trx_list(
 /*============*/
-	trx_t*	in_trx);/*!< in: trx */
+	const trx_t*	in_trx);/*!< in: transaction */
 #endif /* UNIV_DEBUG */
 /*****************************************************************//**
 Updates the offset information about the end of the MySQL binlog entry

=== modified file 'storage/innobase/include/trx0sys.ic'
--- a/storage/innobase/include/trx0sys.ic	revid:marko.makela@stripped
+++ b/storage/innobase/include/trx0sys.ic	revid:marko.makela@oracle.com-20110124102828-1o7l37nibd18jwja
@@ -415,8 +415,7 @@ trx_is_active_low(
 		trx = trx_get_on_id_low(trx_id);
 
 		if (trx != NULL
-		    && (trx->state != TRX_STATE_ACTIVE
-			&& trx->state != TRX_STATE_PREPARED)) {
+		    && trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
 
 			trx = NULL;
 		}

=== modified file 'storage/innobase/include/trx0trx.h'
--- a/storage/innobase/include/trx0trx.h	revid:marko.makela@strippedg36hev7
+++ b/storage/innobase/include/trx0trx.h	revid:marko.makela@stripped
@@ -152,7 +152,7 @@ UNIV_INTERN
 ulint
 trx_commit_for_mysql(
 /*=================*/
-	trx_t*	trx);	/*!< in: trx handle */
+	trx_t*	trx);	/*!< in/out: transaction */
 /**********************************************************************//**
 Does the transaction prepare for MySQL. */
 UNIV_INTERN
@@ -212,7 +212,7 @@ UNIV_INTERN
 void
 trx_commit_or_rollback_prepare(
 /*===========================*/
-	trx_t*		trx);		/*!< in: transaction */
+	trx_t*	trx);	/*!< in/out: transaction */
 /*********************************************************************//**
 Creates a commit command node struct.
 @return	own: commit node struct */
@@ -315,6 +315,31 @@ trx_set_dict_operation(
 
 #ifndef UNIV_HOTBACKUP
 /**********************************************************************//**
+Determines if a transaction is in the given state.
+The caller must hold trx_sys->lock, or it must be the thread
+that is serving a running transaction.
+@return	TRUE if trx->state == state */
+UNIV_INLINE
+ibool
+trx_state_eq(
+/*=========*/
+	const trx_t*	trx,	/*!< in: transaction */
+	trx_state_t	state)	/*!< in: state */
+	__attribute__((nonnull, warn_unused_result));
+# ifdef UNIV_DEBUG
+/**********************************************************************//**
+Asserts that a transaction has been started.
+The caller must hold trx_sys->lock.
+@return TRUE if started */
+UNIV_INTERN
+ibool
+trx_assert_started(
+/*===============*/
+	const trx_t*	trx)	/*!< in: transaction */
+	__attribute__((nonnull, warn_unused_result));
+# endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
 Determines if the currently running transaction has been interrupted.
 @return	TRUE if interrupted */
 UNIV_INTERN

=== modified file 'storage/innobase/include/trx0trx.ic'
--- a/storage/innobase/include/trx0trx.ic	revid:marko.makela@stripped2539-jxezr26rvg36hev7
+++ b/storage/innobase/include/trx0trx.ic	revid:marko.makela@stripped7nibd18jwja
@@ -23,6 +23,36 @@ The transaction
 Created 3/26/1996 Heikki Tuuri
 *******************************************************/
 
+/**********************************************************************//**
+Determines if a transaction is in the given state.
+The caller must hold trx_sys->lock, or it must be the thread
+that is serving a running transaction.
+@return	TRUE if trx->state == state */
+UNIV_INLINE
+ibool
+trx_state_eq(
+/*=========*/
+	const trx_t*	trx,	/*!< in: transaction */
+	trx_state_t	state)	/*!< in: state */
+{
+#ifdef UNIV_DEBUG
+	switch (trx->state) {
+	case TRX_STATE_NOT_STARTED:
+		/* This state is not allowed for running transactions. */
+		ut_a(state == TRX_STATE_NOT_STARTED);
+		ut_ad(!trx->in_trx_list);
+		return(state == trx->state);
+	case TRX_STATE_ACTIVE:
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+	case TRX_STATE_PREPARED:
+		ut_ad(trx->in_trx_list);
+		return(state == trx->state);
+	}
+	ut_error;
+#endif /* UNIV_DEBUG */
+	return(trx->state == state);
+}
+
 /****************************************************************//**
 Retrieves the error_info field from a trx.
 @return	the error info */

=== modified file 'storage/innobase/lock/lock0lock.c'
--- a/storage/innobase/lock/lock0lock.c	revid:marko.makela@stripped
+++ b/storage/innobase/lock/lock0lock.c	revid:marko.makela@oracle.com-20110124102828-1o7l37nibd18jwja
@@ -4726,13 +4726,13 @@ lock_print_info_all_transactions(
 /*=============================*/
 	FILE*	file)	/*!< in: file where to print */
 {
-	lock_t*	lock;
-	ibool	load_page_first = TRUE;
-	ulint	nth_trx		= 0;
-	ulint	nth_lock	= 0;
-	ulint	i;
-	mtr_t	mtr;
-	trx_t*	trx;
+	const lock_t*	lock;
+	ibool		load_page_first = TRUE;
+	ulint		nth_trx		= 0;
+	ulint		nth_lock	= 0;
+	ulint		i;
+	mtr_t		mtr;
+	const trx_t*	trx;
 
 	fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
 
@@ -4747,16 +4747,14 @@ lock_print_info_all_transactions(
 	     trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
 
 		ut_ad(trx->in_mysql_trx_list);
+
 		/* trx->state cannot change from or to NOT_STARTED
 		while we are holding the trx_sys->lock. It may change
 		from ACTIVE to PREPARED, but it may not change to
 		COMMITTED, because we are holding the lock_sys->mutex. */
-		if (trx->state == TRX_STATE_NOT_STARTED) {
+		if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
 			fputs("---", file);
 			trx_print_latched(file, trx, 600);
-			ut_ad(!trx->in_trx_list);
-		} else {
-			ut_ad(trx->in_trx_list);
 		}
 	}
 
@@ -4929,13 +4927,11 @@ lock_table_queue_validate(
 	     lock != NULL;
 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
 
-		trx_mutex_enter(lock->trx);
-		ut_ad(lock->trx->in_trx_list);
-
-		ut_a((lock->trx->state == TRX_STATE_ACTIVE)
-		     || (lock->trx->state == TRX_STATE_PREPARED)
-		     || (lock->trx->state
-			 == TRX_STATE_COMMITTED_IN_MEMORY));
+		/* lock->trx->state cannot change from or to NOT_STARTED
+		while we are holding the trx_sys->lock. It may change
+		from ACTIVE to PREPARED, but it may not change to
+		COMMITTED, because we are holding the lock_sys->mutex. */
+		ut_ad(trx_assert_started(lock->trx));
 
 		if (!lock_get_wait(lock)) {
 
@@ -4946,8 +4942,6 @@ lock_table_queue_validate(
 
 			ut_a(lock_table_has_to_wait_in_queue(lock));
 		}
-
-		trx_mutex_exit(lock->trx);
 	}
 
 	return(TRUE);
@@ -4991,17 +4985,6 @@ lock_rec_queue_validate(
 		     lock != NULL;
 		     lock = lock_rec_get_next_const(heap_no, lock)) {
 
-			trx_mutex_enter(lock->trx);
-
-			switch(lock->trx->state) {
-			case TRX_STATE_ACTIVE:
-			case TRX_STATE_PREPARED:
-			case TRX_STATE_COMMITTED_IN_MEMORY:
-				break;
-			default:
-				ut_error;
-			}
-
 			ut_a(trx_in_trx_list(lock->trx));
 
 			if (lock_get_wait(lock)) {
@@ -5011,8 +4994,6 @@ lock_rec_queue_validate(
 			if (index) {
 				ut_a(lock->index == index);
 			}
-
-			trx_mutex_exit(lock->trx);
 		}
 
 		if (!have_lock_trx_sys_mutex) {
@@ -5057,11 +5038,6 @@ lock_rec_queue_validate(
 	     lock != NULL;
 	     lock = lock_rec_get_next_const(heap_no, lock)) {
 
-		trx_mutex_enter(lock->trx);
-
-		ut_a(lock->trx->state == TRX_STATE_ACTIVE
-		     || lock->trx->state == TRX_STATE_PREPARED
-		     || lock->trx->state == TRX_STATE_COMMITTED_IN_MEMORY);
 		ut_a(trx_in_trx_list(lock->trx));
 
 		if (index) {
@@ -5084,8 +5060,6 @@ lock_rec_queue_validate(
 
 			ut_a(lock_rec_has_to_wait_in_queue(lock));
 		}
-
-		trx_mutex_exit(lock->trx);
 	}
 
 	if (!have_lock_trx_sys_mutex) {
@@ -5172,14 +5146,6 @@ loop:
 
 	ut_a(trx_in_trx_list(lock->trx));
 
-	trx_mutex_enter(lock->trx);
-
-	ut_a(lock->trx->state == TRX_STATE_ACTIVE
-	     || lock->trx->state == TRX_STATE_PREPARED
-	     || lock->trx->state == TRX_STATE_COMMITTED_IN_MEMORY);
-
-	trx_mutex_exit(lock->trx);
-
 # ifdef UNIV_SYNC_DEBUG
 	/* Only validate the record queues when this thread is not
 	holding a space->latch.  Deadlocks are possible due to

=== modified file 'storage/innobase/read/read0read.c'
--- a/storage/innobase/read/read0read.c	revid:marko.makela@stripped7
+++ b/storage/innobase/read/read0read.c	revid:marko.makela@stripped
@@ -259,8 +259,9 @@ UNIV_INLINE
 read_view_t*
 read_view_clone(
 /*============*/
-	read_view_t*	view,	/*!< in: view to clone */
-	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
+	const read_view_t*	view,	/*!< in: view to clone */
+	mem_heap_t*		heap)	/*!< in: memory heap
+					from which allocated */
 {
 	ulint		sz;
 	read_view_t*	clone;
@@ -374,14 +375,13 @@ read_view_open_now_low(
 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
 
 		ut_ad(trx->in_trx_list);
+
 		/* trx->state cannot change from or to NOT_STARTED
 		while we are holding the trx_sys->lock. It may change
 		from ACTIVE to PREPARED or COMMITTED. */
 
 		if (trx->id != cr_trx_id
-		    && (trx->state == TRX_STATE_ACTIVE
-			|| trx->state == TRX_STATE_PREPARED)) {
-
+		    && !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
 			ut_ad(n_trx < view->n_trx_ids);
 
 			view->trx_ids[n_trx++] = trx->id;
@@ -392,6 +392,11 @@ read_view_open_now_low(
 			transaction starts, we initialize trx->no to
 			IB_ULONGLONG_MAX. */
 
+			/* trx->no is protected by trx_sys->lock, which
+			we are holding. It is assigned by trx_commit()
+			before lock_trx_release_locks() assigns
+			trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */
+
 			if (view->low_limit_no > trx->no) {
 
 				view->low_limit_no = trx->no;
@@ -675,11 +680,10 @@ read_cursor_view_create_for_mysql(
 	     trx != NULL;
 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
 
-		ut_ad(trx->in_trx_list);
-
-		if (trx->state == TRX_STATE_ACTIVE
-		    || trx->state == TRX_STATE_PREPARED) {
-
+		/* trx->state cannot change from or to NOT_STARTED
+		while we are holding the trx_sys->lock. It may change
+		from ACTIVE to PREPARED or COMMITTED. */
+		if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
 			ut_a(n_trx < view->n_trx_ids);
 
 			view->trx_ids[n_trx++] = trx->id;
@@ -690,6 +694,11 @@ read_cursor_view_create_for_mysql(
 			transaction starts, we initialize trx->no to
 			IB_ULONGLONG_MAX. */
 
+			/* trx->no is protected by trx_sys->lock, which
+			we are holding. It is assigned by trx_commit()
+			before lock_trx_release_locks() assigns
+			trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */
+
 			if (view->low_limit_no > trx->no) {
 
 				view->low_limit_no = trx->no;

=== modified file 'storage/innobase/row/row0vers.c'
--- a/storage/innobase/row/row0vers.c	revid:marko.makela@oracle.com-20110124082539-jxezr26rvg36hev7
+++ b/storage/innobase/row/row0vers.c	revid:marko.makela@stripped124102828-1o7l37nibd18jwja
@@ -670,7 +670,7 @@ row_vers_build_for_semi_consistent_read(
 	version = rec;
 
 	for (;;) {
-		trx_t*		version_trx;
+		const trx_t*	version_trx;
 		mem_heap_t*	heap2;
 		rec_t*		prev_version;
 		trx_id_t	version_trx_id;
@@ -680,12 +680,19 @@ row_vers_build_for_semi_consistent_read(
 			rec_trx_id = version_trx_id;
 		}
 
-		version_trx = trx_get_on_id(version_trx_id);
+		rw_lock_s_lock(&trx_sys->lock);
+		version_trx = trx_get_on_id_low(version_trx_id);
+		/* version_trx->state cannot change from or to
+		NOT_STARTED while we are holding the trx_sys->lock.
+		It may change from ACTIVE to PREPARED or COMMITTED. */
+		if (version_trx
+		    && trx_state_eq(version_trx,
+				    TRX_STATE_COMMITTED_IN_MEMORY)) {
+			version_trx = NULL;
+		}
+		rw_lock_s_unlock(&trx_sys->lock);
 
-		if (!version_trx
-		    || version_trx->state == TRX_STATE_NOT_STARTED
-		    || version_trx->state
-		    == TRX_STATE_COMMITTED_IN_MEMORY) {
+		if (!version_trx) {
 
 			/* We found a version that belongs to a
 			committed transaction: return it. */

=== modified file 'storage/innobase/trx/trx0roll.c'
--- a/storage/innobase/trx/trx0roll.c	revid:marko.makela@stripped124082539-jxezr26rvg36hev7
+++ b/storage/innobase/trx/trx0roll.c	revid:marko.makela@stripped37nibd18jwja
@@ -158,32 +158,47 @@ UNIV_INTERN
 int
 trx_rollback_for_mysql(
 /*===================*/
-	trx_t*	trx)	/*!< in: transaction handle */
+	trx_t*	trx)	/*!< in/out: transaction */
 {
-
-	if (trx->state == TRX_STATE_NOT_STARTED) {
-
+	/* We are reading trx->state without holding trx_sys->lock
+	here, because the rollback should be invoked for a running
+	active MySQL transaction (or recovered prepared transaction)
+	that is associated with the current thread. */
+
+	switch (trx->state) {
+	case TRX_STATE_NOT_STARTED:
+		ut_ad(trx->in_mysql_trx_list);
 		return(DB_SUCCESS);
-	}
-
-	srv_active_wake_master_thread();
+	case TRX_STATE_ACTIVE:
+		ut_ad(trx->in_mysql_trx_list);
+		/* fall through */
+	case TRX_STATE_PREPARED:
+		ut_ad(trx->in_trx_list);
+
+		srv_active_wake_master_thread();
+
+		trx->op_info = "rollback";
+
+		/* If we are doing the XA recovery of prepared transactions,
+		then the transaction object does not have an InnoDB session
+		object, and we set a dummy session that we use for all MySQL
+		transactions. */
 
-	trx->op_info = "rollback";
+		trx_general_rollback_for_mysql_low(trx, NULL);
 
-	/* If we are doing the XA recovery of prepared transactions,
-	then the transaction object does not have an InnoDB session
-	object, and we set a dummy session that we use for all MySQL
-	transactions. */
-
-	trx_general_rollback_for_mysql_low(trx, NULL);
+		trx->op_info = "";
 
-	trx->op_info = "";
+		ut_a(trx->error_state == DB_SUCCESS);
 
-	ut_a(trx->error_state == DB_SUCCESS);
+		srv_active_wake_master_thread();
 
-	srv_active_wake_master_thread();
+		return((int) trx->error_state);
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		break;
+	}
 
-	return((int) trx->error_state);
+	ut_error;
+	return((int) DB_CORRUPTION);
 }
 
 /*******************************************************************//**
@@ -193,25 +208,42 @@ UNIV_INTERN
 int
 trx_rollback_last_sql_stat_for_mysql(
 /*=================================*/
-	trx_t*	trx)	/*!< in: transaction handle */
+	trx_t*	trx)	/*!< in/out: transaction */
 {
 	int	err;
 
-	if (trx->state == TRX_STATE_NOT_STARTED) {
+	/* We are reading trx->state without holding trx_sys->lock
+	here, because the statement rollback should be invoked for a
+	running active MySQL transaction that is associated with the
+	current thread. */
+	ut_ad(trx->in_mysql_trx_list);
 
+	switch (trx->state) {
+	case TRX_STATE_NOT_STARTED:
 		return(DB_SUCCESS);
-	}
+	case TRX_STATE_ACTIVE:
+		ut_ad(trx->in_trx_list);
 
-	trx->op_info = "rollback of SQL statement";
+		trx->op_info = "rollback of SQL statement";
 
-	err = trx_general_rollback_for_mysql(trx, &trx->last_sql_stat_start);
+		err = trx_general_rollback_for_mysql(
+			trx, &trx->last_sql_stat_start);
 
-	/* The following call should not be needed, but we play safe: */
-	trx_mark_sql_stat_end(trx);
+		/* The following call should not be needed,
+		but we play it safe: */
+		trx_mark_sql_stat_end(trx);
 
-	trx->op_info = "";
+		trx->op_info = "";
 
-	return(err);
+		return(err);
+	case TRX_STATE_PREPARED:
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		/* The statement rollback is only allowed on an ACTIVE
+		transaction, not a PREPARED or COMMITTED one. */
+		break;
+	}
+
+	ut_error;
 }
 
 /*******************************************************************//**
@@ -284,6 +316,56 @@ were set after this savepoint are delete
 otherwise DB_SUCCESS */
 UNIV_INTERN
 ulint
+trx_rollback_to_savepoint_low(
+/*==========================*/
+	trx_t*			trx,	/*!< in/out: transaction */
+	trx_named_savept_t*	savep,	/*!< in/out: savepoint */
+	ib_int64_t*		mysql_binlog_cache_pos)
+					/*!< out: the MySQL binlog
+					cache position corresponding
+					to this savepoint; MySQL needs
+					this information to remove the
+					binlog entries of the queries
+					executed after the savepoint */
+{
+	ulint	err;
+
+	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+	ut_ad(trx->in_mysql_trx_list);
+
+	/* Free all savepoints strictly later than savep. */
+
+	trx_roll_savepoints_free(
+		trx, UT_LIST_GET_NEXT(trx_savepoints, savep));
+
+	*mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
+
+	trx->op_info = "rollback to a savepoint";
+
+	err = trx_general_rollback_for_mysql(trx, &savep->savept);
+
+	/* Store the current undo_no of the transaction so that
+	we know where to roll back if we have to roll back the
+	next SQL statement: */
+
+	trx_mark_sql_stat_end(trx);
+
+	trx->op_info = "";
+
+	return(err);
+}
+
+/*******************************************************************//**
+Rolls back a transaction back to a named savepoint. Modifications after the
+savepoint are undone but InnoDB does NOT release the corresponding locks
+which are stored in memory. If a lock is 'implicit', that is, a new inserted
+row holds a lock where the lock information is carried by the trx id stored in
+the row, these locks are naturally released in the rollback. Savepoints which
+were set after this savepoint are deleted.
+@return if no savepoint of the name found then DB_NO_SAVEPOINT,
+otherwise DB_SUCCESS */
+UNIV_INTERN
+ulint
 trx_rollback_to_savepoint_for_mysql(
 /*================================*/
 	trx_t*		trx,			/*!< in: transaction handle */
@@ -295,43 +377,39 @@ trx_rollback_to_savepoint_for_mysql(
 						binlog entries of the queries
 						executed after the savepoint */
 {
-	ulint			err;
 	trx_named_savept_t*	savep;
 
+	/* We are reading trx->state without holding trx_sys->lock
+	here, because the savepoint rollback should be invoked for a
+	running active MySQL transaction that is associated with the
+	current thread. */
+	ut_ad(trx->in_mysql_trx_list);
+
 	savep = trx_savepoint_find(trx, savepoint_name);
 
 	if (savep == NULL) {
-		err = DB_NO_SAVEPOINT;
-	} else if (trx->state == TRX_STATE_NOT_STARTED) {
+		return(DB_NO_SAVEPOINT);
+	}
+
+	switch (trx->state) {
+	case TRX_STATE_NOT_STARTED:
 		ut_print_timestamp(stderr);
 		fputs("  InnoDB: Error: transaction has a savepoint ", stderr);
 		ut_print_name(stderr, trx, FALSE, savep->name);
 		fputs(" though it is not started\n", stderr);
-		err = DB_ERROR;
-	} else {
-
-		/* We can now free all savepoints strictly later than
-		savepoint_name. */
-
-		trx_roll_savepoints_free(
-			trx, UT_LIST_GET_NEXT(trx_savepoints, savep));
-
-		*mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
-
-		trx->op_info = "rollback to a savepoint";
-
-		err = trx_general_rollback_for_mysql(trx, &savep->savept);
-
-		/* Store the current undo_no of the transaction so that
-		we know where to roll back if we have to roll back the
-		next SQL statement: */
-
-		trx_mark_sql_stat_end(trx);
-
-		trx->op_info = "";
+		return(DB_ERROR);
+	case TRX_STATE_ACTIVE:
+		return(trx_rollback_to_savepoint_low(trx, savep,
+						     mysql_binlog_cache_pos));
+	case TRX_STATE_PREPARED:
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		/* The savepoint rollback is only allowed on an ACTIVE
+		transaction, not a PREPARED or COMMITTED one. */
+		break;
 	}
 
-	return(err);
+	ut_error;
+	return(DB_CORRUPTION);
 }
 
 /*******************************************************************//**
@@ -395,6 +473,9 @@ trx_release_savepoint_for_mysql(
 {
 	trx_named_savept_t*	savep;
 
+	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+	ut_ad(trx->in_mysql_trx_list);
+
 	savep = trx_savepoint_find(trx, savepoint_name);
 
 	if (savep != NULL) {
@@ -575,56 +656,48 @@ trx_rollback_resurrected(
 	ibool	all)	/*!< in: FALSE=roll back dictionary transactions;
 			TRUE=roll back all non-PREPARED transactions */
 {
-	ibool	cleaned_or_rolledback;
-
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&trx_sys->lock, RW_LOCK_SHARED));
 #endif /* UNIV_SYNC_DEBUG */
 
-	/* The trx_t::is_recovered flag and the trx_t::state
-	are set atomically under the protection of the trx_t::mutex in
-	lock_trx_release_locks(). We don't want to accidentally cleanup
-	a non-recovered transaction here. */
+	/* The trx->is_recovered flag and trx->state are set
+	atomically under the protection of the trx->mutex (and
+	lock_sys->mutex) in lock_trx_release_locks(). We do not want
+	to accidentally clean up a non-recovered transaction here. */
 
 	trx_mutex_enter(trx);
 
 	if (!trx->is_recovered) {
-
-		trx_mutex_exit(trx);
-
-		cleaned_or_rolledback = FALSE;
-	} else if (trx->state == TRX_STATE_COMMITTED_IN_MEMORY) {
-
 		trx_mutex_exit(trx);
+		return(FALSE);
+	}
 
+	switch (trx->state) {
+	case TRX_STATE_COMMITTED_IN_MEMORY:
 		rw_lock_s_unlock(&trx_sys->lock);
-
+		trx_mutex_exit(trx);
 		fprintf(stderr,
 			"InnoDB: Cleaning up trx with id " TRX_ID_FMT "\n",
 			trx->id);
-
 		trx_cleanup_at_db_startup(trx);
-
-		cleaned_or_rolledback  = TRUE;
-	} else if (trx->state == TRX_STATE_ACTIVE
-		   && (all
-		       || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE)) {
-
+		return(TRUE);
+	case TRX_STATE_ACTIVE:
 		trx_mutex_exit(trx);
-
-		rw_lock_s_unlock(&trx_sys->lock);
-
-		trx_rollback_active(trx);
-
-		cleaned_or_rolledback  = TRUE;
-	} else {
-
+		if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
+			rw_lock_s_unlock(&trx_sys->lock);
+			trx_rollback_active(trx);
+			return(TRUE);
+		}
+		return(FALSE);
+	case TRX_STATE_PREPARED:
 		trx_mutex_exit(trx);
-
-		cleaned_or_rolledback  = FALSE;
+		return(FALSE);
+	case TRX_STATE_NOT_STARTED:
+		break;
 	}
 
-	return(cleaned_or_rolledback);
+	ut_error;
+	return(FALSE);
 }
 
 /*******************************************************************//**

=== modified file 'storage/innobase/trx/trx0sys.c'
--- a/storage/innobase/trx/trx0sys.c	revid:marko.makela@strippedrvg36hev7
+++ b/storage/innobase/trx/trx0sys.c	revid:marko.makela@stripped
@@ -637,14 +637,16 @@ UNIV_INTERN
 ibool
 trx_in_trx_list(
 /*============*/
-	trx_t*	in_trx)	/*!< in: trx */
+	const trx_t*	in_trx)	/*!< in: transaction */
 {
-	trx_t*	trx;
+	const trx_t*	trx;
 
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&trx_sys->lock, RW_LOCK_SHARED));
 #endif /* UNIV_SYNC_DEBUG */
 
+	ut_ad(trx_assert_started(in_trx));
+
 	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
 	     trx != NULL && trx != in_trx;
 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
@@ -984,23 +986,19 @@ trx_sys_init_at_db_start(void)
 
 	trx_lists_init_at_db_start();
 
-	if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
-		trx_t*	trx;
+	rw_lock_s_lock(&trx_sys->lock);
 
-		trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+	if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
+		const trx_t*	trx;
 
-		for (;;) {
-			ut_ad(trx->in_trx_list);
+		for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
+		     trx != NULL;
+		     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+			ut_ad(trx->is_recovered);
 
-			if (trx->state != TRX_STATE_PREPARED) {
+			if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
 				rows_to_undo += trx->undo_no;
 			}
-
-			trx = UT_LIST_GET_NEXT(trx_list, trx);
-
-			if (!trx) {
-				break;
-			}
 		}
 
 		if (rows_to_undo > 1000000000) {
@@ -1019,6 +1017,8 @@ trx_sys_init_at_db_start(void)
 			(ullint) trx_sys->max_trx_id);
 	}
 
+	rw_lock_s_unlock(&trx_sys->lock);
+
 	UT_LIST_INIT(trx_sys->view_list);
 
 	mtr_commit(&mtr);

=== modified file 'storage/innobase/trx/trx0trx.c'
--- a/storage/innobase/trx/trx0trx.c	revid:marko.makela@oracle.com-20110124082539-jxezr26rvg36hev7
+++ b/storage/innobase/trx/trx0trx.c	revid:marko.makela@stripped2828-1o7l37nibd18jwja
@@ -335,6 +335,7 @@ trx_list_insert_ordered(
 	ut_a(srv_is_being_started);
 	ut_ad(!trx->in_trx_list);
 	ut_ad(trx->state != TRX_STATE_NOT_STARTED);
+	ut_ad(trx->is_recovered);
 
 	for (trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list);
 	     trx2 != NULL;
@@ -638,7 +639,8 @@ trx_start_low(
 
 	ut_ad(trx->rseg == NULL);
 
-	ut_ad(trx->state != TRX_STATE_ACTIVE);
+	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
+	ut_ad(!trx->is_recovered);
 	ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
 
 	/* The initial value for trx->no: IB_ULONGLONG_MAX is used in
@@ -687,6 +689,9 @@ trx_commit(
 	ib_uint64_t		lsn = 0;
 	page_t*			update_hdr_page;
 
+	ut_ad(trx->in_trx_list);
+	ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+
 	if (trx->insert_undo != NULL || trx->update_undo != NULL) {
 		trx_rseg_t*	rseg;
 
@@ -945,26 +950,38 @@ UNIV_INTERN
 void
 trx_commit_or_rollback_prepare(
 /*===========================*/
-	trx_t*		trx)		/*!< in: transaction */
+	trx_t*	trx)		/*!< in/out: transaction */
 {
-	if (trx->state == TRX_STATE_NOT_STARTED) {
+	/* We are reading trx->state without holding trx_sys->lock
+	here, because the commit or rollback should be invoked for a
+	running (or recovered prepared) transaction that is associated
+	with the current thread. */
 
+	switch (trx->state) {
+	case TRX_STATE_NOT_STARTED:
 		trx_start_low(trx);
-	}
+		/* fall through */
+	case TRX_STATE_ACTIVE:
+	case TRX_STATE_PREPARED:
+		/* If the trx is in a lock wait state, moves the waiting
+		query thread to the suspended state */
 
-	/* If the trx is in a lock wait state, moves the waiting
-	query thread to the suspended state */
+		if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
 
-	if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+			ut_a(trx->lock.wait_thr != NULL);
+			trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
+			trx->lock.wait_thr = NULL;
 
-		ut_a(trx->lock.wait_thr != NULL);
-		trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
-		trx->lock.wait_thr = NULL;
+			trx->lock.que_state = TRX_QUE_RUNNING;
+		}
 
-		trx->lock.que_state = TRX_QUE_RUNNING;
+		ut_a(trx->lock.n_active_thrs == 1);
+		return;
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		break;
 	}
 
-	ut_a(trx->lock.n_active_thrs == 1);
+	ut_error;
 }
 
 /*********************************************************************//**
@@ -1043,7 +1060,7 @@ UNIV_INTERN
 ulint
 trx_commit_for_mysql(
 /*=================*/
-	trx_t*	trx)	/*!< in: trx handle */
+	trx_t*	trx)	/*!< in/out: transaction */
 {
 	/* Because we do not do the commit by sending an Innobase
 	sig to the transaction, we must here make sure that trx has been
@@ -1138,11 +1155,19 @@ trx_mark_sql_stat_end(
 {
 	ut_a(trx);
 
-	if (trx->state == TRX_STATE_NOT_STARTED) {
+	switch (trx->state) {
+	case TRX_STATE_PREPARED:
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		break;
+	case TRX_STATE_NOT_STARTED:
 		trx->undo_no = 0;
+		/* fall through */
+	case TRX_STATE_ACTIVE:
+		trx->last_sql_stat_start.least_undo_no = trx->undo_no;
+		return;
 	}
 
-	trx->last_sql_stat_start.least_undo_no = trx->undo_no;
+	ut_error;
 }
 
 /**********************************************************************//**
@@ -1333,6 +1358,41 @@ trx_print(
 	rw_lock_s_unlock(&trx_sys->lock);
 }
 
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Asserts that a transaction has been started.
+The caller must hold trx_sys->lock.
+@return TRUE if started */
+UNIV_INTERN
+ibool
+trx_assert_started(
+/*===============*/
+	const trx_t*	trx)	/*!< in: transaction */
+{
+# ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&trx_sys->lock, RW_LOCK_SHARED));
+# endif /* UNIV_SYNC_DEBUG */
+
+	/* trx->state cannot change from or to NOT_STARTED while we
+	are holding trx_sys->lock. It may change from ACTIVE to
+	PREPARED. Unless we are holding lock_sys->mutex, it may also
+	change to COMMITTED. */
+
+	switch (trx->state) {
+	case TRX_STATE_ACTIVE:
+	case TRX_STATE_PREPARED:
+	case TRX_STATE_COMMITTED_IN_MEMORY:
+		ut_ad(trx->in_trx_list);
+		return(TRUE);
+	case TRX_STATE_NOT_STARTED:
+		break;
+	}
+
+	ut_error;
+	return(FALSE);
+}
+#endif /* UNIV_DEBUG */
+
 /*******************************************************************//**
 Compares the "weight" (or size) of two transactions. Transactions that
 have edited non-transactional tables are considered heavier than ones
@@ -1534,9 +1594,11 @@ trx_recover_for_mysql(
 	     trx != NULL;
 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
 
-		ut_ad(trx->in_trx_list);
-
-		if (trx->state == TRX_STATE_PREPARED) {
+		/* trx->state cannot change from or to NOT_STARTED
+		while we are holding the trx_sys->lock. It may change
+		to PREPARED, but not if trx->is_recovered. It may also
+		change to COMMITTED. */
+		if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
 			xid_list[count] = trx->xid;
 
 			if (count == 0) {
@@ -1618,7 +1680,7 @@ trx_get_trx_by_xid(
 		}
 	}
 
-	if (trx != NULL && trx->state != TRX_STATE_PREPARED) {
+	if (trx != NULL && !trx_state_eq(trx, TRX_STATE_PREPARED)) {
 
 		trx = NULL;
 	}

Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20110124102828-1o7l37nibd18jwja.bundle
Thread
bzr commit into mysql-trunk-innodb branch (marko.makela:3453) marko.makela24 Jan