List:Commits« Previous MessageNext Message »
From:marko.makela Date:February 7 2011 11:25am
Subject:bzr push into mysql-trunk-innodb branch (marko.makela:3481 to 3482) WL#5458
View as plain text  
 3482 Marko Mäkelä	2011-02-07
      WL#5458 clean-up and optimizations
      
      When releasing multiple mutexes, release the coarsest mutex first,
      hoping to help performance when a context switch occurs between the
      releases. That is, release lock_sys->mutex before trx_sys->lock before
      trx->mutex.
      
      Reduce the hold time of trx->mutex.
      
      Replace some if-else if-else checks with switch.
      
      Add UNIV_UNLIKELY hints to or assertion-like checks or tests for rare
      conditions.
      
      lock_grant(), lock_rec_cancel(): lock_reset_lock_and_trx_wait() does
      not need trx->mutex.
      
      lock_deadlock_occurs(): Remove unnecessary local variable ret. On
      LOCK_VICTIM_EXCEED_MAX_DEPTH, release trx->mutex immediately, because
      lock_deadlock_trx_print() does not need it.
      
      lock_print_info_all_transactions(): Remove a redundant goto statement.
      
      lock_rec_insert_check_and_lock(): Do not acquire trx->mutex except for
      the lock_rec_enqueue_waiting() call, because this code is invoked for
      a running transaction by its serving thread.
      
      lock_wait_table_reserve_slot(): Do not test twice for the end-of-loop
      condition. Fix some formatting.
      
      lock_wait_check_and_cancel(): Only acquire the mutexes when cancelling
      the lock request.
      
      que_thr_end_lock_wait(): Add an assertion and remove a redundant one.
      
      que_thr_stop(): Remove a local variable. Use direct return instead.
      
      row_search_for_mysql(): Replace if-else if-else with switch.
      
      trx_purge_sys_create(): Remove unnecessary acquisition of
      purge_sys->trx->mutex. The purge_sys is allocated in this function;
      therefore the mutex does not provide any protection.
      
      trx_purge_sys_close(): Assert that purge_sys->sess->trx == purge_sys->trx.
      
      trx_rollback_or_clean_recovered(): Correct a typo.
      
      trx_prepare(), trx_prepare_for_mysql(): Do not acquire or release
      trx->mutex. This code is supposed to be invoked on a running
      transaction by the serving thread. trx->state changes are protected by
      lock_sys->mutex. Add assertions.
      
      rb://546

    modified:
      storage/innobase/lock/lock0lock.c
      storage/innobase/lock/lock0wait.c
      storage/innobase/que/que0que.c
      storage/innobase/row/row0sel.c
      storage/innobase/trx/trx0purge.c
      storage/innobase/trx/trx0roll.c
      storage/innobase/trx/trx0trx.c
 3481 Vasil Dimov	2011-02-07 [merge]
      Merge mysql-5.5-innodb -> mysql-trunk-innodb

    modified:
      mysql-test/valgrind.supp
=== modified file 'storage/innobase/lock/lock0lock.c'
--- a/storage/innobase/lock/lock0lock.c	revid:vasil.dimov@stripped10207092103-usuct0u785h0dno2
+++ b/storage/innobase/lock/lock0lock.c	revid:marko.makela@strippede0adanjba07bzre4
@@ -2261,14 +2261,13 @@ lock_grant(
 {
 	ut_ad(lock_mutex_own());
 
-	trx_mutex_enter(lock->trx);
-
 	lock_reset_lock_and_trx_wait(lock);
+	trx_mutex_enter(lock->trx);
 
 	if (lock_get_mode(lock) == LOCK_AUTO_INC) {
 		dict_table_t*	table = lock->un_member.tab_lock.table;
 
-		if (table->autoinc_trx == lock->trx) {
+		if (UNIV_UNLIKELY(table->autoinc_trx == lock->trx)) {
 			fprintf(stderr,
 				"InnoDB: Error: trx already had"
 				" an AUTO-INC lock!\n");
@@ -2319,8 +2318,6 @@ lock_rec_cancel(
 	ut_ad(lock_mutex_own());
 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
 
-	trx_mutex_enter(lock->trx);
-
 	/* Reset the bit (there can be only one set bit) in the lock bitmap */
 	lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
 
@@ -2330,6 +2327,8 @@ lock_rec_cancel(
 
 	/* The following function releases the trx from lock wait */
 
+	trx_mutex_enter(lock->trx);
+
 	thr = que_thr_end_lock_wait(lock->trx);
 
 	if (thr != NULL) {
@@ -3488,7 +3487,6 @@ lock_deadlock_occurs(
 	trx_t*	trx)	/*!< in/out: transaction */
 {
 	trx_t*		mark_trx;
-	ulint		ret;
 	ulint		cost	= 0;
 
 	ut_ad(trx);
@@ -3520,15 +3518,22 @@ retry:
 
 	trx_mutex_enter(trx);
 
-	ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
-
-	switch (ret) {
+	switch (lock_deadlock_recursive(trx, trx, lock, &cost, 0)) {
 	case LOCK_VICTIM_IS_OTHER:
 		/* We chose some other trx as a victim: retry if there still
 		is a deadlock */
 		goto retry;
 
 	case LOCK_VICTIM_EXCEED_MAX_DEPTH:
+		/* Release the mutex to obey the latching order.
+		This is safe, because lock_deadlock_occurs() is invoked
+		when a lock wait is enqueued for the currently running
+		transaction. Because trx is a running transaction
+		(it is not currently suspended because of a lock wait),
+		its state can only be changed by this thread, which is
+		currently associated with the transaction. */
+		trx_mutex_exit(trx);
+
 		/* If the lock search exceeds the max step
 		or the max depth, the current trx will be
 		the victim. Print its information. */
@@ -3540,32 +3545,31 @@ retry:
 			" FOLLOWING TRANSACTION \n\n"
 			"*** TRANSACTION:\n");
 
-		/* To obey the latching order */
-		trx_mutex_exit(trx);
-
 		lock_deadlock_trx_print(trx, 3000);
 
-		trx_mutex_enter(trx);
-
 		lock_deadlock_fputs(
 			"*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
 
 		lock_deadlock_lock_print(lock);
 
+		trx_mutex_enter(trx);
 		break;
 
 	case LOCK_VICTIM_IS_START:
 		lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (2)\n");
 		break;
 
-	default:
-		/* No deadlock detected*/
+	case LOCK_VICTIM_NONE:
+		/* No deadlock detected */
+		ut_ad(trx_mutex_own(trx));
+		ut_ad(lock_mutex_own());
 		return(FALSE);
 	}
 
 	lock_deadlock_found = TRUE;
 
 	ut_ad(trx_mutex_own(trx));
+	ut_ad(lock_mutex_own());
 
 	return(TRUE);
 }
@@ -3658,7 +3662,6 @@ lock_deadlock_recursive(
 
 			if (lock_trx == start) {
 
-				/* To obey the latching order */
 				trx_mutex_exit(start);
 
 				/* We came back to the recursion starting
@@ -4771,9 +4774,8 @@ loop:
 
 	if (trx == NULL) {
 
-		rw_lock_s_unlock(&trx_sys->lock);
-
 		lock_mutex_exit();
+		rw_lock_s_unlock(&trx_sys->lock);
 
 		ut_ad(lock_validate());
 
@@ -4858,9 +4860,8 @@ loop:
 				goto print_rec;
 			}
 
-			rw_lock_s_unlock(&trx_sys->lock);
-
 			lock_mutex_exit();
+			rw_lock_s_unlock(&trx_sys->lock);
 
 			mtr_start(&mtr);
 
@@ -4897,8 +4898,6 @@ print_rec:
 
 		nth_trx++;
 		nth_lock = 0;
-
-		goto loop;
 	}
 
 	goto loop;
@@ -5257,10 +5256,10 @@ lock_validate(void)
 		}
 	}
 
-	rw_lock_s_unlock(&trx_sys->lock);
-
 	lock_mutex_exit();
 
+	rw_lock_s_unlock(&trx_sys->lock);
+
 	return(TRUE);
 }
 #endif /* UNIV_DEBUG */
@@ -5307,8 +5306,9 @@ lock_rec_insert_check_and_lock(
 	next_rec_heap_no = page_rec_get_heap_no(next_rec);
 
 	lock_mutex_enter();
-
-	trx_mutex_enter(trx);
+	/* Because this code is invoked for a running transaction by
+	the thread that is serving the transaction, it is not necessary
+	to hold trx->mutex here. */
 
 	/* When inserting a record into an index, the table must be at
 	least IX-locked or we must be building an index, in which case
@@ -5322,8 +5322,6 @@ lock_rec_insert_check_and_lock(
 	if (UNIV_LIKELY(lock == NULL)) {
 		/* We optimize CPU time usage in the simplest case */
 
-		trx_mutex_exit(trx);
-
 		lock_mutex_exit();
 
 		if (!dict_index_is_clust(index)) {
@@ -5355,16 +5353,16 @@ lock_rec_insert_check_and_lock(
 		    block, next_rec_heap_no, trx)) {
 
 		/* Note that we may get DB_SUCCESS also here! */
+		trx_mutex_enter(trx);
 		err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
 					       | LOCK_INSERT_INTENTION,
 					       block, next_rec_heap_no,
 					       index, thr);
+		trx_mutex_exit(trx);
 	} else {
 		err = DB_SUCCESS;
 	}
 
-	trx_mutex_exit(trx);
-
 	lock_mutex_exit();
 
 	switch (err) {
@@ -6222,9 +6220,8 @@ lock_trx_handle_wait(
 		err = DB_SUCCESS;
 	}
 
-	trx_mutex_exit(trx);
-
 	lock_mutex_exit();
+	trx_mutex_exit(trx);
 
 	return(err);
 }

=== modified file 'storage/innobase/lock/lock0wait.c'
--- a/storage/innobase/lock/lock0wait.c	revid:vasil.dimov@oracle.com-20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/lock/lock0wait.c	revid:marko.makela@stripped10207112317-e0adanjba07bzre4
@@ -150,59 +150,48 @@ lock_wait_table_reserve_slot(
 
 	slot = lock_sys->waiting_threads;
 
-	for (i = 0; i < OS_THREAD_MAX_N; ++i, ++slot) {
+	for (i = OS_THREAD_MAX_N; i--; ++slot) {
 		if (!slot->in_use) {
-			break;
-		}
-	}
+			slot->in_use = TRUE;
+			slot->thr = thr;
+			slot->thr->slot = slot;
+			slot->id = os_thread_get_curr_id();
+			slot->handle = os_thread_get_curr();
 
-	/* Check if we have run out of slots. */
-	if (slot == lock_sys->waiting_threads+ OS_THREAD_MAX_N) {
+			if (slot->event == NULL) {
+				slot->event = os_event_create(NULL);
+				ut_a(slot->event);
+			}
 
-		ut_print_timestamp(stderr);
+			os_event_reset(slot->event);
+			slot->suspended = TRUE;
+			slot->suspend_time = ut_time();
+			slot->wait_timeout = wait_timeout;
 
-		fprintf(stderr,
-			"  InnoDB: There appear to be %lu user"
-			" threads currently waiting\n"
-			"InnoDB: inside InnoDB, which is the"
-			" upper limit. Cannot continue operation.\n"
-			"InnoDB: We intentionally generate"
-			" a seg fault to print a stack trace\n"
-			"InnoDB: on Linux. But first we print"
-			" a list of waiting threads.\n", (ulong) i);
+			if (slot == lock_sys->last_slot) {
+				++lock_sys->last_slot;
+			}
 
-		lock_wait_table_print();
-
-		ut_error;
-	} else {
+			ut_ad(lock_sys->last_slot
+			      <= lock_sys->waiting_threads + OS_THREAD_MAX_N);
 
-		ut_a(slot->in_use == FALSE);
-
-		slot->in_use = TRUE;
-		slot->thr = thr;
-		slot->thr->slot = slot;
-		slot->id = os_thread_get_curr_id();
-		slot->handle = os_thread_get_curr();
-
-		if (slot->event == NULL) {
-			slot->event = os_event_create(NULL);
-			ut_a(slot->event);
+			return(slot);
 		}
-
-		os_event_reset(slot->event);
-		slot->suspended = TRUE;
-		slot->suspend_time = ut_time();
-		slot->wait_timeout = wait_timeout;
 	}
 
-	if (slot == lock_sys->last_slot) {
-		++lock_sys->last_slot;
-	}
+	ut_print_timestamp(stderr);
 
-	ut_ad(lock_sys->last_slot
-	      <= lock_sys->waiting_threads+ OS_THREAD_MAX_N);
+	fprintf(stderr,
+		"  InnoDB: There appear to be %lu user"
+		" threads currently waiting\n"
+		"InnoDB: inside InnoDB, which is the"
+		" upper limit. Cannot continue operation.\n"
+		"InnoDB: As a last thing, we print"
+		" a list of waiting threads.\n", (ulong) OS_THREAD_MAX_N);
 
-	return(slot);
+	lock_wait_table_print();
+
+	ut_error;
 }
 
 /***************************************************************//**
@@ -245,7 +234,7 @@ lock_wait_suspend_thread(
 
 	if (thr->state == QUE_THR_RUNNING) {
 
-		ut_ad(thr->is_active == TRUE);
+		ut_ad(thr->is_active);
 
 		/* The lock has already been released or this transaction
 		was chosen as a deadlock victim: no need to suspend */
@@ -256,13 +245,12 @@ lock_wait_suspend_thread(
 			trx->lock.was_chosen_as_deadlock_victim = FALSE;
 		}
 
-		trx_mutex_exit(trx);
-
 		lock_wait_mutex_exit();
+		trx_mutex_exit(trx);
 		return;
 	}
 
-	ut_ad(thr->is_active == FALSE);
+	ut_ad(!thr->is_active);
 
 	slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout);
 
@@ -283,8 +271,8 @@ lock_wait_suspend_thread(
 
 	os_event_set(srv_timeout_event);
 
-	trx_mutex_exit(trx);
 	lock_wait_mutex_exit();
+	trx_mutex_exit(trx);
 
 	if (trx->declared_to_be_inside_innodb) {
 
@@ -395,7 +383,7 @@ lock_wait_release_thread_if_suspended(
 
 	/* We own both the lock mutex and the trx_t::mutex but not the
 	lock wait mutex. This is OK because other threads will see the state
-	of this mutex as being in use and no other thread can change the state
+	of this slot as being in use and no other thread can change the state
 	of the slot to free unless that thread also owns the lock mutex. */
 
 	if (thr->slot != NULL && thr->slot->in_use && thr->slot->thr == thr) {

=== modified file 'storage/innobase/que/que0que.c'
--- a/storage/innobase/que/que0que.c	revid:vasil.dimov@oracle.com-20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/que/que0que.c	revid:marko.makela@stripped0207112317-e0adanjba07bzre4
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -206,6 +206,7 @@ que_thr_end_lock_wait(
 	ibool		was_active;
 
 	ut_ad(lock_mutex_own());
+	ut_ad(trx_mutex_own(trx));
 
 	thr = trx->lock.wait_thr;
 
@@ -215,8 +216,6 @@ que_thr_end_lock_wait(
 	/* In MySQL this is the only possible state here */
 	ut_a(thr->state == QUE_THR_LOCK_WAIT);
 
-	ut_ad(thr->state == QUE_THR_LOCK_WAIT);
-
 	was_active = thr->is_active;
 
 	que_thr_move_to_run_state(thr);
@@ -704,7 +703,6 @@ que_thr_stop(
 	que_thr_t*	thr)	/*!< in: query thread */
 {
 	que_t*		graph;
-	ibool		ret	= TRUE;
 	trx_t*		trx = thr_get_trx(thr);;
 
 	graph = thr->graph;
@@ -732,10 +730,10 @@ que_thr_stop(
 	} else {
 		ut_ad(graph->state == QUE_FORK_ACTIVE);
 
-		ret = FALSE;
+		return(FALSE);
 	}
 
-	return(ret);
+	return(TRUE);
 }
 
 /**********************************************************************//**

=== modified file 'storage/innobase/row/row0sel.c'
--- a/storage/innobase/row/row0sel.c	revid:vasil.dimov@strippedm-20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/row/row0sel.c	revid:marko.makela@stripped7-e0adanjba07bzre4
@@ -4366,7 +4366,8 @@ no_gap_lock:
 
 			err = lock_trx_handle_wait(trx);
 
-			if (err == DB_SUCCESS) {
+			switch (err) {
+			case DB_SUCCESS:
 				/* The lock was granted while we were
 				searching for the last committed version.
 				Do a normal locking read. */
@@ -4374,12 +4375,14 @@ no_gap_lock:
 				offsets = rec_get_offsets(
 					rec, index, offsets, ULINT_UNDEFINED,
 					&heap);
-				break;
-			} else if (err == DB_DEADLOCK) {
+				goto locks_ok;
+			case DB_DEADLOCK:
 				goto lock_wait_or_error;
- 			} else {
-				ut_a(err == DB_LOCK_WAIT);
+			case DB_LOCK_WAIT:
 				err = DB_SUCCESS;
+				break;
+			default:
+				ut_error;
 			}
 
 			if (old_vers == NULL) {
@@ -4469,6 +4472,7 @@ no_gap_lock:
 		}
 	}
 
+locks_ok:
 	/* NOTE that at this point rec can be an old version of a clustered
 	index record built for a consistent read. We cannot assume after this
 	point that rec is on a buffer pool page. Functions like

=== modified file 'storage/innobase/trx/trx0purge.c'
--- a/storage/innobase/trx/trx0purge.c	revid:vasil.dimov@stripped20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/trx/trx0purge.c	revid:marko.makela@stripped7-e0adanjba07bzre4
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -160,8 +160,6 @@ trx_purge_sys_create(
 
 	ut_a(purge_sys->trx->sess == purge_sys->sess);
 
-	trx_mutex_enter(purge_sys->trx);
-
 	/* A purge transaction is not a real transaction, we use a transaction
 	here only because the query threads code requires it. It is otherwise
 	quite unnecessary. We should get rid of it eventually. */
@@ -169,8 +167,6 @@ trx_purge_sys_create(
 	purge_sys->trx->start_time = ut_time();
 	purge_sys->trx->state = TRX_STATE_ACTIVE;
 
-	trx_mutex_exit(purge_sys->trx);
-
 	purge_sys->query = trx_purge_graph_build(
 		purge_sys->trx, n_purge_threads);
 
@@ -187,8 +183,9 @@ trx_purge_sys_close(void)
 	que_graph_free(purge_sys->query);
 
 	ut_a(purge_sys->trx->id == 0);
+	ut_a(purge_sys->sess->trx == purge_sys->trx);
 
-	purge_sys->sess->trx->state = TRX_STATE_NOT_STARTED;
+	purge_sys->trx->state = TRX_STATE_NOT_STARTED;
 
 	sess_close(purge_sys->sess);
 

=== modified file 'storage/innobase/trx/trx0roll.c'
--- a/storage/innobase/trx/trx0roll.c	revid:vasil.dimov@strippedom-20110207092103-usuct0u785h0dno2
+++ b/storage/innobase/trx/trx0roll.c	revid:marko.makela@stripped317-e0adanjba07bzre4
@@ -743,7 +743,7 @@ trx_rollback_or_clean_recovered(
 			ut_ad(trx->in_trx_list);
 
 			/* If this function does a cleanup or rollback
-			then it will release the trx sys mutex, therefore
+			then it will release the trx_sys->lock, therefore
 			we need to reacquire it before retrying the loop. */
 
 			if (trx_rollback_resurrected(trx, all)) {

=== modified file 'storage/innobase/trx/trx0trx.c'
--- a/storage/innobase/trx/trx0trx.c	revid:vasil.dimov@stripped0dno2
+++ b/storage/innobase/trx/trx0trx.c	revid:marko.makela@stripped
@@ -1445,18 +1445,19 @@ static
 void
 trx_prepare(
 /*========*/
-	trx_t*	trx)	/*!< in: transaction */
+	trx_t*	trx)	/*!< in/out: transaction */
 {
 	trx_rseg_t*	rseg;
-	ib_uint64_t	lsn		= 0;
+	ib_uint64_t	lsn;
 	mtr_t		mtr;
 
 	rseg = trx->rseg;
+	/* Only fresh user transactions can be prepared.
+	Recovered transactions cannot. */
+	ut_a(!trx->is_recovered);
 
 	if (trx->insert_undo != NULL || trx->update_undo != NULL) {
 
-		trx_mutex_exit(trx);
-
 		mtr_start(&mtr);
 
 		/* Change the undo log segment states from TRX_UNDO_ACTIVE
@@ -1489,16 +1490,13 @@ trx_prepare(
 					world */
 		/*--------------*/
 		lsn = mtr.end_lsn;
-
-		trx_mutex_enter(trx);
+		ut_ad(lsn);
+	} else {
+		lsn = 0;
 	}
 
-	ut_ad(trx_mutex_own(trx));
-
-	/* Note: This state change is only covered by the trx_t::mutex and
-	not the trx_sys_t::lock. */
-
 	/*--------------------------------------*/
+	ut_a(trx->state == TRX_STATE_ACTIVE);
 	trx->state = TRX_STATE_PREPARED;
 	/*--------------------------------------*/
 
@@ -1520,8 +1518,6 @@ trx_prepare(
 		TODO: find out if MySQL holds some mutex when calling this.
 		That would spoil our group prepare algorithm. */
 
-		trx_mutex_exit(trx);
-
 		if (srv_flush_log_at_trx_commit == 0) {
 			/* Do nothing */
 		} else if (srv_flush_log_at_trx_commit == 1) {
@@ -1544,8 +1540,6 @@ trx_prepare(
 		} else {
 			ut_error;
 		}
-
-		trx_mutex_enter(trx);
 	}
 }
 
@@ -1559,15 +1553,11 @@ trx_prepare_for_mysql(
 {
 	trx_start_if_not_started_xa(trx);
 
-	trx_mutex_enter(trx);
-
 	trx->op_info = "preparing";
 
 	trx_prepare(trx);
 
 	trx->op_info = "";
-
-	trx_mutex_exit(trx);
 }
 
 /**********************************************************************//**

Attachment: [text/bzr-bundle] bzr/marko.makela@oracle.com-20110207112317-e0adanjba07bzre4.bundle
Thread
bzr push into mysql-trunk-innodb branch (marko.makela:3481 to 3482) WL#5458marko.makela7 Feb