List:Commits« Previous MessageNext Message »
From:Sunny Bains Date:March 4 2011 1:49am
Subject:bzr push into mysql-trunk-innodb branch (Sunny.Bains:3528 to 3529)
Bug#11833442
View as plain text  
 3529 Sunny Bains	2011-03-04
      Bug# 11833442 - GET RID OF THE NEED TO ACQUIRE TRX_SYS_T::LOCK IN S MODE IN LOCK_DEADLOCK_OCCURS
      
      There is an unnecessary contention point in the deadlock checking where
      we acquire the trx_sys_t::lock in S mode to reset the trx_t specific node
      visited flag for deadlock checking. This should reduce the pressure on
      trx_start and trx_commit a lot.
      
      rb://610 Approved by Marko.

    modified:
      storage/innobase/include/trx0trx.h
      storage/innobase/lock/lock0lock.c
 3528 Sunny Bains	2011-03-03
      Bug# 11765850 - 58854: HIGH CONCURRENCY SYSTEM TEST SHOWS SIGNIFICANT PERFORMANCE DROP
      
      The bug is that the InnoDB pre-fetch cache was not being used in
      row_search_for_mysql().  Secondly the changeset that planted the
      bug also introduced some inefficient code. It would read an extra
      row, convert it to MySQL row format (for ICP==off), copy the row
      to the pre-fetch cache row buffer, then check for cache overflow
      and dequeue the row that was pushed if there was a possibility of
      a cache overflow.
      
      No rb entry, approved via email by Marko.

    modified:
      storage/innobase/row/row0sel.c
=== modified file 'storage/innobase/include/trx0trx.h'
--- a/storage/innobase/include/trx0trx.h	revid:sunny.bains@stripped
+++ b/storage/innobase/include/trx0trx.h	revid:sunny.bains@stripped
@@ -448,9 +448,9 @@ struct trx_lock_struct {
 					hold lock_sys->mutex, except when
 					they are holding trx->mutex and
 					wait_lock==NULL */
-	ulint		deadlock_mark;	/*!< a mark field used in deadlock
-					checking algorithm. This is only
-					covered by the lock_sys->mutex. */
+	ib_uint64_t	deadlock_mark;	/*!< A mark field that is initialized
+					to and checked against lock_mark_counter
+				       	by lock_deadlock_recursive(). */
 	ibool		was_chosen_as_deadlock_victim;
 					/*!< when the transaction decides to
 				       	wait for a lock, it sets this to FALSE;

=== modified file 'storage/innobase/lock/lock0lock.c'
--- a/storage/innobase/lock/lock0lock.c	revid:sunny.bains@stripped
+++ b/storage/innobase/lock/lock0lock.c	revid:sunny.bains@stripped
@@ -424,9 +424,12 @@ lock_deadlock_recursive(
 	ulint*	cost,		/*!< in/out: number of calculation steps thus
 				far: if this exceeds LOCK_MAX_N_STEPS_...
 				we return LOCK_VICTIM_EXCEED_MAX_DEPTH */
-	ulint	depth);		/*!< in: recursion depth: if this exceeds
+	ulint	depth,		/*!< in: recursion depth: if this exceeds
 				LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
 				return LOCK_VICTIM_EXCEED_MAX_DEPTH */
+	const ib_uint64_t
+		mark_start);	/*!< in: Value of lock_mark_count at the start
+				of the deadlock check. */
 
 /*********************************************************************//**
 Gets the nth bit of a record lock.
@@ -3474,6 +3477,9 @@ lock_deadlock_lock_print(
 	}
 }
 
+/** Used in deadlock tracking. Protected by lock_sys->mutex. */
+static ib_uint64_t	lock_mark_counter = 0;
+
 /********************************************************************//**
 Checks if a lock request results in a deadlock.
 @return TRUE if a deadlock was detected and we chose trx as a victim;
@@ -3486,7 +3492,6 @@ lock_deadlock_occurs(
 	lock_t*	lock,	/*!< in: lock the transaction is requesting */
 	trx_t*	trx)	/*!< in/out: transaction */
 {
-	trx_t*		mark_trx;
 	ulint		cost	= 0;
 
 	ut_ad(trx);
@@ -3500,25 +3505,9 @@ retry:
 	does not produce a cycle. First mark all active transactions
 	with 0: */
 
-	/* To obey the latching order. Since we have the lock mutex, this
-	transaction's state can't be changed. */
-	trx_mutex_exit(trx);
-
-	rw_lock_s_lock(&trx_sys->lock);
-
-	for (mark_trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
-	     mark_trx != NULL;
-	     mark_trx = UT_LIST_GET_NEXT(trx_list, mark_trx)) {
-
-		ut_ad(mark_trx->in_trx_list);
-		mark_trx->lock.deadlock_mark = 0;
-	}
+	switch (lock_deadlock_recursive(
+		trx, trx, lock, &cost, 0, lock_mark_counter)) {
 
-	rw_lock_s_unlock(&trx_sys->lock);
-
-	trx_mutex_enter(trx);
-
-	switch (lock_deadlock_recursive(trx, trx, lock, &cost, 0)) {
 	case LOCK_VICTIM_IS_OTHER:
 		/* We chose some other trx as a victim: retry if there still
 		is a deadlock */
@@ -3592,9 +3581,12 @@ lock_deadlock_recursive(
 	ulint*	cost,		/*!< in/out: number of calculation steps thus
 				far: if this exceeds LOCK_MAX_N_STEPS_...
 				we return LOCK_VICTIM_EXCEED_MAX_DEPTH */
-	ulint	depth)		/*!< in: recursion depth: if this exceeds
+	ulint	depth,		/*!< in: recursion depth: if this exceeds
 				LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
 				return LOCK_VICTIM_EXCEED_MAX_DEPTH */
+	const ib_uint64_t
+		mark_start)	/*!< in: Value of lock_mark_count at the start
+				of the deadlock check. */
 {
 	lock_victim_t	ret;
 	lock_t*		lock;
@@ -3605,8 +3597,9 @@ lock_deadlock_recursive(
 	ut_a(wait_lock);
 	ut_ad(lock_mutex_own());
 	ut_ad(trx->in_trx_list);
+	ut_ad(mark_start <= lock_mark_counter);
 
-	if (trx->lock.deadlock_mark) {
+	if (trx->lock.deadlock_mark > mark_start) {
 		/* We have already exhaustively searched the subtree starting
 		from this trx */
 
@@ -3645,7 +3638,14 @@ lock_deadlock_recursive(
 
 		if (lock == NULL || lock == wait_lock) {
 			/* We can mark this subtree as searched */
-			trx->lock.deadlock_mark = 1;
+			ut_a(trx->lock.deadlock_mark <= mark_start);
+			trx->lock.deadlock_mark = ++lock_mark_counter;
+
+			/* We are not prepared for an overflow. This 64-bit
+			counter should never wrap around. At 10^9 increments
+			per second, it would take 10^3 years of uptime. */
+
+			ut_a(lock_mark_counter > 0);
 
 			return(LOCK_VICTIM_NONE);
 		}
@@ -3765,7 +3765,7 @@ lock_deadlock_recursive(
 				ret = lock_deadlock_recursive(
 					start, lock_trx,
 					lock_trx->lock.wait_lock,
-					cost, depth + 1);
+					cost, depth + 1, mark_start);
 
 				if (ret != LOCK_VICTIM_NONE) {
 


Attachment: [text/bzr-bundle] bzr/sunny.bains@oracle.com-20110304014748-zlwaz2em9syw5iw6.bundle
Thread
bzr push into mysql-trunk-innodb branch (Sunny.Bains:3528 to 3529)Bug#11833442Sunny Bains4 Mar