List:Commits« Previous MessageNext Message »
From:Sunny Bains Date:April 12 2011 1:35am
Subject:bzr push into mysql-trunk-innodb branch (Sunny.Bains:3573 to 3574)
View as plain text  
 3574 Sunny Bains	2011-04-12
      Bug 12348462 - ALLOW MULTIPLE SYNC WAIT ARRAYS
      
      Once threads start to queue on any InnoDB mutex/rw_lock, the single OS mutex
      in the single sync wait array instance becomes a big bottleneck.
      
      New config variable:
         --innodb-sync-array-size : default is 32, max is 1024.
      
      We loop over all the arrays only when we need to print out diagnostic
      information or when we need to handle a lost wakeup.
      
      This is an interim solution until we get rid of the sync0arr.c code completely.
      
      rb://635 Approved by Marko

    modified:
      storage/innobase/handler/ha_innodb.cc
      storage/innobase/include/srv0srv.h
      storage/innobase/include/sync0arr.h
      storage/innobase/include/sync0arr.ic
      storage/innobase/include/sync0rw.ic
      storage/innobase/include/sync0sync.h
      storage/innobase/sync/sync0arr.c
      storage/innobase/sync/sync0rw.c
      storage/innobase/sync/sync0sync.c
 3573 Inaam Rana	2011-04-11
      PFS keys for trx_sys::lock and hash_table::lock should be defined
      under UNIV_PFS_RWLOCK instead of UNIV_PFS_MUTEX.
      
      Approved by: Jimmy

    modified:
      storage/innobase/ha/hash0hash.c
      storage/innobase/handler/ha_innodb.cc
      storage/innobase/include/sync0rw.h
      storage/innobase/include/sync0sync.h
      storage/innobase/trx/trx0sys.c
=== modified file 'storage/innobase/handler/ha_innodb.cc'
--- a/storage/innobase/handler/ha_innodb.cc	revid:inaam.rana@stripped
+++ b/storage/innobase/handler/ha_innodb.cc	revid:sunny.bains@stripped
@@ -12091,6 +12091,14 @@ static MYSQL_SYSVAR_ULONG(purge_threads,
   0,			/* Minimum value */
   32, 0);		/* Maximum value */
 
+static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
+  PLUGIN_VAR_OPCMDARG,
+  "Size of the mutex/lock wait array. Default is 32.",
+  NULL, NULL,
+  32,			/* Default setting */
+  1,			/* Minimum value */
+  1024, 0);		/* Maximum value */
+
 static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
   PLUGIN_VAR_OPCMDARG,
   "Speeds up the shutdown process of the InnoDB storage engine. Possible "
@@ -12507,6 +12515,7 @@ static struct st_mysql_sys_var* innobase
 #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
   MYSQL_SYSVAR(print_all_deadlocks),
   MYSQL_SYSVAR(rollback_segments),
+  MYSQL_SYSVAR(sync_array_size),
   NULL
 };
 

=== modified file 'storage/innobase/include/srv0srv.h'
--- a/storage/innobase/include/srv0srv.h	revid:inaam.rana@stripped
+++ b/storage/innobase/include/srv0srv.h	revid:sunny.bains@stripped
@@ -304,6 +304,9 @@ extern ulong srv_purge_batch_size;
 /* the number of rollback segments to use */
 extern ulong srv_rollback_segments;
 
+/* the number of sync wait arrays */
+extern ulong srv_sync_array_size;
+
 /* variable that counts amount of data read in total (in bytes) */
 extern ulint srv_data_read;
 

=== modified file 'storage/innobase/include/sync0arr.h'
--- a/storage/innobase/include/sync0arr.h	revid:inaam.rana@stripped
+++ b/storage/innobase/include/sync0arr.h	revid:sunny.bains@stripped
@@ -36,32 +36,6 @@ typedef struct sync_cell_struct		sync_ce
 /** Synchronization wait array */
 typedef struct sync_array_struct	sync_array_t;
 
-/** Parameters for sync_array_create() @{ */
-#define SYNC_ARRAY_OS_MUTEX	1	/*!< protected by os_mutex_t */
-#define SYNC_ARRAY_MUTEX	2	/*!< protected by mutex_t */
-/* @} */
-
-/*******************************************************************//**
-Creates a synchronization wait array. It is protected by a mutex
-which is automatically reserved when the functions operating on it
-are called.
-@return	own: created wait array */
-UNIV_INTERN
-sync_array_t*
-sync_array_create(
-/*==============*/
-	ulint	n_cells,	/*!< in: number of cells in the array
-				to create */
-	ulint	protection);	/*!< in: either SYNC_ARRAY_OS_MUTEX or
-				SYNC_ARRAY_MUTEX: determines the type
-				of mutex protecting the data structure */
-/******************************************************************//**
-Frees the resources in a wait array. */
-UNIV_INTERN
-void
-sync_array_free(
-/*============*/
-	sync_array_t*	arr);	/*!< in, own: sync wait array */
 /******************************************************************//**
 Reserves a wait array cell for waiting for an object.
 The event of the cell is reset to nonsignalled state. */
@@ -99,9 +73,9 @@ sync_array_free_cell(
 Note that one of the wait objects was signalled. */
 UNIV_INTERN
 void
-sync_array_object_signalled(
-/*========================*/
-	sync_array_t*	arr);	/*!< in: wait array */
+sync_array_object_signalled(void);
+/*=============================*/
+
 /**********************************************************************//**
 If the wakeup algorithm does not work perfectly at semaphore relases,
 this function will do the waking (see the comment in mutex_exit). This
@@ -132,11 +106,30 @@ sync_array_validate(
 Prints info of the wait array. */
 UNIV_INTERN
 void
-sync_array_print_info(
+sync_array_print(
+/*=============*/
+	FILE*		file);	/*!< in: file where to print */
+
+/**********************************************************************//**
+Create the primary system wait array(s), they are protected by an OS mutex */
+UNIV_INTERN
+void
+sync_array_init(
+/*============*/
+	ulint		n_threads);	/*!< in: Number of slots to create */
+/**********************************************************************//**
+Close sync array wait sub-system. */
+UNIV_INTERN
+void
+sync_array_close(void);
 /*==================*/
-	FILE*		file,	/*!< in: file where to print */
-	sync_array_t*	arr);	/*!< in: wait array */
 
+/**********************************************************************//**
+Get an instance of the sync wait array. */
+UNIV_INTERN
+sync_array_t*
+sync_array_get(void);
+/*================*/
 
 #ifndef UNIV_NONINL
 #include "sync0arr.ic"

=== modified file 'storage/innobase/include/sync0arr.ic'
--- a/storage/innobase/include/sync0arr.ic	revid:inaam.rana@stripped
+++ b/storage/innobase/include/sync0arr.ic	revid:sunny.bains@stripped
@@ -24,4 +24,3 @@ Inline code
 
 Created 9/5/1995 Heikki Tuuri
 *******************************************************/
-

=== modified file 'storage/innobase/include/sync0rw.ic'
--- a/storage/innobase/include/sync0rw.ic	revid:inaam.rana@stripped
+++ b/storage/innobase/include/sync0rw.ic	revid:sunny.bains@stripped
@@ -504,7 +504,7 @@ rw_lock_s_unlock_func(
                 anyway. We do not wake other waiters, because they can't
                 exist without wait_ex waiter and wait_ex waiter goes first.*/
 		os_event_set(lock->wait_ex_event);
-		sync_array_object_signalled(sync_primary_wait_array);
+		sync_array_object_signalled();
 
 	}
 
@@ -578,7 +578,7 @@ rw_lock_x_unlock_func(
 		if (lock->waiters) {
 			rw_lock_reset_waiter_flag(lock);
 			os_event_set(lock->event);
-			sync_array_object_signalled(sync_primary_wait_array);
+			sync_array_object_signalled();
 		}
 	}
 

=== modified file 'storage/innobase/include/sync0sync.h'
--- a/storage/innobase/include/sync0sync.h	revid:inaam.rana@stripped
+++ b/storage/innobase/include/sync0sync.h	revid:sunny.bains@stripped
@@ -774,11 +774,6 @@ struct mutex_struct {
 #endif
 };
 
-/** The global array of wait cells for implementation of the databases own
-mutexes and read-write locks. */
-extern sync_array_t*	sync_primary_wait_array;/* Appears here for
-						debugging purposes only! */
-
 /** Constant determining how long spin wait is continued before suspending
 the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
 to 20 microseconds. */

=== modified file 'storage/innobase/sync/sync0arr.c'
--- a/storage/innobase/sync/sync0arr.c	revid:inaam.rana@stripped
+++ b/storage/innobase/sync/sync0arr.c	revid:sunny.bains@stripped
@@ -122,8 +122,6 @@ struct sync_array_struct {
 	ulint		n_cells;	/*!< number of cells in the
 					wait array */
 	sync_cell_t*	array;		/*!< pointer to wait array */
-	ulint		protection;	/*!< this flag tells which
-					mutex protects the data */
 	mutex_t		mutex;		/*!< possible database mutex
 					protecting this data structure */
 	os_mutex_t	os_mutex;	/*!< Possible operating system mutex
@@ -133,12 +131,23 @@ struct sync_array_struct {
 					to prevent infinite recursion
 					in implementation, we fall back to
 					an OS mutex. */
-	ulint		sg_count;	/*!< count of how many times an
-					object has been signalled */
 	ulint		res_count;	/*!< count of cell reservations
 					since creation of the array */
 };
 
+/** User configured sync array size */
+UNIV_INTERN ulong	srv_sync_array_size = 32;
+
+/** Locally stored copy of srv_sync_array_size */
+static	ulint		sync_array_size;
+
+/** The global array of wait cells for implementation of the database's own
+mutexes and read-write locks */
+static	sync_array_t**	sync_wait_array;
+
+/** count of how many times an object has been signalled */
+static ulint		sg_count;
+
 #ifdef UNIV_PFS_MUTEX
 /* Key to register the mutex with performance schema */
 UNIV_INTERN mysql_pfs_key_t	syn_arr_mutex_key;
@@ -184,17 +193,7 @@ sync_array_enter(
 /*=============*/
 	sync_array_t*	arr)	/*!< in: sync wait array */
 {
-	ulint	protection;
-
-	protection = arr->protection;
-
-	if (protection == SYNC_ARRAY_OS_MUTEX) {
-		os_mutex_enter(arr->os_mutex);
-	} else if (protection == SYNC_ARRAY_MUTEX) {
-		mutex_enter(&(arr->mutex));
-	} else {
-		ut_error;
-	}
+	os_mutex_enter(arr->os_mutex);
 }
 
 /******************************************************************//**
@@ -205,17 +204,7 @@ sync_array_exit(
 /*============*/
 	sync_array_t*	arr)	/*!< in: sync wait array */
 {
-	ulint	protection;
-
-	protection = arr->protection;
-
-	if (protection == SYNC_ARRAY_OS_MUTEX) {
-		os_mutex_exit(arr->os_mutex);
-	} else if (protection == SYNC_ARRAY_MUTEX) {
-		mutex_exit(&(arr->mutex));
-	} else {
-		ut_error;
-	}
+	os_mutex_exit(arr->os_mutex);
 }
 
 /*******************************************************************//**
@@ -223,15 +212,12 @@ Creates a synchronization wait array. It
 which is automatically reserved when the functions operating on it
 are called.
 @return	own: created wait array */
-UNIV_INTERN
+static
 sync_array_t*
 sync_array_create(
 /*==============*/
-	ulint	n_cells,	/*!< in: number of cells in the array
+	ulint	n_cells)	/*!< in: number of cells in the array
 				to create */
-	ulint	protection)	/*!< in: either SYNC_ARRAY_OS_MUTEX or
-				SYNC_ARRAY_MUTEX: determines the type
-				of mutex protecting the data structure */
 {
 	ulint		sz;
 	sync_array_t*	arr;
@@ -247,46 +233,28 @@ sync_array_create(
 	memset(arr->array, 0x0, sz);
 
 	arr->n_cells = n_cells;
-	arr->protection = protection;
 
 	/* Then create the mutex to protect the wait array complex */
-	if (protection == SYNC_ARRAY_OS_MUTEX) {
-		arr->os_mutex = os_mutex_create();
-	} else if (protection == SYNC_ARRAY_MUTEX) {
-		mutex_create(syn_arr_mutex_key,
-			     &arr->mutex, SYNC_NO_ORDER_CHECK);
-	} else {
-		ut_error;
-	}
+	arr->os_mutex = os_mutex_create();
 
 	return(arr);
 }
 
 /******************************************************************//**
 Frees the resources in a wait array. */
-UNIV_INTERN
+static
 void
 sync_array_free(
 /*============*/
 	sync_array_t*	arr)	/*!< in, own: sync wait array */
 {
-	ulint		protection;
-
 	ut_a(arr->n_reserved == 0);
 
 	sync_array_validate(arr);
 
-	protection = arr->protection;
-
 	/* Release the mutex protecting the wait array complex */
 
-	if (protection == SYNC_ARRAY_OS_MUTEX) {
-		os_mutex_free(arr->os_mutex);
-	} else if (protection == SYNC_ARRAY_MUTEX) {
-		mutex_free(&(arr->mutex));
-	} else {
-		ut_error;
-	}
+	os_mutex_free(arr->os_mutex);
 
 	ut_free(arr->array);
 	ut_free(arr);
@@ -849,19 +817,14 @@ sync_array_free_cell(
 Increments the signalled count. */
 UNIV_INTERN
 void
-sync_array_object_signalled(
-/*========================*/
-	sync_array_t*	arr)	/*!< in: wait array */
+sync_array_object_signalled(void)
+/*=============================*/
 {
 #ifdef HAVE_ATOMIC_BUILTINS
-	(void) os_atomic_increment_ulint(&arr->sg_count, 1);
+	(void) os_atomic_increment_ulint(&sg_count, 1);
 #else
-	sync_array_enter(arr);
-
-	arr->sg_count++;
-
-	sync_array_exit(arr);
-#endif
+	++sg_count;
+#endif /* HAVE_ATOMIC_BUILTINS */
 }
 
 /**********************************************************************//**
@@ -872,57 +835,73 @@ function should be called about every 1 
 Note that there's a race condition between this thread and mutex_exit
 changing the lock_word and calling signal_object, so sometimes this finds
 threads to wake up even when nothing has gone wrong. */
-UNIV_INTERN
+static
 void
-sync_arr_wake_threads_if_sema_free(void)
-/*====================================*/
+sync_array_wake_threads_if_sema_free_low(
+/*=====================================*/
+	sync_array_t*	arr)		/* in/out: wait array */
 {
-	sync_array_t*	arr	= sync_primary_wait_array;
-	sync_cell_t*	cell;
+	ulint		i = 0;
 	ulint		count;
-	ulint		i;
-	os_event_t      event;
 
 	sync_array_enter(arr);
 
-	i = 0;
-	count = 0;
-
-	while (count < arr->n_reserved) {
+	for (count = 0;  count < arr->n_reserved; ++i) {
+		sync_cell_t*	cell;
 
 		cell = sync_array_get_nth_cell(arr, i);
-		i++;
 
-		if (cell->wait_object == NULL) {
-			continue;
-		}
+		if (cell->wait_object != NULL) {
+
 			count++;
 
 			if (sync_arr_cell_can_wake_up(cell)) {
+				os_event_t      event;
 
-			event = sync_cell_get_event(cell);
+				event = sync_cell_get_event(cell);
 
-			os_event_set(event);
+				os_event_set(event);
+			}
 		}
-
 	}
 
 	sync_array_exit(arr);
 }
 
 /**********************************************************************//**
+If the wakeup algorithm does not work perfectly at semaphore relases,
+this function will do the waking (see the comment in mutex_exit). This
+function should be called about every 1 second in the server.
+
+Note that there's a race condition between this thread and mutex_exit
+changing the lock_word and calling signal_object, so sometimes this finds
+threads to wake up even when nothing has gone wrong. */
+UNIV_INTERN
+void
+sync_arr_wake_threads_if_sema_free(void)
+/*====================================*/
+{
+	ulint		i;
+
+	for (i = 0; i < sync_array_size; ++i) {
+
+		sync_array_wake_threads_if_sema_free_low(
+			sync_wait_array[i]);
+	}
+}
+
+/**********************************************************************//**
 Prints warnings of long semaphore waits to stderr.
 @return	TRUE if fatal semaphore wait threshold was exceeded */
-UNIV_INTERN
+static
 ibool
-sync_array_print_long_waits(
-/*========================*/
+sync_array_print_long_waits_low(
+/*============================*/
+	sync_array_t*	arr,	/*!< in: sync array instance */
 	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
-	const void**	sema)	/*!< out: longest-waited-for semaphore */
+	const void**	sema,	/*!< out: longest-waited-for semaphore */
+	ibool*		noticed)/*!< out: TRUE if long wait noticed */
 {
-	sync_cell_t*	cell;
-	ibool		old_val;
-	ibool		noticed = FALSE;
 	ulint		i;
 	ulint		fatal_timeout = srv_fatal_semaphore_wait_threshold;
 	ibool		fatal = FALSE;
@@ -940,12 +919,13 @@ sync_array_print_long_waits(
 # define SYNC_ARRAY_TIMEOUT	240
 #endif
 
-	for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
+	for (i = 0; i < arr->n_cells; i++) {
 
-		double	diff;
-		void*	wait_object;
+		double		diff;
+		sync_cell_t*	cell;
+		void*		wait_object;
 
-		cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
+		cell = sync_array_get_nth_cell(arr, i);
 
 		wait_object = cell->wait_object;
 
@@ -960,7 +940,7 @@ sync_array_print_long_waits(
 			fputs("InnoDB: Warning: a long semaphore wait:\n",
 			      stderr);
 			sync_array_cell_print(stderr, cell);
-			noticed = TRUE;
+			*noticed = TRUE;
 		}
 
 		if (diff > fatal_timeout) {
@@ -974,10 +954,43 @@ sync_array_print_long_waits(
 		}
 	}
 
+#undef SYNC_ARRAY_TIMEOUT
+
+	return(fatal);
+}
+
+/**********************************************************************//**
+Prints warnings of long semaphore waits to stderr.
+@return	TRUE if fatal semaphore wait threshold was exceeded */
+UNIV_INTERN
+ibool
+sync_array_print_long_waits(
+/*========================*/
+	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
+	const void**	sema)	/*!< out: longest-waited-for semaphore */
+{
+	ulint		i;
+	ibool		fatal = FALSE;
+	ibool		noticed = FALSE;
+
+	for (i = 0; i < sync_array_size; ++i) {
+
+		sync_array_t*	arr = sync_wait_array[i];
+
+		if (sync_array_print_long_waits_low(
+				arr, waiter, sema, &noticed)) {
+
+			fatal = TRUE;
+		}
+	}
+
 	if (noticed) {
+		ibool	old_val;
+
 		fprintf(stderr,
 			"InnoDB: ###### Starts InnoDB Monitor"
 			" for 30 secs to print diagnostic info:\n");
+
 		old_val = srv_print_innodb_monitor;
 
 		/* If some crucial semaphore is reserved, then also the InnoDB
@@ -1002,8 +1015,6 @@ sync_array_print_long_waits(
 			" to the standard error stream\n");
 	}
 
-#undef SYNC_ARRAY_TIMEOUT
-
 	return(fatal);
 }
 
@@ -1011,38 +1022,33 @@ sync_array_print_long_waits(
 Prints info of the wait array. */
 static
 void
-sync_array_output_info(
-/*===================*/
+sync_array_print_info_low(
+/*======================*/
 	FILE*		file,	/*!< in: file where to print */
-	sync_array_t*	arr)	/*!< in: wait array; NOTE! caller must own the
-				mutex */
+	sync_array_t*	arr)	/*!< in: wait array */
 {
-	sync_cell_t*	cell;
-	ulint		count;
 	ulint		i;
+	ulint		count = 0;
 
 	fprintf(file,
-		"OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
-						(long) arr->res_count, (long) arr->sg_count);
-	i = 0;
-	count = 0;
+		"OS WAIT ARRAY INFO: reservation count %ld\n",
+		(long) arr->res_count);
 
-	while (count < arr->n_reserved) {
+	for (i = 0; count < arr->n_reserved; ++i) {
+		sync_cell_t*	cell;
 
 		cell = sync_array_get_nth_cell(arr, i);
 
-	if (cell->wait_object != NULL) {
-		count++;
+		if (cell->wait_object != NULL) {
+			count++;
 			sync_array_cell_print(file, cell);
 		}
-
-		i++;
 	}
 }
 
 /**********************************************************************//**
 Prints info of the wait array. */
-UNIV_INTERN
+static
 void
 sync_array_print_info(
 /*==================*/
@@ -1051,7 +1057,94 @@ sync_array_print_info(
 {
 	sync_array_enter(arr);
 
-	sync_array_output_info(file, arr);
+	sync_array_print_info_low(file, arr);
 
 	sync_array_exit(arr);
 }
+
+/**********************************************************************//**
+Create the primary system wait array(s), they are protected by an OS mutex */
+UNIV_INTERN
+void
+sync_array_init(
+/*============*/
+	ulint		n_threads)		/*!< in: Number of slots to
+						create in all arrays */
+{
+	ulint		i;
+	ulint		n_slots;
+
+	ut_a(sync_wait_array == NULL);
+	ut_a(srv_sync_array_size > 0);
+	ut_a(n_threads > srv_sync_array_size);
+
+	sync_array_size = srv_sync_array_size;
+
+	/* We have to use ut_malloc() because the mutex infrastructure
+	hasn't been initialised yet. It is required by mem_alloc() and
+	the heap functions. */
+
+	sync_wait_array = ut_malloc(sizeof(*sync_wait_array) * sync_array_size);
+
+	n_slots = 1 + (n_threads - 1) / sync_array_size;
+
+	for (i = 0; i < sync_array_size; ++i) {
+
+		sync_wait_array[i] = sync_array_create(n_slots);
+	}
+}
+
+/**********************************************************************//**
+Close sync array wait sub-system. */
+UNIV_INTERN
+void
+sync_array_close(void)
+/*==================*/
+{
+	ulint		i;
+
+	for (i = 0; i < sync_array_size; ++i) {
+		sync_array_free(sync_wait_array[i]);
+	}
+
+	ut_free(sync_wait_array);
+	sync_wait_array = NULL;
+}
+
+/**********************************************************************//**
+Print info about the sync array(s). */
+UNIV_INTERN
+void
+sync_array_print(
+/*=============*/
+	FILE*		file)		/*!< in/out: Print to this stream */
+{
+	ulint		i;
+
+	for (i = 0; i < sync_array_size; ++i) {
+		sync_array_print_info(file, sync_wait_array[i]);
+	}
+
+	fprintf(file,
+		"OS WAIT ARRAY INFO: signal count %ld\n", (long) sg_count);
+
+}
+
+/**********************************************************************//**
+Get an instance of the sync wait array. */
+UNIV_INTERN
+sync_array_t*
+sync_array_get(void)
+/*================*/
+{
+	ulint		i;
+	static ulint	count;
+
+#ifdef HAVE_ATOMIC_BUILTINS
+	i = os_atomic_increment_ulint(&count, 1);
+#else
+	i = count++;
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+	return(sync_wait_array[i % sync_array_size]);
+}

=== modified file 'storage/innobase/sync/sync0rw.c'
--- a/storage/innobase/sync/sync0rw.c	revid:inaam.rana@stripped
+++ b/storage/innobase/sync/sync0rw.c	revid:sunny.bains@stripped
@@ -381,8 +381,9 @@ rw_lock_s_lock_spin(
 	const char*	file_name, /*!< in: file name where lock requested */
 	ulint		line)	/*!< in: line where requested */
 {
-	ulint	 index;	/* index of the reserved wait cell */
-	ulint	 i = 0;	/* spin round count */
+	ulint		index;	/* index of the reserved wait cell */
+	ulint		i = 0;	/* spin round count */
+	sync_array_t*	sync_arr;
 
 	ut_ad(rw_lock_validate(lock));
 
@@ -425,17 +426,18 @@ lock_loop:
 
 		rw_s_spin_round_count += i;
 
-		sync_array_reserve_cell(sync_primary_wait_array,
-					lock, RW_LOCK_SHARED,
-					file_name, line,
-					&index);
+		sync_arr = sync_array_get();
+
+		sync_array_reserve_cell(
+			sync_arr, lock, RW_LOCK_SHARED,
+			file_name, line, &index);
 
 		/* Set waiters before checking lock_word to ensure wake-up
                 signal is sent. This may lead to some unnecessary signals. */
 		rw_lock_set_waiter_flag(lock);
 
 		if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
-			sync_array_free_cell(sync_primary_wait_array, index);
+			sync_array_free_cell(sync_arr, index);
 			return; /* Success */
 		}
 
@@ -453,7 +455,7 @@ lock_loop:
 		lock->count_os_wait++;
 		rw_s_os_wait_count++;
 
-		sync_array_wait_event(sync_primary_wait_array, index);
+		sync_array_wait_event(sync_arr, index);
 
 		i = 0;
 		goto lock_loop;
@@ -495,8 +497,9 @@ rw_lock_x_lock_wait(
 	const char*	file_name,/*!< in: file name where lock requested */
 	ulint		line)	/*!< in: line where requested */
 {
-	ulint index;
-	ulint i = 0;
+	ulint		index;
+	ulint		i = 0;
+	sync_array_t*	sync_arr;
 
 	ut_ad(lock->lock_word <= 0);
 
@@ -511,14 +514,17 @@ rw_lock_x_lock_wait(
 
 		/* If there is still a reader, then go to sleep.*/
 		rw_x_spin_round_count += i;
+
+		sync_arr = sync_array_get();
+
+		sync_array_reserve_cell(
+			sync_arr, lock, RW_LOCK_WAIT_EX,
+			file_name, line, &index);
+
 		i = 0;
-		sync_array_reserve_cell(sync_primary_wait_array,
-					lock,
-					RW_LOCK_WAIT_EX,
-					file_name, line,
-					&index);
+
 		/* Check lock_word to ensure wake-up isn't missed.*/
-		if(lock->lock_word < 0) {
+		if (lock->lock_word < 0) {
 
 			/* these stats may not be accurate */
 			lock->count_os_wait++;
@@ -532,8 +538,7 @@ rw_lock_x_lock_wait(
 					       file_name, line);
 #endif
 
-			sync_array_wait_event(sync_primary_wait_array,
-					      index);
+			sync_array_wait_event(sync_arr, index);
 #ifdef UNIV_SYNC_DEBUG
 			rw_lock_remove_debug_info(lock, pass,
 					       RW_LOCK_WAIT_EX);
@@ -541,8 +546,7 @@ rw_lock_x_lock_wait(
                         /* It is possible to wake when lock_word < 0.
                         We must pass the while-loop check to proceed.*/
 		} else {
-			sync_array_free_cell(sync_primary_wait_array,
-					     index);
+			sync_array_free_cell(sync_arr, index);
 		}
 	}
 	rw_x_spin_round_count += i;
@@ -621,9 +625,10 @@ rw_lock_x_lock_func(
 	const char*	file_name,/*!< in: file name where lock requested */
 	ulint		line)	/*!< in: line where requested */
 {
-	ulint	index;	/*!< index of the reserved wait cell */
-	ulint	i;	/*!< spin round count */
-	ibool	spinning = FALSE;
+	ulint		i;	/*!< spin round count */
+	ulint		index;	/*!< index of the reserved wait cell */
+	sync_array_t*	sync_arr;
+	ibool		spinning = FALSE;
 
 	ut_ad(rw_lock_validate(lock));
 
@@ -671,18 +676,17 @@ lock_loop:
 			(ulong) lock->cline, (ulong) i);
 	}
 
-	sync_array_reserve_cell(sync_primary_wait_array,
-				lock,
-				RW_LOCK_EX,
-				file_name, line,
-				&index);
+	sync_arr = sync_array_get();
+
+	sync_array_reserve_cell(
+		sync_arr, lock, RW_LOCK_EX, file_name, line, &index);
 
 	/* Waiters must be set before checking lock_word, to ensure signal
 	is sent. This could lead to a few unnecessary wake-up signals. */
 	rw_lock_set_waiter_flag(lock);
 
 	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
-		sync_array_free_cell(sync_primary_wait_array, index);
+		sync_array_free_cell(sync_arr, index);
 		return; /* Locking succeeded */
 	}
 
@@ -699,7 +703,7 @@ lock_loop:
 	lock->count_os_wait++;
 	rw_x_os_wait_count++;
 
-	sync_array_wait_event(sync_primary_wait_array, index);
+	sync_array_wait_event(sync_arr, index);
 
 	i = 0;
 	goto lock_loop;

=== modified file 'storage/innobase/sync/sync0sync.c'
--- a/storage/innobase/sync/sync0sync.c	revid:inaam.rana@stripped
+++ b/storage/innobase/sync/sync0sync.c	revid:sunny.bains@stripped
@@ -182,10 +182,6 @@ static ib_int64_t	mutex_os_wait_count		=
 monitoring. */
 UNIV_INTERN ib_int64_t	mutex_exit_count		= 0;
 
-/** The global array of wait cells for implementation of the database's own
-mutexes and read-write locks */
-UNIV_INTERN sync_array_t*	sync_primary_wait_array;
-
 /** This variable is set to TRUE when sync_init is called */
 UNIV_INTERN ibool	sync_initialized	= FALSE;
 
@@ -488,8 +484,9 @@ mutex_spin_wait(
 					requested */
 	ulint		line)		/*!< in: line where requested */
 {
-	ulint	   index; /* index of the reserved wait cell */
-	ulint	   i;	  /* spin round count */
+	ulint		i;		/* spin round count */
+	ulint		index;		/* index of the reserved wait cell */
+	sync_array_t*	sync_arr;
 #ifdef UNIV_DEBUG
 	ib_int64_t lstart_time = 0, lfinish_time; /* for timing os_wait */
 	ulint ltime_diff;
@@ -577,8 +574,10 @@ spin_loop:
 		goto spin_loop;
 	}
 
-	sync_array_reserve_cell(sync_primary_wait_array, mutex,
-				SYNC_MUTEX, file_name, line, &index);
+	sync_arr = sync_array_get();
+
+	sync_array_reserve_cell(
+		sync_arr, mutex, SYNC_MUTEX, file_name, line, &index);
 
 	/* The memory order of the array reservation and the change in the
 	waiters field is important: when we suspend a thread, we first
@@ -593,7 +592,7 @@ spin_loop:
 		if (mutex_test_and_set(mutex) == 0) {
 			/* Succeeded! Free the reserved wait cell */
 
-			sync_array_free_cell(sync_primary_wait_array, index);
+			sync_array_free_cell(sync_arr, index);
 
 			ut_d(mutex->thread_id = os_thread_get_curr_id());
 #ifdef UNIV_SYNC_DEBUG
@@ -641,7 +640,7 @@ spin_loop:
 #endif /* UNIV_HOTBACKUP */
 #endif /* UNIV_DEBUG */
 
-	sync_array_wait_event(sync_primary_wait_array, index);
+	sync_array_wait_event(sync_arr, index);
 	goto mutex_loop;
 
 finish_timing:
@@ -674,7 +673,7 @@ mutex_signal_object(
 	/* The memory order of resetting the waiters field and
 	signaling the object is important. See LEMMA 1 above. */
 	os_event_set(mutex->event);
-	sync_array_object_signalled(sync_primary_wait_array);
+	sync_array_object_signalled();
 }
 
 #ifdef UNIV_SYNC_DEBUG
@@ -1499,11 +1498,8 @@ sync_init(void)
 
 	sync_initialized = TRUE;
 
-	/* Create the primary system wait array which is protected by an OS
-	mutex */
+	sync_array_init(OS_THREAD_MAX_N);
 
-	sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
-						    SYNC_ARRAY_OS_MUTEX);
 #ifdef UNIV_SYNC_DEBUG
 	/* Create the thread latch level array where the latch levels
 	are stored for each OS thread */
@@ -1576,7 +1572,7 @@ sync_close(void)
 {
 	mutex_t*	mutex;
 
-	sync_array_free(sync_primary_wait_array);
+	sync_array_close();
 
 	for (mutex = UT_LIST_GET_FIRST(mutex_list);
 	     mutex != NULL;
@@ -1659,7 +1655,7 @@ sync_print(
 	rw_lock_list_print_info(file);
 #endif /* UNIV_SYNC_DEBUG */
 
-	sync_array_print_info(file, sync_primary_wait_array);
+	sync_array_print(file);
 
 	sync_print_wait_info(file);
 }


Attachment: [text/bzr-bundle] bzr/sunny.bains@oracle.com-20110412012137-fqao8q5x283ub8f6.bundle
Thread
bzr push into mysql-trunk-innodb branch (Sunny.Bains:3573 to 3574) Sunny Bains12 Apr