List:Commits« Previous MessageNext Message »
From:Sunny Bains Date:June 2 2011 7:08am
Subject:bzr push into mysql-trunk branch (Sunny.Bains:3142 to 3143)
View as plain text  
 3143 Sunny Bains	2011-06-02
      Bug 11765863: SHUTDOWN HANG WAITING FOR PURGE THREAD TO BE SUSPENDED
      
      This is really several bug fixes rolled into one:
      
        1. Fix a hang during shutdown
      
        2. Fix multi-threaded purge performance issue when innodb-purge-threads > 1.
      
        3. Remove purge_sys->n_executing
      
        4. Purge unable to keep up with data generation when number of user
           threads >= 256.
      
      Now that worker threads can't simply exit via os_event_wait() and their
      termination is deterministic.  Get rid of the dodgy code that tried to
      handle the case of an unexpected exit. This gets rid of the
      purge_sys->n_executing field altogether. Simplifies the control flow and
      fixes a performance issue when innodb-purge-threads > 1 by removing some
      arbitrary sleeps to handle the above case.
      
      Treat the number of purge threads as a pool of threads, use only as many
      threads as required to keep the history list in check.
      
      Set innodb_purge_batch_size default to 300 from 20. Updated test results
      accordingly.
      
      rb://658 Approved by: Jimmy Yang.

    modified:
      mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result
      storage/innobase/handler/ha_innodb.cc
      storage/innobase/include/trx0purge.h
      storage/innobase/srv/srv0srv.c
      storage/innobase/trx/trx0purge.c
 3142 Bjorn Munch	2011-06-01 [merge]
      12607800 followup

    modified:
      cmake/install_layout.cmake
=== modified file 'mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result'
--- a/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result	revid:bjorn.munch@stripped
+++ b/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result	revid:sunny.bains@stripped
@@ -1,19 +1,19 @@
 SET @global_start_value = @@global.innodb_purge_batch_size;
 SELECT @global_start_value;
 @global_start_value
-20
+300
 '#--------------------FN_DYNVARS_046_01------------------------#'
 SET @@global.innodb_purge_batch_size = 1;
 SET @@global.innodb_purge_batch_size = DEFAULT;
 SELECT @@global.innodb_purge_batch_size;
 @@global.innodb_purge_batch_size
-20
+300
 '#---------------------FN_DYNVARS_046_02-------------------------#'
 SET innodb_purge_batch_size = 1;
 ERROR HY000: Variable 'innodb_purge_batch_size' is a GLOBAL variable and should be set with SET GLOBAL
 SELECT @@innodb_purge_batch_size;
 @@innodb_purge_batch_size
-20
+300
 SELECT local.innodb_purge_batch_size;
 ERROR 42S02: Unknown table 'local' in field list
 SET global innodb_purge_batch_size = 1;
@@ -95,4 +95,4 @@ SELECT @@global.innodb_purge_batch_size;
 SET @@global.innodb_purge_batch_size = @global_start_value;
 SELECT @@global.innodb_purge_batch_size;
 @@global.innodb_purge_batch_size
-20
+300

=== modified file 'storage/innobase/handler/ha_innodb.cc'
--- a/storage/innobase/handler/ha_innodb.cc	revid:bjorn.munch@stripped
+++ b/storage/innobase/handler/ha_innodb.cc	revid:sunny.bains@stripped
@@ -12089,7 +12089,7 @@ static MYSQL_SYSVAR_ULONG(purge_batch_si
   PLUGIN_VAR_OPCMDARG,
   "Number of UNDO log pages to purge in one batch from the history list.",
   NULL, NULL,
-  20,			/* Default setting */
+  300,			/* Default setting */
   1,			/* Minimum value */
   5000, 0);		/* Maximum value */
 

=== modified file 'storage/innobase/include/trx0purge.h'
--- a/storage/innobase/include/trx0purge.h	revid:bjorn.munch@stripped
+++ b/storage/innobase/include/trx0purge.h	revid:sunny.bains@stripped
@@ -117,11 +117,9 @@ struct trx_purge_struct{
 					obtaining an s-latch here. */
 	read_view_t*	view;		/*!< The purge will not remove undo logs
 					which are >= this view (purge view) */
-	ulint		n_submitted;	/*!< Count of total tasks submitted
+	volatile ulint	n_submitted;	/*!< Count of total tasks submitted
 					to the task queue */
-	ulint		n_executing;	/*!< Count of currently executing purge
-					worker threads */
-	ulint		n_completed;	/*!< Count of total tasks completed */
+	volatile ulint	n_completed;	/*!< Count of total tasks completed */
 
 	/*------------------------------*/
 	/* The following two fields form the 'purge pointer' which advances

=== modified file 'storage/innobase/srv/srv0srv.c'
--- a/storage/innobase/srv/srv0srv.c	revid:bjorn.munch@stripped
+++ b/storage/innobase/srv/srv0srv.c	revid:sunny.bains@stripped
@@ -1850,7 +1850,7 @@ srv_master_do_purge(void)
 			/* Nothing to purge. */
 			n_pages_purged = 0;
 		} else {
-			n_pages_purged = trx_purge(0, srv_purge_batch_size);
+			n_pages_purged = trx_purge(1, srv_purge_batch_size);
 		}
 
 		total_pages_purged += n_pages_purged;
@@ -2303,8 +2303,6 @@ srv_task_execute(void)
 
 	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
 
-	os_atomic_inc_ulint(&purge_sys->bh_mutex, &purge_sys->n_executing, 1);
-
 	mutex_enter(&srv_sys->tasks_mutex);
 
 	if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
@@ -2326,8 +2324,6 @@ srv_task_execute(void)
 			&purge_sys->bh_mutex, &purge_sys->n_completed, 1);
 	}
 
-	os_atomic_dec_ulint(&purge_sys->bh_mutex, &purge_sys->n_executing, 1);
-
 	return(thr != NULL);
 }
 
@@ -2404,50 +2400,62 @@ void
 srv_do_purge(
 /*=========*/
 	ulint		n_threads,	/*!< in: number of threads to use */
-	ulint		batch_size,	/*!< in: purge batch size */
 	ulint*		n_total_purged)	/*!< in/out: total pages purged */
 {
-	if (n_threads <= 1) {
-		ulint	n_pages_purged;
+	ulint		n_pages_purged;
+	static ulint	n_use_threads = 0;
+	static ulint	rseg_history_len = 0;
 
-		/* Purge until there are no more records to
-		purge and there is no change in configuration
-		or server state. */
+	ut_a(n_threads > 0);
 
-		do {
-			n_pages_purged = trx_purge(0, batch_size);
+	/* Purge until there are no more records to purge and there is
+	no change in configuration or server state. If the user has
+	configured more than one purge thread then we treat that as a
+	pool of threads and only use the extra threads if purge can't
+	keep up with updates. */
 
-			*n_total_purged += n_pages_purged;
+	if (n_use_threads == 0) {
+		n_use_threads = n_threads;
+	}
 
-		} while (n_pages_purged > 0 && !srv_fast_shutdown);
+	do {
+		if (trx_sys->rseg_history_len > rseg_history_len) {
 
-	} else {
-		ulint	n_pages_purged;
+			/* History length is now longer than what it was
+			when we took the last snapshot. Use more threads. */
 
-		do {
-			n_pages_purged = trx_purge(n_threads, batch_size);
+			if (n_use_threads < n_threads) {
+				++n_use_threads;
+			}
 
-			*n_total_purged += n_pages_purged;
+		} else if (n_use_threads > 1) {
+			/* History length same or smaller since last snapshot,
+			use fewer threads. */
 
-			/* During shutdown the worker threads can
-			exit when they detect a change in state.
-			Force the coordinator thread to do the purge
-			tasks from the work queue. */
+			--n_use_threads;
+		}
 
-			while (srv_get_task_queue_length() > 0) {
+		/* Ensure that the purge threads are less than what
+		was configured. */
 
-				ibool	success;
+		ut_a(n_use_threads > 0);
+		ut_a(n_use_threads <= n_threads);
 
-				ut_a(srv_shutdown_state);
+		/* Take a snapshot of the history list before purge. */
+		rseg_history_len = trx_sys->rseg_history_len;
+#if 0
+		fprintf(stderr, "%lu:%lu %lu\n",
+			rseg_history_len, trx_sys->rseg_history_len,
+			n_use_threads);
+#endif
+		n_pages_purged = trx_purge(n_use_threads, srv_purge_batch_size);
 
-				success = srv_task_execute();
-				ut_a(success);
-			}
+		*n_total_purged += n_pages_purged;
+
+	} while (srv_shutdown_state == SRV_SHUTDOWN_NONE
+		 && srv_fast_shutdown == 0
+		 && n_pages_purged > 0);
 
-		} while (trx_sys->rseg_history_len > 100
-			 && srv_shutdown_state == SRV_SHUTDOWN_NONE
-			 && srv_fast_shutdown == 0);
-	}
 }
 
 /*********************************************************************//**
@@ -2488,31 +2496,14 @@ srv_purge_coordinator_thread(
 
 	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
 
-		ulint	n_threads = srv_n_purge_threads;
-		ulint	batch_size = srv_purge_batch_size;
-
-		if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
-
-			/* If shutdown is signalled, then switch
-			to single threaded purge. There are no user
-			threads to contended with and secondly purge
-			worker threads can exit silently, causing a
-			potential hang. We try and avoid that as much
-			as we can until the underlying problem is fixed
-			properly. */
-
-			n_threads = 1;
-		}
-
 		/* If there are very few records to purge or the last
 		purge didn't purge any records then wait for activity.
 	        We peek at the history len without holding any mutex
 		because in the worst case we will end up waiting for
 		the next purge event. */
 
-		if (trx_sys->rseg_history_len < batch_size
-		    || (n_total_purged == 0
-			&& retries >= TRX_SYS_N_RSEGS)) {
+		if (trx_sys->rseg_history_len == 0
+		    || (n_total_purged == 0 && retries >= TRX_SYS_N_RSEGS)) {
 
 			srv_suspend_thread(slot);
 
@@ -2535,7 +2526,7 @@ srv_purge_coordinator_thread(
 			n_total_purged = 0;
 		}
 
-		srv_do_purge(n_threads, batch_size, &n_total_purged);
+		srv_do_purge(srv_n_purge_threads, &n_total_purged);
 	}
 
 	/* The task queue should always be empty, independent of fast

=== modified file 'storage/innobase/trx/trx0purge.c'
--- a/storage/innobase/trx/trx0purge.c	revid:bjorn.munch@stripped
+++ b/storage/innobase/trx/trx0purge.c	revid:sunny.bains@stripped
@@ -972,7 +972,7 @@ trx_purge_fetch_next_rec(
 /*******************************************************************//**
 This function runs a purge batch.
 @return	number of undo log pages handled in the batch */
-UNIV_INTERN
+static
 ulint
 trx_purge_attach_undo_recs(
 /*=======================*/
@@ -986,11 +986,13 @@ trx_purge_attach_undo_recs(
 	ulint		n_pages_handled = 0;
 	ulint		n_thrs = UT_LIST_GET_LEN(purge_sys->query->thrs);
 
+	ut_a(n_purge_threads > 0);
+
 	*limit = purge_sys->iter;
 
 	/* Debug code to validate some pre-requisites and reset done flag. */
 	for (thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
-	     thr != NULL;
+	     thr != NULL && i < n_purge_threads;
 	     thr = UT_LIST_GET_NEXT(thrs, thr), ++i) {
 
 		purge_node_t*		node;
@@ -1005,7 +1007,9 @@ trx_purge_attach_undo_recs(
 		node->done = FALSE;
 	}
 
-	ut_a(i == n_purge_threads || (n_purge_threads == 0 && i == 1));
+	/* There should never be fewer nodes than threads, the inverse
+	however is allowed because we only use purge threads as needed. */
+	ut_a(i == n_purge_threads);
 
 	/* Fetch and parse the UNDO records. The UNDO records are added
 	to a per purge node vector. */
@@ -1014,7 +1018,9 @@ trx_purge_attach_undo_recs(
 
 	ut_ad(trx_purge_check_limit());
 
-	do {
+	i = 0;
+
+	for (;;) {
 		purge_node_t*		node;
 		trx_purge_rec_t*	purge_rec;
 
@@ -1061,10 +1067,12 @@ trx_purge_attach_undo_recs(
 
 		thr = UT_LIST_GET_NEXT(thrs, thr);
 
-		if (thr == NULL) {
+		if (!(++i % n_purge_threads)) {
 			thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
 		}
-	} while (thr);
+
+		ut_a(thr != NULL);
+	}
 
 	ut_ad(trx_purge_check_limit());
 
@@ -1116,55 +1124,45 @@ static
 void
 trx_purge_wait_for_workers_to_complete(
 /*===================================*/
-	trx_purge_t*	purge_sys)	/*!< in: purge instance */ 
+	trx_purge_t*	purge_sys)	/*!< in: purge instance */
 {
-	/* Ensure that the work queue empties out. Note, we are doing
-	a dirty read of purge_sys->n_completed and purge_sys->n_executing. */
-	while (purge_sys->n_submitted > purge_sys->n_completed
-	       || purge_sys->n_executing > 0) {
-
-		if (srv_get_task_queue_length() == 0
-		    && purge_sys->n_executing == 0) {
-
-			ut_a(purge_sys->n_submitted == purge_sys->n_completed);
-			break;
+	ulint		n_submitted = purge_sys->n_submitted;
 
-		} else if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
-			/* This is problematic because the worker threads can
-			simply exit via os_event_wait(). We could end up
-			looping here forever waiting for the task queue to
-			drain. Another problem is that a worker thread can
-			be active but it hasn't incrementd the n_executing
-			field yet. Our only hope is to poll here for a fixed
-			time interval.  Given that the server is shutting down,
-			a few microseconds waiting to check the worker threads
-			should be acceptable. */
+#ifdef HAVE_ATOMIC_BUILTINS
+	/* Ensure that the work queue empties out. */
+	while (!os_compare_and_swap_ulint(
+			&purge_sys->n_completed, n_submitted, n_submitted)) {
+#else
+	mutex_enter(&purge_sys->bh_mutex);
+
+	while (purge_sys->n_completed < n_submitted) {
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+#ifndef HAVE_ATOMIC_BUILTINS
+		mutex_exit(&purge_sys->bh_mutex);
+#endif /* !HAVE_ATOMIC_BUILTINS */
 
-			os_thread_sleep(500000);
-
-			if (srv_get_task_queue_length() > 0) {
-				break;
-			}
+		if (srv_get_task_queue_length() > 0) {
+			srv_release_threads(SRV_WORKER, 1);
 		}
 
-		srv_release_threads(SRV_WORKER, 1);
+		os_thread_yield();
 
-		/* This is an arbitrary choice. */
-		os_thread_sleep(4000);
+#ifndef HAVE_ATOMIC_BUILTINS
+		mutex_enter(&purge_sys->bh_mutex);
+#endif /* !HAVE_ATOMIC_BUILTINS */
 	}
 
-	/* If shutdown is signalled then the worker threads can
-	simply exit via os_event_wait(). The thread initiating the
-	purge should be prepared to handle this case. */
-	if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
-
-		/* None of the worker threads should be doing any work. */
-		ut_a(purge_sys->n_executing == 0);
-
-		/* There should be no outstanding tasks as long
-		as the worker threads are active. */
-		ut_a(srv_get_task_queue_length() == 0);
-	}
+#ifndef HAVE_ATOMIC_BUILTINS
+	mutex_exit(&purge_sys->bh_mutex);
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+	/* None of the worker threads should be doing any work. */
+	ut_a(purge_sys->n_submitted == purge_sys->n_completed);
+
+	/* There should be no outstanding tasks as long
+	as the worker threads are active. */
+	ut_a(srv_get_task_queue_length() == 0);
 }
 
 /******************************************************************//**
@@ -1204,6 +1202,8 @@ trx_purge(
 	que_thr_t*	thr = NULL;
 	ulint		n_pages_handled;
 
+	ut_a(n_purge_threads > 0);
+
 	srv_dml_needed_delay = trx_purge_dml_delay();
 
 	/* The number of tasks submitted should be completed. */
@@ -1226,7 +1226,7 @@ trx_purge(
 		n_purge_threads, purge_sys, &purge_sys->limit, batch_size);
 
 	/* Do we do an asynchronous purge or not ? */
-	if (n_purge_threads > 0) {
+	if (n_purge_threads > 1) {
 		ulint	i = 0;
 
 		/* Submit the tasks to the work queue. */

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-trunk branch (Sunny.Bains:3142 to 3143) Sunny Bains2 Jun