List: Commits « Previous Message | Next Message »
From: Sunny Bains  Date: April 14 2011 6:35am
Subject:bzr push into mysql-trunk-innodb branch (Sunny.Bains:3581 to 3582)
View as plain text  
 3582 Sunny Bains	2011-04-14
      Bug 12324092 - PURGING SHOULD BE EVENT DRIVEN
      
      In 5.6 the purge thread code has some arbitrary sleeps that aren't working.
      There was a big TODO next to them.
      
      The fix is to make the purge coordinator wakeup mechanism the same as in 5.5.
      
      Added a new monitoring counter: MONITOR_PURGE_INVOKED
      
      rb://642 Approved by: Jimmy Yang.

    modified:
      mysql-test/suite/innodb/r/innodb_monitor.result
      storage/innobase/include/srv0mon.h
      storage/innobase/include/srv0srv.h
      storage/innobase/srv/srv0mon.c
      storage/innobase/srv/srv0srv.c
      storage/innobase/trx/trx0purge.c
 3581 Jimmy Yang	2011-04-13
      Skip temp index prefix \377 from index names when filling I_S buffer
      pool tables.
            
      This change will silence Bug 12340873 - ASSERT IN PROTOCOL::END_STATEMENT
      but it will not fix the problem which is that MySQL asserts sometimes when
      an I_S fill table returns 1. This is related to
      http://bugs.mysql.com/bug.php?id=29900
      Bug#29900 MySQL does not return an error to the client if INFORMATION SCHEMA
      fill table fails

    modified:
      storage/innobase/handler/i_s.cc
=== modified file 'mysql-test/suite/innodb/r/innodb_monitor.result'
--- a/mysql-test/suite/innodb/r/innodb_monitor.result	revid:jimmy.yang@stripped
+++ b/mysql-test/suite/innodb/r/innodb_monitor.result	revid:sunny.bains@stripped
@@ -110,6 +110,7 @@ trx_undo_slots_cached	disabled
 trx_rseg_curent_size	disabled
 purge_del_mark_records	disabled
 purge_upd_exist_or_extern_records	disabled
+purge_invoked	disabled
 purge_undo_log_pages	disabled
 purge_dml_delay_usec	disabled
 log_checkpoints	disabled
@@ -289,6 +290,7 @@ trx_undo_slots_cached	enabled
 trx_rseg_curent_size	enabled
 purge_del_mark_records	enabled
 purge_upd_exist_or_extern_records	enabled
+purge_invoked	enabled
 purge_undo_log_pages	enabled
 purge_dml_delay_usec	enabled
 log_checkpoints	enabled
@@ -470,6 +472,7 @@ trx_undo_slots_cached	disabled
 trx_rseg_curent_size	disabled
 purge_del_mark_records	disabled
 purge_upd_exist_or_extern_records	disabled
+purge_invoked	disabled
 purge_undo_log_pages	disabled
 purge_dml_delay_usec	disabled
 log_checkpoints	disabled
@@ -649,6 +652,7 @@ trx_undo_slots_cached	0	disabled
 trx_rseg_curent_size	0	disabled
 purge_del_mark_records	0	disabled
 purge_upd_exist_or_extern_records	0	disabled
+purge_invoked	0	disabled
 purge_undo_log_pages	0	disabled
 purge_dml_delay_usec	0	disabled
 log_checkpoints	0	disabled
@@ -882,6 +886,7 @@ trx_undo_slots_cached	enabled
 trx_rseg_curent_size	enabled
 purge_del_mark_records	enabled
 purge_upd_exist_or_extern_records	enabled
+purge_invoked	enabled
 purge_undo_log_pages	enabled
 purge_dml_delay_usec	enabled
 log_checkpoints	enabled
@@ -1061,6 +1066,7 @@ trx_undo_slots_cached	disabled
 trx_rseg_curent_size	disabled
 purge_del_mark_records	disabled
 purge_upd_exist_or_extern_records	disabled
+purge_invoked	disabled
 purge_undo_log_pages	disabled
 purge_dml_delay_usec	disabled
 log_checkpoints	disabled
@@ -1240,6 +1246,7 @@ trx_undo_slots_cached	enabled
 trx_rseg_curent_size	enabled
 purge_del_mark_records	enabled
 purge_upd_exist_or_extern_records	enabled
+purge_invoked	enabled
 purge_undo_log_pages	enabled
 purge_dml_delay_usec	enabled
 log_checkpoints	enabled
@@ -1419,6 +1426,7 @@ trx_undo_slots_cached	disabled
 trx_rseg_curent_size	disabled
 purge_del_mark_records	disabled
 purge_upd_exist_or_extern_records	disabled
+purge_invoked	disabled
 purge_undo_log_pages	disabled
 purge_dml_delay_usec	disabled
 log_checkpoints	disabled
@@ -1598,6 +1606,7 @@ trx_undo_slots_cached	disabled
 trx_rseg_curent_size	disabled
 purge_del_mark_records	disabled
 purge_upd_exist_or_extern_records	disabled
+purge_invoked	disabled
 purge_undo_log_pages	disabled
 purge_dml_delay_usec	disabled
 log_checkpoints	enabled

=== modified file 'storage/innobase/include/srv0mon.h'
--- a/storage/innobase/include/srv0mon.h	revid:jimmy.yang@stripped
+++ b/storage/innobase/include/srv0mon.h	revid:sunny.bains@stripped
@@ -250,6 +250,7 @@ enum monitor_id_value {
 	MONITOR_MODULE_PURGE,
 	MONITOR_N_DEL_ROW_PURGE,
 	MONITOR_N_UPD_EXIST_EXTERN,
+	MONITOR_PURGE_INVOKED,
 	MONITOR_PURGE_N_PAGE_HANDLED,
 	MONITOR_DML_PURGE_DELAY,
 

=== modified file 'storage/innobase/include/srv0srv.h'
--- a/storage/innobase/include/srv0srv.h	revid:jimmy.yang@stripped
+++ b/storage/innobase/include/srv0srv.h	revid:sunny.bains@stripped
@@ -522,6 +522,16 @@ srv_master_thread(
 	void*	arg);	/*!< in: a dummy parameter required by
 			os_thread_create */
 /*******************************************************************//**
+Tells the purge thread that there has been activity in the database
+and wakes up the purge thread if it is suspended (not sleeping).  Note
+that there is a small chance that the purge thread stays suspended
+(we do not protect our operation with the srv_sys_t:mutex, for
+performance reasons). */
+UNIV_INTERN
+void
+srv_wake_purge_thread_if_not_active(void);
+/*=====================================*/
+/*******************************************************************//**
 Wakes up the purge thread if it's not already awake. */
 UNIV_INTERN
 void

=== modified file 'storage/innobase/srv/srv0mon.c'
--- a/storage/innobase/srv/srv0mon.c	revid:jimmy.yang@stripped
+++ b/storage/innobase/srv/srv0mon.c	revid:sunny.bains@stripped
@@ -526,6 +526,10 @@ static monitor_info_t	innodb_counter_inf
 	 " updates on delete marked record with externally stored field",
 	 0, 0, MONITOR_N_UPD_EXIST_EXTERN},
 
+	{"purge_invoked", "purge",
+	 "Number of purge was invoked",
+	 0, 0, MONITOR_PURGE_INVOKED},
+
 	{"purge_undo_log_pages", "purge",
 	 "Number of undo log pages handled by the purge",
 	 0, 0, MONITOR_PURGE_N_PAGE_HANDLED},

=== modified file 'storage/innobase/srv/srv0srv.c'
--- a/storage/innobase/srv/srv0srv.c	revid:jimmy.yang@stripped
+++ b/storage/innobase/srv/srv0srv.c	revid:sunny.bains@stripped
@@ -2086,7 +2086,7 @@ and wakes up the purge thread if it is s
 that there is a small chance that the purge thread stays suspended
 (we do not protect our operation with the srv_sys_t:mutex, for
 performance reasons). */
-static
+UNIV_INTERN
 void
 srv_wake_purge_thread_if_not_active(void)
 /*=====================================*/
@@ -2760,6 +2760,59 @@ srv_worker_thread(
 }
 
 /*********************************************************************//**
+Do the actual purge operation. */
+static
+void
+srv_do_purge(
+/*=========*/
+	ulint		n_threads,	/*!< in: number of threads to use */
+	ulint		batch_size,	/*!< in: purge batch size */
+	ulint*		n_total_purged)	/*!< in/out: total pages purged */
+{
+	if (n_threads <= 1) {
+		ulint	n_pages_purged;
+
+		/* Purge until there are no more records to
+		purge and there is no change in configuration
+		or server state. */
+
+		do {
+			n_pages_purged = trx_purge(0, batch_size);
+
+			*n_total_purged += n_pages_purged;
+
+		} while (n_pages_purged > 0 && !srv_fast_shutdown);
+
+	} else {
+		ulint	n_pages_purged;
+
+		do {
+			n_pages_purged = trx_purge(n_threads, batch_size);
+
+			*n_total_purged += n_pages_purged;
+
+			/* During shutdown the worker threads can
+			exit when they detect a change in state.
+			Force the coordinator thread to do the purge
+			tasks from the work queue. */
+
+			while (srv_get_task_queue_length() > 0) {
+
+				ibool	success;
+
+				ut_a(srv_shutdown_state);
+
+				success = srv_task_execute();
+				ut_a(success);
+			}
+
+		} while (trx_sys->rseg_history_len > 100
+			 && srv_shutdown_state == SRV_SHUTDOWN_NONE
+			 && srv_fast_shutdown == 0);
+	}
+}
+
+/*********************************************************************//**
 Purge coordinator thread that schedules the purge tasks.
 @return	a dummy parameter */
 UNIV_INTERN
@@ -2770,6 +2823,8 @@ srv_purge_coordinator_thread(
 						required by os_thread_create */
 {
 	srv_slot_t*	slot;
+	ulint		retries = 0;
+	ulint           n_total_purged = ULINT_UNDEFINED;
 
 	ut_a(srv_n_purge_threads >= 1);
 
@@ -2793,119 +2848,57 @@ srv_purge_coordinator_thread(
 
 	srv_sys_mutex_exit();
 
-	for (;;) {
-		ulint		n_pages_purged;
-		ib_time_t	last_time = ut_time();
-		ulint		count = srv_sys->activity_count;
-		ulint		batch_size = srv_purge_batch_size;
-		ulint		sleep_ms = ut_rnd_gen_ulint() % 10000;
+	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
 
-		if (srv_shutdown_state != SRV_SHUTDOWN_NONE
-		    && srv_fast_shutdown != 0) {
+		ulint	n_threads = srv_n_purge_threads;
+		ulint	batch_size = srv_purge_batch_size;
 
-			break;
+		if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+
+			/* If shutdown is signalled, then switch
+			to single threaded purge. There are no user
+			threads to contend with and secondly purge
+			worker threads can exit silently, causing a
+			potential hang. We try and avoid that as much
+			as we can until the underlying problem is fixed
+			properly. */
+
+			n_threads = 1;
 		}
 
-		/* If number of threads is 1 then we let trx_purge() do
-		the actual purge for us. */
-		if (srv_n_purge_threads == 1) {
-
-			do {
-				n_pages_purged = trx_purge(0, batch_size);
-
-				/* FIXME: Do some black magic. This code
-				is purely guess work and needs to be tuned
-				properly after some benchmarking. */
-				if (srv_check_activity(count)) {
-					sleep_ms = 60000;
-					batch_size = srv_purge_batch_size;
-				} else if (n_pages_purged == 0) {
-					sleep_ms = 120000;
-					batch_size = srv_purge_batch_size;
-				} else {
-					sleep_ms = 0;
-					batch_size = 500;
-				}
-
-				/* No point in sleeping during shutdown. */
-				if (srv_shutdown_state == SRV_SHUTDOWN_NONE
-				    && sleep_ms > 0) {
-
-					os_thread_sleep(sleep_ms);
-				}
-
-				/* Take snapshot to check for user
-				activity at every second. */
-				if (ut_time() - last_time >= 1) {
-					count = srv_sys->activity_count;
-					last_time = ut_time();
-				}
+		/* If there are very few records to purge or the last
+		purge didn't purge any records then wait for activity.
+	        We peek at the history len without holding any mutex
+		because in the worst case we will end up waiting for
+		the next purge event. */
 
-			} while (n_pages_purged > 0 && srv_fast_shutdown == 0);
+		if (trx_sys->rseg_history_len < batch_size
+		    || (n_total_purged == 0
+			&& retries >= TRX_SYS_N_RSEGS)) {
 
-		} else {
-			do {
+			srv_suspend_thread(slot);
 
-				n_pages_purged = trx_purge(
-					srv_n_purge_threads, batch_size);
+			os_event_wait(slot->event);
 
-				/* During shutdown the worker threads can
-				exit when they detect a change in state.
-				Force the coordinator thread to do the purge
-				tasks from the work queue. */
-				while (srv_get_task_queue_length() > 0) {
-
-					ibool	success;
-
-					ut_a(srv_shutdown_state);
-
-					success = srv_task_execute();
-					ut_a(success);
-				}
-
-				/* No point in sleeping during shutdown. */
-				if (srv_shutdown_state == SRV_SHUTDOWN_NONE
-				    && sleep_ms > 0) {
-
-					os_thread_sleep(sleep_ms);
-				}
-
-				/* FIXME: Do some black magic. This code
-				is purely guess work and needs to be tuned
-				properly after some benchmarking. */
-				if (!srv_check_activity(count)
-				    && trx_sys->rseg_history_len > 500) {
-					sleep_ms = 0;
-					batch_size = 500;
-				} else {
-					sleep_ms = 60000;
-
-					if (n_pages_purged > 0) {
-						sleep_ms = 150000;
-					}
-
-					batch_size = srv_purge_batch_size;
-				}
-
-				/* Take snapshot to check for user
-				activity at every second. */
-				if (ut_time() - last_time >= 1) {
-					count = srv_sys->activity_count;
-					last_time = ut_time();
-				}
-
-			} while (trx_sys->rseg_history_len > 100
-				 && srv_shutdown_state == SRV_SHUTDOWN_NONE
-				 && srv_fast_shutdown == 0);
+			retries = 0;
 		}
 
-		/* Check if Slow shutdown and no more pages to purge. */
-		if (srv_shutdown_state != SRV_SHUTDOWN_NONE
-		    && srv_fast_shutdown == 0
-		    && n_pages_purged == 0) {
+		/* Check for shutdown and whether we should do purge at all. */
+		if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
+		    || srv_shutdown_state != SRV_SHUTDOWN_NONE
+		    || srv_fast_shutdown) {
 
 			break;
 		}
+
+		if (n_total_purged == 0 && retries <= TRX_SYS_N_RSEGS) {
+			++retries;
+		} else if (n_total_purged > 0) {
+			retries = 0;
+			n_total_purged = 0;
+		}
+
+		srv_do_purge(n_threads, batch_size, &n_total_purged);
 	}
 
 	/* The task queue should always be empty, independent of fast

=== modified file 'storage/innobase/trx/trx0purge.c'
--- a/storage/innobase/trx/trx0purge.c	revid:jimmy.yang@stripped
+++ b/storage/innobase/trx/trx0purge.c	revid:sunny.bains@stripped
@@ -291,6 +291,10 @@ trx_purge_add_update_undo_to_history(
 	rw_lock_x_unlock(&trx_sys->lock);
 #endif /* HAVE_ATOMIC_BUILTINS */
 
+	if (!(trx_sys->rseg_history_len %  srv_purge_batch_size)) {
+		srv_wake_purge_thread_if_not_active();
+	}
+
 	/* Write the trx number to the undo log header */
 	mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
 
@@ -1297,6 +1301,7 @@ run_synchronously:
 		trx_purge_truncate();
 	}
 
+	MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
 	MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages_handled);
 
 	if (srv_print_thread_releases) {


Attachment: [text/bzr-bundle] bzr/sunny.bains@oracle.com-20110414063144-3rk059m99e6xday8.bundle
Thread
bzr push into mysql-trunk-innodb branch (Sunny.Bains:3581 to 3582) Sunny Bains 14 Apr