MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Davi Arnaut Date:September 29 2010 12:09am
Subject:bzr commit into mysql-5.5-bugfixing branch (davi:3206) Bug#56096
View as plain text  
# At a local mysql-5.5-bugfixing repository of davi

 3206 Davi Arnaut	2010-09-28
      Bug#56096: STOP SLAVE hangs if executed in parallel with user sleep
      
      The problem is that although the slave thread is awakened (via a
      spurious event) when a slave stop is issued, the slave thread
      is not explicitly killed. This can lead to an infinite loop
      if the event which the slave thread was waiting for is not
      properly restarted (and comes to an end eventually). Since the
      SLEEP function sleeps on a timed event in order to be killable
      and to perform periodic checks until the requested time has
      elapsed, the spurious wake up was causing the requested sleep
      time to be reset every two seconds.
      
      The solution is to calculate the requested absolute time only
      once and to ensure that the thread only sleeps until this
      time has elapsed. In case of a spurious wake up, the sleep is
      restarted using the previously calculated absolute time. This
      restores the behavior present in previous releases. If a slave
      thread is executing a SLEEP function, a STOP SLAVE statement
      will wait until the time requested in the sleep function
      has elapsed.
     @ mysql-test/extra/rpl_tests/rpl_start_stop_slave.test
        Add test case for Bug#56096.
     @ mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result
        Add test case result for Bug#56096.
     @ sql/item_func.cc
        Reorganize interruptible_wait so that the absolute time can be
        preserved across calls to the function. This allows the sleep
        to be properly restarted in the presence of spurious wake ups
        generated by a STOP SLAVE.

    modified:
      mysql-test/extra/rpl_tests/rpl_start_stop_slave.test
      mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result
      sql/item_func.cc
=== modified file 'mysql-test/extra/rpl_tests/rpl_start_stop_slave.test'
--- a/mysql-test/extra/rpl_tests/rpl_start_stop_slave.test	2010-04-28 12:47:49 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_start_stop_slave.test	2010-09-29 00:09:28 +0000
@@ -122,4 +122,35 @@ drop table t1i, t2m;
 
 sync_slave_with_master;
 
+--echo #
+--echo # Bug#56096 STOP SLAVE hangs if executed in parallel with user sleep
+--echo #
+
+--connection master
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT );
+INSERT INTO t1 SELECT SLEEP(4);
+
+--connection slave
+let $wait_condition=
+  SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
+  WHERE STATE = "User sleep" AND INFO = "INSERT INTO t1 SELECT SLEEP(4)";
+--source include/wait_condition.inc
+
+--echo # Trying to stop slave
+STOP SLAVE;
+--source include/wait_for_slave_to_stop.inc
+
+--echo # Start slave again
+--source include/start_slave.inc
+
+--echo # Clean up
+--connection master
+DROP TABLE t1;
+sync_slave_with_master;
+
 # End of tests

=== modified file 'mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result'
--- a/mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result	2010-04-28 12:47:49 +0000
+++ b/mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result	2010-09-29 00:09:28 +0000
@@ -43,3 +43,17 @@ one
 1
 include/start_slave.inc
 drop table t1i, t2m;
+#
+# Bug#56096 STOP SLAVE hangs if executed in parallel with user sleep
+#
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT );
+INSERT INTO t1 SELECT SLEEP(4);
+Warnings:
+Note	1592	Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
+# Trying to stop slave
+STOP SLAVE;
+# Start slave again
+include/start_slave.inc
+# Clean up
+DROP TABLE t1;

=== modified file 'sql/item_func.cc'
--- a/sql/item_func.cc	2010-08-25 10:22:34 +0000
+++ b/sql/item_func.cc	2010-09-29 00:09:28 +0000
@@ -3685,36 +3685,40 @@ longlong Item_master_pos_wait::val_int()
   @param lock the associated mutex
   @param abstime the amount of time in seconds to wait
 
+  @remark The absolute time might be preserved across calls.
+
   @retval return value from mysql_cond_timedwait
 */
 
 #define INTERRUPT_INTERVAL (5 * ULL(1000000000))
 
 static int interruptible_wait(THD *thd, mysql_cond_t *cond,
-                              mysql_mutex_t *lock, double time)
+                              mysql_mutex_t *lock,
+                              struct timespec *abstime)
 {
   int error;
-  struct timespec abstime;
-  ulonglong slice, timeout= (ulonglong) (time * 1000000000.0);
+  struct timespec timeout;
 
-  do
+  while (1)
   {
     /* Wait for a fixed interval. */
-    if (timeout > INTERRUPT_INTERVAL)
-      slice= INTERRUPT_INTERVAL;
-    else
-      slice= timeout;
+    set_timespec_nsec(timeout, INTERRUPT_INTERVAL)
+
+    /* But only if not past the absolute time. */
+    if (timeout.tv_sec >= abstime->tv_sec)
+      timeout= *abstime;
 
-    timeout-= slice;
-    set_timespec_nsec(abstime, slice);
-    error= mysql_cond_timedwait(cond, lock, &abstime);
+    error= mysql_cond_timedwait(cond, lock, &timeout);
     if (error == ETIMEDOUT || error == ETIME)
     {
       /* Return error if timed out or connection is broken. */
-      if (!timeout || !thd->is_connected())
+      if ((timeout.tv_sec == abstime->tv_sec) || !thd->is_connected())
         break;
     }
-  } while (error && timeout);
+    /* Otherwise, propagate status to the caller. */
+    else
+      break;
+  }
 
   return error;
 }
@@ -3734,7 +3738,8 @@ longlong Item_func_get_lock::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   String *res=args[0]->val_str(&value);
-  double timeout= args[1]->val_real();
+  longlong timeout=args[1]->val_int();
+  struct timespec abstime;
   THD *thd=current_thd;
   User_level_lock *ull;
   int error;
@@ -3798,11 +3803,18 @@ longlong Item_func_get_lock::val_int()
   thd->mysys_var->current_mutex= &LOCK_user_locks;
   thd->mysys_var->current_cond=  &ull->cond;
 
+  /*
+    Calculate the absolute system time at the start so it can
+    be controlled in slices. It relies on the fact that once
+    the absolute time passes, the timed wait call will fail
+    automatically with a timeout error.
+  */
+  set_timespec(abstime, timeout);
   error= 0;
   while (ull->locked && !thd->killed)
   {
     DBUG_PRINT("info", ("waiting on lock"));
-    error= interruptible_wait(thd, &ull->cond, &LOCK_user_locks, timeout);
+    error= interruptible_wait(thd, &ull->cond, &LOCK_user_locks, &abstime);
     if (error == ETIMEDOUT || error == ETIME)
     {
       DBUG_PRINT("info", ("lock wait timeout"));
@@ -3998,6 +4010,7 @@ longlong Item_func_sleep::val_int()
 {
   THD *thd= current_thd;
   mysql_cond_t cond;
+  struct timespec abstime;
   double timeout;
   int error;
 
@@ -4016,6 +4029,14 @@ longlong Item_func_sleep::val_int()
   if (timeout < 0.00001)
     return 0;
 
+  /*
+    Calculate the absolute system time at the start so it can
+    be controlled in slices. It relies on the fact that once
+    the absolute time passes, the interruptible wait call will
+    fail automatically with a timeout error.
+  */
+  set_timespec_nsec(abstime, (ulonglong) (timeout * 1000000000.0));
+
   mysql_cond_init(key_item_func_sleep_cond, &cond, NULL);
   mysql_mutex_lock(&LOCK_user_locks);
 
@@ -4026,7 +4047,7 @@ longlong Item_func_sleep::val_int()
   error= 0;
   while (!thd->killed)
   {
-    error= interruptible_wait(thd, &cond, &LOCK_user_locks, timeout);
+    error= interruptible_wait(thd, &cond, &LOCK_user_locks, &abstime);
     if (error == ETIMEDOUT || error == ETIME)
       break;
     error= 0;


Attachment: [text/bzr-bundle] bzr/davi.arnaut@oracle.com-20100929000928-ljs2bqbfxfi0uqcm.bundle
Thread
bzr commit into mysql-5.5-bugfixing branch (davi:3206) Bug#56096Davi Arnaut29 Sep
Re: bzr commit into mysql-5.5-bugfixing branch (davi:3206) Bug#56096Konstantin Osipov12 Oct