MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Davi Arnaut Date:October 14 2010 1:54am
Subject:bzr commit into mysql-5.5-bugteam branch (davi:3244) Bug#56096
View as plain text  
# At a local mysql-5.5-bugteam repository of davi

 3244 Davi Arnaut	2010-10-13
      Bug#56096: STOP SLAVE hangs if executed in parallel with user sleep
      
      The root of the problem is that to interrupt a slave SQL thread
      wait, the STOP SLAVE implementation uses thd->awake(THD::NOT_KILLED).
      This appears as a spurious wakeup (e.g. from a sleep on a
      condition variable) to the code that the slave SQL thread is
      executing at the time of the STOP. If the code is not written
      to be spurious-wakeup safe, unexpected behavior can occur. For
      the reported case, this problem led to an infinite loop around
      the interruptible_wait() function in item_func.cc (SLEEP()
      function implementation).  The loop was not being properly
      restarted and, consequently, would not come to an end. Since the
      SLEEP function sleeps on a timed event in order to be killable
      and to perform periodic checks until the requested time has
      elapsed, the spurious wake up was causing the requested sleep
      time to be reset every two seconds.
      
      The solution is to calculate the requested absolute time only
      once and to ensure that the thread only sleeps until this
      time is elapsed. In case of a spurious wake up, the sleep is
      restarted using the previously calculated absolute time. This
      restores the behavior present in previous releases. If a slave
      thread is executing a SLEEP function, a STOP SLAVE statement
      will wait until the time requested in the sleep function
      has elapsed.
     @ mysql-test/extra/rpl_tests/rpl_start_stop_slave.test
        Add test case for Bug#56096.
     @ mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result
        Add test case result for Bug#56096.
     @ sql/item_func.cc
        Reorganize interruptible_wait into a class so that the absolute
        time can be preserved across calls to the wait function. This
        allows the sleep to be properly restarted in the presence of
        spurious wake ups, including those generated by a STOP SLAVE.

    modified:
      mysql-test/extra/rpl_tests/rpl_start_stop_slave.test
      mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result
      sql/item_func.cc
=== modified file 'mysql-test/extra/rpl_tests/rpl_start_stop_slave.test'
--- a/mysql-test/extra/rpl_tests/rpl_start_stop_slave.test	2010-04-28 12:47:49 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_start_stop_slave.test	2010-10-14 01:54:07 +0000
@@ -122,4 +122,60 @@ drop table t1i, t2m;
 
 sync_slave_with_master;
 
+--echo #
+--echo # Bug#56096 STOP SLAVE hangs if executed in parallel with user sleep
+--echo #
+
+--connection master
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (a INT );
+
+sync_slave_with_master;
+
+--connection slave1
+--echo # Slave1: lock table for synchronization
+LOCK TABLES t1 WRITE;
+
+--connection master
+--echo # Master: insert into the table
+INSERT INTO t1 SELECT SLEEP(4);
+
+--connection slave
+--echo # Slave: wait for the insert
+let $wait_condition=
+  SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
+  WHERE STATE = "Waiting for table metadata lock"
+  AND INFO = "INSERT INTO t1 SELECT SLEEP(4)";
+--source include/wait_condition.inc
+
+--echo # Slave: send slave stop
+--send STOP SLAVE
+
+--connection slave1
+--echo # Slave1: wait for stop slave
+let $wait_condition=
+  SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
+  WHERE INFO = "STOP SLAVE";
+--source include/wait_condition.inc
+
+--echo # Slave1: unlock the table
+UNLOCK TABLES;
+
+--connection slave
+--echo # Slave: wait for the slave to stop
+--reap
+--source include/wait_for_slave_to_stop.inc
+
+--echo # Start slave again
+--source include/start_slave.inc
+
+--echo # Clean up
+--connection master
+DROP TABLE t1;
+sync_slave_with_master;
+
 # End of tests

=== modified file 'mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result'
--- a/mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result	2010-04-28 12:47:49 +0000
+++ b/mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result	2010-10-14 01:54:07 +0000
@@ -43,3 +43,25 @@ one
 1
 include/start_slave.inc
 drop table t1i, t2m;
+#
+# Bug#56096 STOP SLAVE hangs if executed in parallel with user sleep
+#
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (a INT );
+# Slave1: lock table for synchronization
+LOCK TABLES t1 WRITE;
+# Master: insert into the table
+INSERT INTO t1 SELECT SLEEP(4);
+Warnings:
+Note	1592	Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
+# Slave: wait for the insert
+# Slave: send slave stop
+STOP SLAVE;
+# Slave1: wait for stop slave
+# Slave1: unlock the table
+UNLOCK TABLES;
+# Slave: wait for the slave to stop
+# Start slave again
+include/start_slave.inc
+# Clean up
+DROP TABLE t1;

=== modified file 'sql/item_func.cc'
--- a/sql/item_func.cc	2010-10-05 11:33:54 +0000
+++ b/sql/item_func.cc	2010-10-14 01:54:07 +0000
@@ -3691,48 +3691,92 @@ longlong Item_master_pos_wait::val_int()
 }
 
 
+/**
+  Enables a session to wait on a condition until a timeout or a network
+  disconnect occurs.
+
+  @remark The connection is polled every m_interrupt_interval nanoseconds.
+*/
+
+class Interruptible_wait
+{
+  THD *m_thd;
+  struct timespec m_abs_timeout;
+  static const ulonglong m_interrupt_interval;
+
+  public:
+    Interruptible_wait(THD *thd)
+    : m_thd(thd) {}
+
+    ~Interruptible_wait() {}
+
+  public:
+    /**
+      Set the absolute timeout.
+
+      @param timeout The amount of time in nanoseconds to wait
+    */
+    void set_timeout(ulonglong timeout)
+    {
+      /*
+        Calculate the absolute system time at the start so it can
+        be controlled in slices. It relies on the fact that once
+        the absolute time passes, the timed wait call will fail
+        automatically with a timeout error.
+      */
+      set_timespec_nsec(m_abs_timeout, timeout);
+    }
+
+    /** The timed wait. */
+    int wait(mysql_cond_t *, mysql_mutex_t *);
+};
+
+
+/** Time to wait before polling the connection status. */
+const ulonglong Interruptible_wait::m_interrupt_interval= 5 * ULL(1000000000);
+
 
 /**
-  Wait for a given condition to be signaled within the specified timeout.
+  Wait for a given condition to be signaled.
+
+  @param cond   The condition variable to wait on.
+  @param mutex  The associated mutex.
 
-  @param cond the condition variable to wait on
-  @param lock the associated mutex
-  @param abstime the amount of time in seconds to wait
+  @remark The absolute timeout is preserved across calls.
 
   @retval return value from mysql_cond_timedwait
 */
 
-#define INTERRUPT_INTERVAL (5 * ULL(1000000000))
-
-static int interruptible_wait(THD *thd, mysql_cond_t *cond,
-                              mysql_mutex_t *lock, double time)
+int Interruptible_wait::wait(mysql_cond_t *cond, mysql_mutex_t *mutex)
 {
   int error;
-  struct timespec abstime;
-  ulonglong slice, timeout= (ulonglong) (time * 1000000000.0);
+  struct timespec timeout;
 
-  do
+  while (1)
   {
     /* Wait for a fixed interval. */
-    if (timeout > INTERRUPT_INTERVAL)
-      slice= INTERRUPT_INTERVAL;
-    else
-      slice= timeout;
+    set_timespec_nsec(timeout, m_interrupt_interval);
+
+    /* But only if not past the absolute timeout. */
+    if (cmp_timespec(timeout, m_abs_timeout) > 0)
+      timeout= m_abs_timeout;
 
-    timeout-= slice;
-    set_timespec_nsec(abstime, slice);
-    error= mysql_cond_timedwait(cond, lock, &abstime);
+    error= mysql_cond_timedwait(cond, mutex, &timeout);
     if (error == ETIMEDOUT || error == ETIME)
     {
       /* Return error if timed out or connection is broken. */
-      if (!timeout || !thd->is_connected())
+      if (!cmp_timespec(timeout, m_abs_timeout) || !m_thd->is_connected())
         break;
     }
-  } while (error && timeout);
+    /* Otherwise, propagate status to the caller. */
+    else
+      break;
+  }
 
   return error;
 }
 
+
 /**
   Get a user level lock.  If the thread has an old lock this is first released.
 
@@ -3748,10 +3792,11 @@ longlong Item_func_get_lock::val_int()
 {
   DBUG_ASSERT(fixed == 1);
   String *res=args[0]->val_str(&value);
-  double timeout= args[1]->val_real();
+  ulonglong timeout= args[1]->val_int();
   THD *thd=current_thd;
   User_level_lock *ull;
   int error;
+  Interruptible_wait timed_cond(thd);
   DBUG_ENTER("Item_func_get_lock::val_int");
 
   /*
@@ -3812,11 +3857,13 @@ longlong Item_func_get_lock::val_int()
   thd->mysys_var->current_mutex= &LOCK_user_locks;
   thd->mysys_var->current_cond=  &ull->cond;
 
+  timed_cond.set_timeout(timeout * ULL(1000000000));
+
   error= 0;
   while (ull->locked && !thd->killed)
   {
     DBUG_PRINT("info", ("waiting on lock"));
-    error= interruptible_wait(thd, &ull->cond, &LOCK_user_locks, timeout);
+    error= timed_cond.wait(&ull->cond, &LOCK_user_locks);
     if (error == ETIMEDOUT || error == ETIME)
     {
       DBUG_PRINT("info", ("lock wait timeout"));
@@ -4011,6 +4058,7 @@ void Item_func_benchmark::print(String *
 longlong Item_func_sleep::val_int()
 {
   THD *thd= current_thd;
+  Interruptible_wait timed_cond(thd);
   mysql_cond_t cond;
   double timeout;
   int error;
@@ -4030,6 +4078,8 @@ longlong Item_func_sleep::val_int()
   if (timeout < 0.00001)
     return 0;
 
+  timed_cond.set_timeout((ulonglong) (timeout * 1000000000.0));
+
   mysql_cond_init(key_item_func_sleep_cond, &cond, NULL);
   mysql_mutex_lock(&LOCK_user_locks);
 
@@ -4040,7 +4090,7 @@ longlong Item_func_sleep::val_int()
   error= 0;
   while (!thd->killed)
   {
-    error= interruptible_wait(thd, &cond, &LOCK_user_locks, timeout);
+    error= timed_cond.wait(&cond, &LOCK_user_locks);
     if (error == ETIMEDOUT || error == ETIME)
       break;
     error= 0;


Attachment: [text/bzr-bundle] bzr/davi.arnaut@oracle.com-20101014015407-0hhdj147bwsa0gxt.bundle
Thread
bzr commit into mysql-5.5-bugteam branch (davi:3244) Bug#56096Davi Arnaut14 Oct