List:Commits« Previous MessageNext Message »
From:Mats Kindahl Date:August 21 2007 2:00pm
Subject:bk commit into 5.1 tree (mats:1.2555) BUG#29968
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of mats. When mats does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-08-21 16:00:21+02:00, mats@stripped +2 -0
  BUG#29968 (rpl_ndb_circular.test and rpl_ndb_log.test fail):
  
  There appears to be a bug in some implementations of
  pthread_cond_timedwait() in that it does not lock the associated mutex
  when returning, so this patch attempts a workaround to acquire the mutex
  even in that case to prevent the mutex from being destroyed while still
  being locked by the the slave thread.

  sql/slave.cc@stripped, 2007-08-21 16:00:09+02:00, mats@stripped +90 -5
    Making terminate_slave_thread() static since it is not used elsewhere.
    Adding code to handle buggy implementation of pthread_cond_timedwait().

  sql/slave.h@stripped, 2007-08-21 16:00:09+02:00, mats@stripped +0 -4
    Moving terminate_slave_thread() to use internal linkage.

diff -Nrup a/sql/slave.cc b/sql/slave.cc
--- a/sql/slave.cc	2007-07-27 02:26:39 +02:00
+++ b/sql/slave.cc	2007-08-21 16:00:09 +02:00
@@ -132,6 +132,11 @@ static int create_table_from_dump(THD* t
                                   const char* table_name, bool overwrite);
 static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi);
 static Log_event* next_event(RELAY_LOG_INFO* rli);
+static int terminate_slave_thread(THD* thd,
+                                  pthread_mutex_t* term_lock,
+                                  pthread_mutex_t *cond_lock,
+                                  pthread_cond_t* term_cond,
+                                  volatile uint *slave_running);
 
 /*
   Find out which replications threads are running
@@ -348,11 +353,14 @@ int terminate_slave_threads(MASTER_INFO*
 }
 
 
-int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock,
-                           pthread_mutex_t *cond_lock,
-                           pthread_cond_t* term_cond,
-                           volatile uint *slave_running)
+static int
+terminate_slave_thread(THD* thd,
+                       pthread_mutex_t* term_lock,
+                       pthread_mutex_t *cond_lock, pthread_cond_t* term_cond,
+                       volatile uint *slave_running)
 {
+  int error;
+
   DBUG_ENTER("terminate_slave_thread");
   if (term_lock)
   {
@@ -380,8 +388,85 @@ int terminate_slave_thread(THD* thd, pth
     */
     struct timespec abstime;
     set_timespec(abstime,2);
-    pthread_cond_timedwait(term_cond, cond_lock, &abstime);
+    error= pthread_cond_timedwait(term_cond, cond_lock, &abstime);
+    DBUG_ASSERT(error == ETIMEOUT || error == 0);
+  }
+
+  DBUG_ASSERT(*slave_running == 0);
+
+  /*
+    BUG#29968: At this point, cond_lock should be held since
+    pthread_cond_timedwait() always acquire the associated mutex even
+    in the case of an error. However, it appears to be a bug in some
+    thread libraries in that the function can return without acquiring
+    the mutex (see http://bugs.mysql.com/bug.php?id=29968 for more
+    info), so we have to take some precautions to ensure that the
+    slave thread really have terminated (i.e., executed the
+    pthread_mutex_unlock(cond_lock) statement) and we own the
+    cond_lock mutex.
+
+    We are assuming that the basic operations on mutexes work as they
+    should, but that there is something with the handling of
+    conditions that causes the premature return with the mutex
+    unlocked. If not: there is not really much we can do except advice
+    use of a working POSIX threads library.
+
+    If we could just lock and unlock the cond_lock, we would be home
+    free, but since calling pthread_mutex_lock() when we already own
+    the mutex might cause a deadlock, we cannot do this and instead
+    have to resort to the method below.
+
+    Since *slave_running == 0, we know that the slave thread is
+    executing somewhere after it being set to 0 and the end of the
+    function (either handle_slave_io or handle_slave_sql). The only
+    things that the slave thread does, is signalling some conditions
+    and unlocking some mutexes, i.e., nothing that can block the
+    thread and prevent it from terminating.
+
+    To see if we have the mutex or not, we try to lock it.
+   */
+  error= pthread_mutex_trylock(cond_lock);
+
+  /*
+    We now have three cases to consider:
+    0     The mutex was not locked and we now own it
+    EBUSY The mutex was locked either by us or by the slave thread
+
+    If we got 0, there is nothing more to do, otherwise, we have to
+    decide if we own the mutex and the slave thread has terminated, or
+    if the slave thread is still executing and we need to wait for it
+    to terminate.
+   */
+  DBUG_ASSERT(error == 0 || error == EBUSY);
+  if (error == EBUSY)
+  {
+    /*
+      Here we know that the mutex was locked either by us or by the
+      slave thread. By unlocking it, we can check if we did own it. In
+      the event that the mutex is released by the slave thread, or the
+      slave thread owns it, this call is harmless (and will return the
+      same error in both cases).
+     */
+    error= pthread_mutex_unlock(cond_lock);
+
+    /*
+      We now have two cases:
+      0     We owned the mutex and have now released it
+      EPERM We didn't own the mutex (and doesn't after the call either)
+
+      In either case, we do not own the mutex right now, so it is safe
+      to call pthread_mutex_lock() to acquire the mutex.
+    */
+    DBUG_ASSERT(error == 0 || error == EPERM);
+    pthread_mutex_lock(cond_lock);
   }
+
+  /*
+    Here, we own the cond_lock mutex and am not relying on
+    pthread_cond_timedwait() to work correctly, we are just relying on
+    pthread_mutex_{lock,trylock,unlock} to work correctly.
+   */
+
   if (term_lock)
     pthread_mutex_unlock(term_lock);
   DBUG_RETURN(0);
diff -Nrup a/sql/slave.h b/sql/slave.h
--- a/sql/slave.h	2007-06-09 08:29:44 +02:00
+++ b/sql/slave.h	2007-08-21 16:00:09 +02:00
@@ -133,10 +133,6 @@ bool flush_relay_log_info(RELAY_LOG_INFO
 int register_slave_on_master(MYSQL* mysql);
 int terminate_slave_threads(MASTER_INFO* mi, int thread_mask,
 			     bool skip_lock = 0);
-int terminate_slave_thread(THD* thd, pthread_mutex_t* term_mutex,
-			   pthread_mutex_t* cond_lock,
-			   pthread_cond_t* term_cond,
-			   volatile uint* slave_running);
 int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
 			MASTER_INFO* mi, const char* master_info_fname,
 			const char* slave_info_fname, int thread_mask);
Thread
bk commit into 5.1 tree (mats:1.2555) BUG#29968Mats Kindahl21 Aug
  • Re: bk commit into 5.1 tree (mats:1.2555) BUG#29968Rafal Somla21 Aug
    • Re: bk commit into 5.1 tree (mats:1.2555) BUG#29968Mats Kindahl21 Aug
      • Re: bk commit into 5.1 tree (mats:1.2555) BUG#29968Rafal Somla22 Aug
        • Re: bk commit into 5.1 tree (mats:1.2555) BUG#29968Mats Kindahl22 Aug