Below is the list of changes that have just been committed into a local
5.1 repository of mats. When mats does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-08-23 14:48:21+02:00, mats@stripped +4 -0
BUG#29968 (rpl_ndb_circular.test and rpl_ndb_log.test fail):
There appears to be a bug in some implementations of
pthread_cond_timedwait() in that it does not lock the associated mutex
when returning, so this patch attempts a workaround to acquire the mutex
even in that case to prevent the mutex from being destroyed while still
being locked by the the slave thread.
Also implementing some change to the safe_mutex_lock() to allow
pthread_mutex_trylock() to return immediately when the lock is busy.
include/my_pthread.h@stripped, 2007-08-23 14:48:15+02:00, mats@stripped +3 -3
Changing signature of safe_mutex_lock()
mysys/thr_mutex.c@stripped, 2007-08-23 14:48:15+02:00, mats@stripped +16 -6
Changed safe_mutex_lock() to be able to behave as pthread_mutex_trylock()
by adding code that returns EBUSY when the mutex is busy (the count is
more than 0).
From the specification of pthread_mutex_trylock():
The pthread_mutex_trylock() function shall be equivalent to
pthread_mutex_lock(), except that if the mutex object referenced by
mutex is currently locked (by any thread, including the current
thread), the call shall return immediately.
sql/slave.cc@stripped, 2007-08-23 14:48:16+02:00, mats@stripped +126 -19
Making terminate_slave_thread() static since it is not used elsewhere.
Adding code to handle buggy implementation of pthread_cond_timedwait().
Changing signature of terminate_slave_thread() to accept a skip_lock
parameter instead of two mutexes. This mimics the signature of the
terminate_slave_threads() function. Code is also changed as a result
of this.
sql/slave.h@stripped, 2007-08-23 14:48:16+02:00, mats@stripped +0 -4
Moving terminate_slave_thread() to use internal linkage.
diff -Nrup a/include/my_pthread.h b/include/my_pthread.h
--- a/include/my_pthread.h 2007-06-25 18:21:16 +02:00
+++ b/include/my_pthread.h 2007-08-23 14:48:15 +02:00
@@ -491,7 +491,7 @@ typedef struct st_safe_mutex_info_t
int safe_mutex_init(safe_mutex_t *mp, const pthread_mutexattr_t *attr,
const char *file, uint line);
-int safe_mutex_lock(safe_mutex_t *mp,const char *file, uint line);
+int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint line);
int safe_mutex_unlock(safe_mutex_t *mp,const char *file, uint line);
int safe_mutex_destroy(safe_mutex_t *mp,const char *file, uint line);
int safe_cond_wait(pthread_cond_t *cond, safe_mutex_t *mp,const char *file,
@@ -514,12 +514,12 @@ void safe_mutex_end(FILE *file);
#undef pthread_cond_timedwait
#undef pthread_mutex_trylock
#define pthread_mutex_init(A,B) safe_mutex_init((A),(B),__FILE__,__LINE__)
-#define pthread_mutex_lock(A) safe_mutex_lock((A),__FILE__,__LINE__)
+#define pthread_mutex_lock(A) safe_mutex_lock((A), FALSE, __FILE__, __LINE__)
#define pthread_mutex_unlock(A) safe_mutex_unlock((A),__FILE__,__LINE__)
#define pthread_mutex_destroy(A) safe_mutex_destroy((A),__FILE__,__LINE__)
#define pthread_cond_wait(A,B) safe_cond_wait((A),(B),__FILE__,__LINE__)
#define pthread_cond_timedwait(A,B,C) safe_cond_timedwait((A),(B),(C),__FILE__,__LINE__)
-#define pthread_mutex_trylock(A) pthread_mutex_lock(A)
+#define pthread_mutex_trylock(A) safe_mutex_lock((A), TRUE, __FILE__, __LINE__)
#define pthread_mutex_t safe_mutex_t
#define safe_mutex_assert_owner(mp) \
DBUG_ASSERT((mp)->count > 0 && \
diff -Nrup a/mysys/thr_mutex.c b/mysys/thr_mutex.c
--- a/mysys/thr_mutex.c 2007-02-23 12:13:48 +01:00
+++ b/mysys/thr_mutex.c 2007-08-23 14:48:15 +02:00
@@ -91,7 +91,7 @@ int safe_mutex_init(safe_mutex_t *mp,
}
-int safe_mutex_lock(safe_mutex_t *mp,const char *file, uint line)
+int safe_mutex_lock(safe_mutex_t *mp, my_bool try_lock, const char *file, uint line)
{
int error;
if (!mp->file)
@@ -104,12 +104,22 @@ int safe_mutex_lock(safe_mutex_t *mp,con
}
pthread_mutex_lock(&mp->global);
- if (mp->count > 0 && pthread_equal(pthread_self(),mp->thread))
+ if (mp->count > 0)
{
- fprintf(stderr,"safe_mutex: Trying to lock mutex at %s, line %d, when the mutex was already locked at %s, line %d in thread %s\n",
- file,line,mp->file, mp->line, my_thread_name());
- fflush(stderr);
- abort();
+ if (try_lock)
+ {
+ pthread_mutex_unlock(&mp->global);
+ return EBUSY;
+ }
+ else if pthread_equal(pthread_self(),mp->thread)
+ {
+ fprintf(stderr,
+ "safe_mutex: Trying to lock mutex at %s, line %d, when the"
+ " mutex was already locked at %s, line %d in thread %s\n",
+ file,line,mp->file, mp->line, my_thread_name());
+ fflush(stderr);
+ abort();
+ }
}
pthread_mutex_unlock(&mp->global);
error=pthread_mutex_lock(&mp->mutex);
diff -Nrup a/sql/slave.cc b/sql/slave.cc
--- a/sql/slave.cc 2007-07-27 02:26:39 +02:00
+++ b/sql/slave.cc 2007-08-23 14:48:16 +02:00
@@ -132,6 +132,11 @@ static int create_table_from_dump(THD* t
const char* table_name, bool overwrite);
static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi);
static Log_event* next_event(RELAY_LOG_INFO* rli);
+static int terminate_slave_thread(THD* thd,
+ pthread_mutex_t* term_lock,
+ pthread_cond_t* term_cond,
+ volatile uint *slave_running,
+ bool skip_lock);
/*
Find out which replications threads are running
@@ -312,23 +317,15 @@ int terminate_slave_threads(MASTER_INFO*
DBUG_RETURN(0); /* successfully do nothing */
int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
pthread_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
- pthread_mutex_t *sql_cond_lock,*io_cond_lock;
- sql_cond_lock=sql_lock;
- io_cond_lock=io_lock;
-
- if (skip_lock)
- {
- sql_lock = io_lock = 0;
- }
if ((thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL)) && mi->slave_running)
{
DBUG_PRINT("info",("Terminating IO thread"));
mi->abort_slave=1;
if ((error=terminate_slave_thread(mi->io_thd,io_lock,
- io_cond_lock,
&mi->stop_cond,
- &mi->slave_running)) &&
+ &mi->slave_running,
+ skip_lock)) &&
!force_all)
DBUG_RETURN(error);
}
@@ -338,9 +335,9 @@ int terminate_slave_threads(MASTER_INFO*
DBUG_ASSERT(mi->rli.sql_thd != 0) ;
mi->rli.abort_slave=1;
if ((error=terminate_slave_thread(mi->rli.sql_thd,sql_lock,
- sql_cond_lock,
&mi->rli.stop_cond,
- &mi->rli.slave_running)) &&
+ &mi->rli.slave_running,
+ skip_lock)) &&
!force_all)
DBUG_RETURN(error);
}
@@ -348,13 +345,46 @@ int terminate_slave_threads(MASTER_INFO*
}
-int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock,
- pthread_mutex_t *cond_lock,
- pthread_cond_t* term_cond,
- volatile uint *slave_running)
+/**
+ Wait for a slave thread to terminate.
+
+ This function is called after requesting the thread to terminate
+ (by setting @c abort_slave member of @c Relay_log_info or @c
+ Master_info structure to 1). Termination of the thread is
+ controlled with the the predicate <code>*slave_running</code>.
+
+ Function will acquire @c term_lock before waiting on the condition
+ unless @c skip_lock is true in which case the mutex should be owned
+ by the caller of this function and will remain acquired after
+ return from the function.
+
+ @param term_lock
+ Associated lock to use when waiting for @c term_cond
+
+ @param term_cond
+ Condition that is signalled when the thread has terminated
+
+ @param slave_running
+ Pointer to predicate to check for slave thread termination
+
+ @param skip_lock
+ If @c true the lock will not be acquired before waiting on
+ the condition. In this case, it is assumed that the calling
+ function acquires the lock before calling this function.
+
+ @retval 0 All OK
+ */
+static int
+terminate_slave_thread(THD* thd,
+ pthread_mutex_t* term_lock,
+ pthread_cond_t* term_cond,
+ volatile uint *slave_running,
+ bool skip_lock)
{
+ int error;
+
DBUG_ENTER("terminate_slave_thread");
- if (term_lock)
+ if (!skip_lock)
{
pthread_mutex_lock(term_lock);
if (!*slave_running)
@@ -380,9 +410,86 @@ int terminate_slave_thread(THD* thd, pth
*/
struct timespec abstime;
set_timespec(abstime,2);
- pthread_cond_timedwait(term_cond, cond_lock, &abstime);
+ error= pthread_cond_timedwait(term_cond, term_lock, &abstime);
+ DBUG_ASSERT(error == ETIMEDOUT || error == 0);
}
- if (term_lock)
+
+ DBUG_ASSERT(*slave_running == 0);
+
+ /*
+ BUG#29968: At this point, cond_lock should be held since
+ pthread_cond_timedwait() always acquire the associated mutex even
+ in the case of an error. However, it appears to be a bug in some
+ thread libraries in that the function can return without acquiring
+ the mutex (see http://bugs.mysql.com/bug.php?id=29968 for more
+ info), so we have to take some precautions to ensure that the
+ slave thread really have terminated (i.e., executed the
+ pthread_mutex_unlock(cond_lock) statement) and we own the
+ cond_lock mutex.
+
+ We are assuming that the basic operations on mutexes work as they
+ should, but that there is something with the handling of
+ conditions that causes the premature return with the mutex
+ unlocked. If not: there is not really much we can do except advice
+ use of a working POSIX threads library.
+
+ If we could just lock and unlock the cond_lock, we would be home
+ free, but since calling pthread_mutex_lock() when we already own
+ the mutex might cause a deadlock, we cannot do this and instead
+ have to resort to the method below.
+
+ Since *slave_running == 0, we know that the slave thread is
+ executing somewhere after it being set to 0 and the end of the
+ function (either handle_slave_io or handle_slave_sql). The only
+ things that the slave thread does, is signalling some conditions
+ and unlocking some mutexes, i.e., nothing that can block the
+ thread and prevent it from terminating.
+
+ To see if we have the mutex or not, we try to lock it.
+ */
+ error= pthread_mutex_trylock(term_lock);
+
+ /*
+ We now have three cases to consider:
+ 0 The mutex was not locked and we now own it
+ EBUSY The mutex was locked either by us or by the slave thread
+
+ If we got 0, there is nothing more to do, otherwise, we have to
+ decide if we own the mutex and the slave thread has terminated, or
+ if the slave thread is still executing and we need to wait for it
+ to terminate.
+ */
+ DBUG_ASSERT(error == 0 || error == EBUSY);
+ if (error == EBUSY)
+ {
+ /*
+ Here we know that the mutex was locked either by us or by the
+ slave thread. By unlocking it, we can check if we did own it. In
+ the event that the mutex is released by the slave thread, or the
+ slave thread owns it, this call is harmless (and will return the
+ same error in both cases).
+ */
+ error= pthread_mutex_unlock(term_lock);
+
+ /*
+ We now have two cases:
+ 0 We owned the mutex and have now released it
+ EPERM We didn't own the mutex (and doesn't after the call either)
+
+ In either case, we do not own the mutex right now, so it is safe
+ to call pthread_mutex_lock() to acquire the mutex.
+ */
+ DBUG_ASSERT(error == 0 || error == EPERM);
+ pthread_mutex_lock(term_lock);
+ }
+
+ /*
+ Here, we own the cond_lock mutex and am not relying on
+ pthread_cond_timedwait() to work correctly, we are just relying on
+ pthread_mutex_{lock,trylock,unlock} to work correctly.
+ */
+
+ if (!skip_lock)
pthread_mutex_unlock(term_lock);
DBUG_RETURN(0);
}
diff -Nrup a/sql/slave.h b/sql/slave.h
--- a/sql/slave.h 2007-06-09 08:29:44 +02:00
+++ b/sql/slave.h 2007-08-23 14:48:16 +02:00
@@ -133,10 +133,6 @@ bool flush_relay_log_info(RELAY_LOG_INFO
int register_slave_on_master(MYSQL* mysql);
int terminate_slave_threads(MASTER_INFO* mi, int thread_mask,
bool skip_lock = 0);
-int terminate_slave_thread(THD* thd, pthread_mutex_t* term_mutex,
- pthread_mutex_t* cond_lock,
- pthread_cond_t* term_cond,
- volatile uint* slave_running);
int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
MASTER_INFO* mi, const char* master_info_fname,
const char* slave_info_fname, int thread_mask);