Mats, hej.
The patch is good.
> Below is the list of changes that have just been committed into a local
> 5.1 repository of mats. When mats does a push these changes will
> be propagated to the main repository and, within 24 hours after the
> push, to the public repository.
> For information on how to access the public repository
> see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
>
> ChangeSet@stripped, 2007-08-29 09:15:00+02:00, mats@stripped +2 -0
> BUG#29968 (rpl_ndb_circular.test and rpl_ndb_log.test fail):
>
> Removing unguarded read of slave_running field from inside
> terminate_slave_threads(). This could cause premature exit in the event
> that the slave thread already were shutting down, but isn't finished yet.
>
> sql/slave.cc@stripped, 2007-08-29 09:14:51+02:00, mats@stripped +55
> -21
> Making terminate_slave_thread() static since it is not used elsewhere.
> Adding code to handle buggy implementation of pthread_cond_timedwait().
>
> Changing signature of terminate_slave_thread() to accept a skip_lock
> parameter instead of two mutexes. This mimics the signature of the
> terminate_slave_threads() function. Code is also changed as a result
> of this.
>
One of the reasons we have been hunting after this bunch of bugs for
some time was ugly code of terminate_slave_threads().
> Removing unguarded check of slave_running field in the master info and
> relay log info structure since that could cause premature exit of
> terminate_slave_threads().
>
> sql/slave.h@stripped, 2007-08-29 09:14:52+02:00, mats@stripped +0
> -4
> Moving terminate_slave_thread() to use internal linkage.
>
> diff -Nrup a/sql/slave.cc b/sql/slave.cc
> --- a/sql/slave.cc 2007-07-27 02:26:39 +02:00
> +++ b/sql/slave.cc 2007-08-29 09:14:51 +02:00
> @@ -132,6 +132,11 @@ static int create_table_from_dump(THD* t
> const char* table_name, bool overwrite);
> static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi);
> static Log_event* next_event(RELAY_LOG_INFO* rli);
> +static int terminate_slave_thread(THD* thd,
> + pthread_mutex_t* term_lock,
> + pthread_cond_t* term_cond,
> + volatile uint *slave_running,
> + bool skip_lock);
>
> /*
> Find out which replications threads are running
> @@ -312,35 +317,27 @@ int terminate_slave_threads(MASTER_INFO*
> DBUG_RETURN(0); /* successfully do nothing */
> int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
> pthread_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock =
> &mi->run_lock;
> - pthread_mutex_t *sql_cond_lock,*io_cond_lock;
>
> - sql_cond_lock=sql_lock;
> - io_cond_lock=io_lock;
> -
> - if (skip_lock)
> - {
> - sql_lock = io_lock = 0;
> - }
> - if ((thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL)) &&
> mi->slave_running)
> + if ((thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL)))
> {
> DBUG_PRINT("info",("Terminating IO thread"));
> mi->abort_slave=1;
> if ((error=terminate_slave_thread(mi->io_thd,io_lock,
> - io_cond_lock,
> &mi->stop_cond,
> - &mi->slave_running)) &&
> + &mi->slave_running,
> + skip_lock)) &&
> !force_all)
> DBUG_RETURN(error);
> }
> - if ((thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL)) &&
> mi->rli.slave_running)
> + if ((thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL)))
> {
> DBUG_PRINT("info",("Terminating SQL thread"));
> DBUG_ASSERT(mi->rli.sql_thd != 0) ;
> mi->rli.abort_slave=1;
> if ((error=terminate_slave_thread(mi->rli.sql_thd,sql_lock,
> - sql_cond_lock,
> &mi->rli.stop_cond,
> - &mi->rli.slave_running)) &&
> + &mi->rli.slave_running,
> + skip_lock)) &&
> !force_all)
> DBUG_RETURN(error);
> }
> @@ -348,13 +345,46 @@ int terminate_slave_threads(MASTER_INFO*
> }
>
>
> -int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock,
> - pthread_mutex_t *cond_lock,
> - pthread_cond_t* term_cond,
> - volatile uint *slave_running)
> +/**
> + Wait for a slave thread to terminate.
> +
> + This function is called after requesting the thread to terminate
> + (by setting @c abort_slave member of @c Relay_log_info or @c
> + Master_info structure to 1). Termination of the thread is
> + controlled with the the predicate <code>*slave_running</code>.
> +
> + Function will acquire @c term_lock before waiting on the condition
> + unless @c skip_lock is true in which case the mutex should be owned
> + by the caller of this function and will remain acquired after
> + return from the function.
> +
> + @param term_lock
> + Associated lock to use when waiting for @c term_cond
> +
> + @param term_cond
> + Condition that is signalled when the thread has terminated
> +
> + @param slave_running
> + Pointer to predicate to check for slave thread termination
> +
> + @param skip_lock
> + If @c true the lock will not be acquired before waiting on
> + the condition. In this case, it is assumed that the calling
> + function acquires the lock before calling this function.
> +
> + @retval 0 All OK
> + */
> +static int
> +terminate_slave_thread(THD* thd,
> + pthread_mutex_t* term_lock,
> + pthread_cond_t* term_cond,
> + volatile uint *slave_running,
> + bool skip_lock)
> {
> + int error;
> +
> DBUG_ENTER("terminate_slave_thread");
> - if (term_lock)
> + if (!skip_lock)
> {
> pthread_mutex_lock(term_lock);
> if (!*slave_running)
> @@ -380,9 +410,13 @@ int terminate_slave_thread(THD* thd, pth
> */
> struct timespec abstime;
> set_timespec(abstime,2);
> - pthread_cond_timedwait(term_cond, cond_lock, &abstime);
> + error= pthread_cond_timedwait(term_cond, term_lock, &abstime);
> + DBUG_ASSERT(error == ETIMEDOUT || error == 0);
> }
> - if (term_lock)
> +
> + DBUG_ASSERT(*slave_running == 0);
> +
> + if (!skip_lock)
> pthread_mutex_unlock(term_lock);
> DBUG_RETURN(0);
> }
> diff -Nrup a/sql/slave.h b/sql/slave.h
> --- a/sql/slave.h 2007-06-09 08:29:44 +02:00
> +++ b/sql/slave.h 2007-08-29 09:14:52 +02:00
> @@ -133,10 +133,6 @@ bool flush_relay_log_info(RELAY_LOG_INFO
> int register_slave_on_master(MYSQL* mysql);
> int terminate_slave_threads(MASTER_INFO* mi, int thread_mask,
> bool skip_lock = 0);
> -int terminate_slave_thread(THD* thd, pthread_mutex_t* term_mutex,
> - pthread_mutex_t* cond_lock,
> - pthread_cond_t* term_cond,
> - volatile uint* slave_running);
> int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
> MASTER_INFO* mi, const char* master_info_fname,
> const char* slave_info_fname, int thread_mask);
>
> --
> MySQL Code Commits Mailing List
> For list archives: http://lists.mysql.com/commits
> To unsubscribe: http://lists.mysql.com/commits?unsub=1
>
Thanks for this work!
regards,
Andrei