Patch approved! Please look below for some comments.
Li-Bing.Song@stripped wrote:
> #At file:///home/anders/Work/bzrwork/wt1/mysql-next-mr-bugfixing/ based on
> revid:horst.hunger@stripped
>
> 3343 Li-Bing.Song@stripped 2010-11-18
> Bug#47699 rpl.rpl_backup_block fails sporadically
>
> It failed because an error appeared in Slave_SQL_Error,
> the test case expects that no error happens. The reason is
> that START SLAVE released the lock and returned before it cleared
> the error, so there was a possibility that Slave_SQL_Error was not
> 0 when doing the following:
> START SLAVE SQL_THREAD;
> SHOW SLAVE STATUS;
>
> After this patch, clearing error always happens before START SLAVE
> releases the lock.
>
> modified:
> mysql-test/suite/rpl/r/rpl_slave_status.result
> mysql-test/suite/rpl/t/rpl_slave_status.test
> sql/rpl_slave.cc
> === modified file 'mysql-test/suite/rpl/r/rpl_slave_status.result'
> --- a/mysql-test/suite/rpl/r/rpl_slave_status.result 2008-07-10 16:09:39 +0000
> +++ b/mysql-test/suite/rpl/r/rpl_slave_status.result 2010-11-18 04:43:11 +0000
> @@ -29,7 +29,32 @@ include/stop_slave.inc
> START SLAVE;
> ==== Verify that Slave_IO_Running = No ====
> Slave_IO_Running = No (should be No)
> -==== Cleanup (Note that slave IO thread is not running) ====
> +==== Cleanup ====
> +[on master]
> +DROP TABLE t1;
> +[on slave]
> +include/stop_slave.inc
> +CHANGE MASTER TO master_user='root', master_password='';
> +include/start_slave.inc
> +
> +# Bug#47699 rpl.rpl_backup_block fails sporadically
> +#
> +# START SLAVE released the lock and returned before it cleared the error,
> +# so there is a possibility that Slave_SQL_Error is not 0.
> +[on slave]
> +CALL mtr.add_suppression("Slave: Table 't1' already exists Error_code: 1050");
> +# The statement makes SQL thread to fail.
> +CREATE TABLE t1(c1 INT);
> +[on master]
> +CREATE TABLE t1(c1 INT);
> +[on slave]
> DROP TABLE t1;
> +# Block SQL thread immediately after it starts.
> +SET DEBUG_SYNC='after_start_slave WAIT_FOR signal.continue';
> +START SLAVE SQL_THREAD;
> +# Check Slave_SQL_Error, there should not be an error.
> +# Resume SQL thread
> +SET DEBUG_SYNC="now SIGNAL signal.continue";
> +SET GLOBAL debug= $debug_save;
> [on master]
> DROP TABLE t1;
>
> === modified file 'mysql-test/suite/rpl/t/rpl_slave_status.test'
> --- a/mysql-test/suite/rpl/t/rpl_slave_status.test 2009-10-02 09:24:21 +0000
> +++ b/mysql-test/suite/rpl/t/rpl_slave_status.test 2010-11-18 04:43:11 +0000
> @@ -60,11 +60,54 @@ source include/wait_for_slave_io_to_stop
> let $result= query_get_value("SHOW SLAVE STATUS", Slave_IO_Running, 1);
> --echo Slave_IO_Running = $result (should be No)
>
> ---echo ==== Cleanup (Note that slave IO thread is not running) ====
> +--echo ==== Cleanup ====
> +--echo [on master]
> +connection master;
> +DROP TABLE t1;
> +
> +--echo [on slave]
> +connection slave;
> +source include/stop_slave.inc;
> +CHANGE MASTER TO master_user='root', master_password='';
> +source include/start_slave.inc;
> +connection master;
> +sync_slave_with_master;
> +
> +--echo
> +--echo # Bug#47699 rpl.rpl_backup_block fails sporadically
> +--echo #
> +--echo # START SLAVE released the lock and returned before it cleared the error,
> +--echo # so there is a possibility that Slave_SQL_Error is not 0.
> +
> +--echo [on slave]
> +CALL mtr.add_suppression("Slave: Table 't1' already exists Error_code: 1050");
> +--echo # The statement makes SQL thread to fail.
> +CREATE TABLE t1(c1 INT);
> +
> +--echo [on master]
> +connection master;
> +CREATE TABLE t1(c1 INT);
> +
> +--echo [on slave]
> +connection slave;
> +source include/wait_for_slave_sql_to_stop.inc;
> +
> DROP TABLE t1;
> -# cleanup: slave io thread has been stopped "irrecoverably"
> -# so we clean up mess manually
> +
> +--echo # Block SQL thread immediately after it starts.
> +let $debug_save=`SELECT @@debug`;
> +#SET GLOBAL debug="d,after_start_slave";
Please remove the above two lines which are not needed any more.
> +SET DEBUG_SYNC='after_start_slave WAIT_FOR signal.continue';
> +START SLAVE SQL_THREAD;
> +source include/wait_for_slave_sql_to_start.inc;
> +--echo # Check Slave_SQL_Error, there should not be an error.
> +source include/check_slave_no_error.inc;
> +
> +--echo # Resume SQL thread
> +SET DEBUG_SYNC="now SIGNAL signal.continue";
> +SET GLOBAL debug= $debug_save;
>
Please remove the above line also.
> --echo [on master]
> connection master;
> DROP TABLE t1;
> +source include/master-slave-end.inc;
>
> === modified file 'sql/rpl_slave.cc'
> --- a/sql/rpl_slave.cc 2010-10-27 10:31:46 +0000
> +++ b/sql/rpl_slave.cc 2010-11-18 04:43:11 +0000
> @@ -3465,8 +3465,6 @@ pthread_handler_t handle_slave_sql(void
> Seconds_Behind_Master grows. No big deal.
> */
> rli->abort_slave = 0;
> - mysql_mutex_unlock(&rli->run_lock);
> - mysql_cond_broadcast(&rli->start_cond);
>
> /*
> Reset errors for a clean start (otherwise, if the master is idle, the SQL
> @@ -3480,6 +3478,11 @@ pthread_handler_t handle_slave_sql(void
> */
> rli->clear_error();
>
> + mysql_mutex_unlock(&rli->run_lock);
> + mysql_cond_broadcast(&rli->start_cond);
> +
> + DEBUG_SYNC(thd, "after_start_slave");
> +
> //tell the I/O thread to take relay_log_space_limit into account from now on
> mysql_mutex_lock(&rli->log_space_lock);
> rli->ignore_log_space_limit= 0;