#At file:///home/anders/Work/bzrwork/wt1/mysql-5.1-bugteam/ based on revid:li-bing.song@stripped
3525 Li-Bing.Song@stripped 2010-12-17
Bug#58546 test rpl_packet timeout failure sporadically on PB
rpl_packet got a timeout failure sporadically on PB when stopping
slave. The real reason of this bug is that STOP SLAVE stopped
IO thread first and then stopped SQL thread. It was
possible that IO thread stopped after replicating part of a
transaction which SQL thread was executing. SQL thread would
be hung if the transaction could not be rolled back safely.
After this patch, SQL thread is always stopped first. It guarantees
IO thread stops just after the transaction which cannot be
rolled back safely and is being replied by SQL thread
has been replicated completely.
modified:
mysql-test/suite/rpl/r/rpl_stop_slave.result
mysql-test/suite/rpl/t/rpl_stop_slave.test
sql/slave.cc
sql/sql_repl.cc
=== modified file 'mysql-test/suite/rpl/r/rpl_stop_slave.result'
--- a/mysql-test/suite/rpl/r/rpl_stop_slave.result 2010-10-16 12:03:44 +0000
+++ b/mysql-test/suite/rpl/r/rpl_stop_slave.result 2010-12-17 10:20:40 +0000
@@ -124,4 +124,42 @@ Comparing tables master:test.t1 and slav
START SLAVE SQL_THREAD;
# Test end
SET GLOBAL debug= '$debug_save';
+STOP SLAVE SQL_THREAD;
+START SLAVE SQL_THREAD;
+DROP TABLE t1, t2;
+
+# To verify that IO thread should replicate the whole transaction which
+# SQL thread is executing and cannot be rolled back safely.
+# ----------------------------------------------------------------------
+# STOP SLAVE stopped IO thread first and then stopped SQL thread. It was
+# possible that IO thread stopped after replicating part of a transaction
+# which SQL thread was executing. SQL thread would be hung if the
+# transaction could not be rolled back safely.
+# It caused some sporadic failures on PB2.
+CREATE TABLE t1 (c1 INT KEY, c2 INT) ENGINE=InnoDB;
+CREATE TABLE t2 (c1 INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES(1, 1);
+SET GLOBAL debug= 'd,dump_thread_block_before_XID';
+# [ on slave ]
+STOP SLAVE IO_THREAD;
+START SLAVE IO_THREAD;
+BEGIN;
+UPDATE t1 SET c2 = 2 WHERE c1 = 1;
+# [ on master ]
+BEGIN;
+INSERT INTO t1 VALUES(2, 2);
+INSERT INTO t2 VALUES(1);
+UPDATE t1 SET c2 = 3 WHERE c1 = 1;
+COMMIT;
+# [ on slave1 ]
+STOP SLAVE;
+# [ on slave ]
+ROLLBACK;
+# [ on master ]
+SET DEBUG_SYNC= 'now SIGNAL signal.continue';
+SET DEBUG_SYNC= 'RESET';
+# [ on slave ]
+[ on slave1 ]
+include/start_slave.inc
DROP TABLE t1, t2;
+SET GLOBAL debug= $debug_save;
=== modified file 'mysql-test/suite/rpl/t/rpl_stop_slave.test'
--- a/mysql-test/suite/rpl/t/rpl_stop_slave.test 2010-10-16 12:03:44 +0000
+++ b/mysql-test/suite/rpl/t/rpl_stop_slave.test 2010-12-17 10:20:40 +0000
@@ -54,7 +54,78 @@ source extra/rpl_tests/rpl_stop_slave.te
--echo # Test end
SET GLOBAL debug= '$debug_save';
+STOP SLAVE SQL_THREAD;
+source include/wait_for_slave_sql_to_stop.inc;
+START SLAVE SQL_THREAD;
+source include/wait_for_slave_sql_to_start.inc;
connection master;
DROP TABLE t1, t2;
+
+--echo
+--echo # To verify that IO thread should replicate the whole transaction which
+--echo # SQL thread is executing and cannot be rolled back safely.
+--echo # ----------------------------------------------------------------------
+--echo # STOP SLAVE stopped IO thread first and then stopped SQL thread. It was
+--echo # possible that IO thread stopped after replicating part of a transaction
+--echo # which SQL thread was executing. SQL thread would be hung if the
+--echo # transaction could not be rolled back safely.
+--echo # It caused some sporadic failures on PB2.
+
+CREATE TABLE t1 (c1 INT KEY, c2 INT) ENGINE=InnoDB;
+CREATE TABLE t2 (c1 INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES(1, 1);
+
+let $debug_save= `SELECT @@GLOBAL.debug`;
+SET GLOBAL debug= 'd,dump_thread_block_before_XID';
+
+sync_slave_with_master;
+
+--echo # [ on slave ]
+STOP SLAVE IO_THREAD;
+source include/wait_for_slave_io_to_stop.inc;
+START SLAVE IO_THREAD;
+source include/wait_for_slave_io_to_start.inc;
+
+BEGIN;
+UPDATE t1 SET c2 = 2 WHERE c1 = 1;
+
+--echo # [ on master ]
+connection master;
+BEGIN;
+INSERT INTO t1 VALUES(2, 2);
+INSERT INTO t2 VALUES(1);
+UPDATE t1 SET c2 = 3 WHERE c1 = 1;
+COMMIT;
+
+--echo # [ on slave1 ]
+connection slave1;
+let $show_statement= SHOW PROCESSLIST;
+let $field= Info;
+let $condition= = 'UPDATE t1 SET c2 = 3 WHERE c1 = 1';
+source include/wait_show_condition.inc;
+
+send STOP SLAVE;
+
+--echo # [ on slave ]
+connection slave;
+ROLLBACK;
+
+--echo # [ on master ]
+connection master;
+SET DEBUG_SYNC= 'now SIGNAL signal.continue';
+SET DEBUG_SYNC= 'RESET';
+
+--echo # [ on slave ]
+connection slave;
+source include/wait_for_slave_to_stop.inc;
+
+--echo [ on slave1 ]
+connection slave1;
+reap;
+source include/start_slave.inc;
+
+connection master;
+DROP TABLE t1, t2;
+SET GLOBAL debug= $debug_save;
source include/master-slave-end.inc;
=== modified file 'sql/slave.cc'
--- a/sql/slave.cc 2010-11-10 21:14:47 +0000
+++ b/sql/slave.cc 2010-12-17 10:20:40 +0000
@@ -408,17 +408,6 @@ int terminate_slave_threads(Master_info*
int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
pthread_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
- if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
- {
- DBUG_PRINT("info",("Terminating IO thread"));
- mi->abort_slave=1;
- if ((error=terminate_slave_thread(mi->io_thd, io_lock,
- &mi->stop_cond,
- &mi->slave_running,
- skip_lock)) &&
- !force_all)
- DBUG_RETURN(error);
- }
if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
{
DBUG_PRINT("info",("Terminating SQL thread"));
@@ -429,6 +418,17 @@ int terminate_slave_threads(Master_info*
skip_lock)) &&
!force_all)
DBUG_RETURN(error);
+ }
+ if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
+ {
+ DBUG_PRINT("info",("Terminating IO thread"));
+ mi->abort_slave=1;
+ if ((error=terminate_slave_thread(mi->io_thd, io_lock,
+ &mi->stop_cond,
+ &mi->slave_running,
+ skip_lock)) &&
+ !force_all)
+ DBUG_RETURN(error);
}
DBUG_RETURN(0);
}
=== modified file 'sql/sql_repl.cc'
--- a/sql/sql_repl.cc 2010-07-26 09:56:30 +0000
+++ b/sql/sql_repl.cc 2010-12-17 10:20:40 +0000
@@ -21,6 +21,7 @@
#include "log_event.h"
#include "rpl_filter.h"
#include <my_dir.h>
+#include "debug_sync.h"
int max_binlog_dump_events = 0; // unlimited
my_bool opt_sporadic_binlog_dump_fail = 0;
@@ -556,6 +557,20 @@ impossible position";
}
#endif
+ DBUG_EXECUTE_IF("dump_thread_block_before_XID",
+ {
+ if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
+ {
+ net_flush(net);
+ const char act[]=
+ "now "
+ "wait_for signal.continue";
+ DBUG_ASSERT(opt_debug_sync_timeout > 0);
+ DBUG_ASSERT(!debug_sync_set_action(current_thd,
+ STRING_WITH_LEN(act)));
+ }
+ });
+
if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
{
binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] &
@@ -572,6 +587,14 @@ impossible position";
goto err;
}
+ DBUG_EXECUTE_IF("dump_thread_block_before_XID",
+ {
+ if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
+ {
+ net_flush(net);
+ }
+ });
+
DBUG_PRINT("info", ("log event code %d",
(*packet)[LOG_EVENT_OFFSET+1] ));
if ((*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
Attachment: [text/bzr-bundle] bzr/li-bing.song@sun.com-20101217102040-4g5w9fn4a9r8zcin.bundle