Hi Libing,
One more comment: I think it would be better to add only the sync points in
the code, and to define the sync policy (the actions) in the test.
Li-Bing.Song@stripped wrote:
> #At file:///home/anders/Work/bzrwork/wt1/mysql-5.1-bugteam/ based on
> revid:sergey.glukhov@stripped
>
> 3534 Li-Bing.Song@stripped 2010-12-24
> Bug#58546 test rpl_packet timeout failure sporadically on PB
[ snip]
>
> === modified file 'mysql-test/suite/rpl/t/rpl_stop_slave.test'
> --- a/mysql-test/suite/rpl/t/rpl_stop_slave.test 2010-12-19 17:07:28 +0000
> +++ b/mysql-test/suite/rpl/t/rpl_stop_slave.test 2010-12-24 04:17:30 +0000
> @@ -54,7 +54,67 @@ source extra/rpl_tests/rpl_stop_slave.te
>
> --echo # Test end
> SET GLOBAL debug= '$debug_save';
> +source include/restart_slave_sql.inc;
>
> -connection master;
> +--source include/rpl_connection_master.inc
> DROP TABLE t1, t2;
> +
> +--echo
> +--echo # To verify that when issues STOP SLAVE, IO thread will continue to fetch
> +--echo # the rest of the events of the transaction that SQL thread is executing
> +--echo # and cannot be rolled back safely.
> +--echo # ----------------------------------------------------------------------
> +--echo # STOP SLAVE stopped IO thread first and then stopped SQL thread. It was
> +--echo # possible that IO thread stopped after replicating part of a transaction
> +--echo # which SQL thread was executing. SQL thread would be hung if the
> +--echo # transaction could not be rolled back safely.
> +--echo # It caused some sporadic failures on PB2(bug#58546).
> +
> +CREATE TABLE t1 (c1 INT KEY, c2 INT) ENGINE=InnoDB;
> +CREATE TABLE t2 (c1 INT) ENGINE=MyISAM;
> +INSERT INTO t1 VALUES(1, 1);
> +
> +let $debug_save= `SELECT @@GLOBAL.debug`;
> +SET GLOBAL debug= 'd,dump_thread_wait_before_send_xid';
> +
> +sync_slave_with_master;
> +
> +--source include/rpl_connection_slave.inc
> +source include/restart_slave_sql.inc;
> +
> +BEGIN;
> +UPDATE t1 SET c2 = 2 WHERE c1 = 1;
> +
> +--source include/rpl_connection_master.inc
> +BEGIN;
> +INSERT INTO t1 VALUES(2, 2);
> +INSERT INTO t2 VALUES(1);
> +UPDATE t1 SET c2 = 3 WHERE c1 = 1;
> +COMMIT;
> +
> +--source include/rpl_connection_slave1.inc
> +let $show_statement= SHOW PROCESSLIST;
> +let $field= Info;
> +let $condition= = 'UPDATE t1 SET c2 = 3 WHERE c1 = 1';
> +source include/wait_show_condition.inc;
> +
> +send STOP SLAVE;
> +
> +--source include/rpl_connection_slave.inc
> +ROLLBACK;
> +
> +--source include/rpl_connection_master.inc
> +SET DEBUG_SYNC= 'now SIGNAL signal.continue';
> +SET DEBUG_SYNC= 'RESET';
> +
> +--source include/rpl_connection_slave.inc
> +source include/wait_for_slave_to_stop.inc;
> +
> +--source include/rpl_connection_slave1.inc
> +reap;
> +source include/start_slave.inc;
> +
> +--source include/rpl_connection_master.inc
> +DROP TABLE t1, t2;
> +SET GLOBAL debug= $debug_save;
> --source include/rpl_end.inc
>
> === modified file 'sql/slave.cc'
> --- a/sql/slave.cc 2010-11-30 23:32:51 +0000
> +++ b/sql/slave.cc 2010-12-24 04:17:30 +0000
> @@ -408,17 +408,6 @@ int terminate_slave_threads(Master_info*
> int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
> pthread_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
>
> - if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
> - {
> - DBUG_PRINT("info",("Terminating IO thread"));
> - mi->abort_slave=1;
> - if ((error=terminate_slave_thread(mi->io_thd, io_lock,
> - &mi->stop_cond,
> - &mi->slave_running,
> - skip_lock)) &&
> - !force_all)
> - DBUG_RETURN(error);
> - }
> if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
> {
> DBUG_PRINT("info",("Terminating SQL thread"));
> @@ -429,6 +418,17 @@ int terminate_slave_threads(Master_info*
> skip_lock)) &&
> !force_all)
> DBUG_RETURN(error);
> + }
> + if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
> + {
> + DBUG_PRINT("info",("Terminating IO thread"));
> + mi->abort_slave=1;
> + if ((error=terminate_slave_thread(mi->io_thd, io_lock,
> + &mi->stop_cond,
> + &mi->slave_running,
> + skip_lock)) &&
> + !force_all)
> + DBUG_RETURN(error);
> }
> DBUG_RETURN(0);
> }
>
> === modified file 'sql/sql_repl.cc'
> --- a/sql/sql_repl.cc 2010-07-26 09:56:30 +0000
> +++ b/sql/sql_repl.cc 2010-12-24 04:17:30 +0000
> @@ -21,6 +21,7 @@
> #include "log_event.h"
> #include "rpl_filter.h"
> #include <my_dir.h>
> +#include "debug_sync.h"
>
> int max_binlog_dump_events = 0; // unlimited
> my_bool opt_sporadic_binlog_dump_fail = 0;
> @@ -556,6 +557,20 @@ impossible position";
> }
> #endif
>
> + DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
> + {
> + if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
> + {
> + net_flush(net);
> + const char act[]=
> + "now "
> + "wait_for signal.continue";
> + DBUG_ASSERT(opt_debug_sync_timeout > 0);
> + DBUG_ASSERT(!debug_sync_set_action(current_thd,
> + STRING_WITH_LEN(act)));
> + }
> + });
> +
Please define only a sync point here, and set the sync action (the wait for
signal.continue) in the test file instead.
> if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
> {
> binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] &
> @@ -572,6 +587,14 @@ impossible position";
> goto err;
> }
>
> + DBUG_EXECUTE_IF("dump_thread_wait_before_send_xid",
> + {
> + if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
> + {
> + net_flush(net);
> + }
> + });
> +
> DBUG_PRINT("info", ("log event code %d",
> (*packet)[LOG_EVENT_OFFSET+1] ));
> if ((*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)