List:Commits« Previous MessageNext Message »
From:Li-Bing.Song Date:December 22 2010 3:46pm
Subject:bzr commit into mysql-5.1-bugteam branch (Li-Bing.Song:3534) Bug#58546
View as plain text  
#At file:///home/anders/Work/bzrwork/wt1/mysql-5.1-bugteam/ based on revid:sergey.glukhov@stripped

 3534 Li-Bing.Song@stripped	2010-12-22
      Bug#58546 test rpl_packet timeout failure sporadically on PB
      
      rpl_packet got a timeout failure sporadically on PB when stopping
      slave. The real reason of this bug is that STOP SLAVE stopped
      IO thread first and then stopped SQL thread. It was
      possible that IO thread stopped after replicating part of a
      transaction which SQL thread was executing. SQL thread would
      be hung if the transaction could not be rolled back safely.
      
      After this patch, SQL thread is always stopped first. It guarantees
      IO thread stops just after the transaction which cannot be
      rolled back safely and is being replied by SQL thread
      has been replicated completely.
      and added below auxiliary files to make the test code neater.
      restart_slave_sql.inc
      rpl_connection_master.inc
      rpl_connection_slave.inc
      rpl_connection_slave1.inc

    added:
      mysql-test/include/restart_slave_sql.inc
      mysql-test/include/rpl_connection_master.inc
      mysql-test/include/rpl_connection_slave.inc
      mysql-test/include/rpl_connection_slave1.inc
    modified:
      mysql-test/suite/rpl/r/rpl_stop_slave.result
      mysql-test/suite/rpl/t/rpl_packet.test
      mysql-test/suite/rpl/t/rpl_stop_slave.test
      sql/slave.cc
      sql/sql_repl.cc
=== added file 'mysql-test/include/restart_slave_sql.inc'
--- a/mysql-test/include/restart_slave_sql.inc	1970-01-01 00:00:00 +0000
+++ b/mysql-test/include/restart_slave_sql.inc	2010-12-22 15:46:46 +0000
@@ -0,0 +1,40 @@
+# ==== Purpose ====
+#
+# Source stop_slave_sql.inc to stop SQL thread on the current connection.
+# Then issues START SLAVE SQL_THREAD and then waits until
+# the SQL threads have started, or until a timeout is reached.
+#
+# Please use this instead of 'STOP|START SLAVE SQL_THREAD', to reduce the risk of
+# test case bugs.
+#
+#
+# ==== Usage ====
+#
+# [--let $slave_timeout= NUMBER]
+# [--let $rpl_debug= 1]
+# --source include/restart_slave_sql.inc
+#
+# Parameters:
+#   $slave_timeout
+#     See include/wait_for_slave_param.inc
+#
+#   $rpl_debug
+#     See include/rpl_init.inc
+
+
+--let $include_filename= restart_slave.inc
+--source include/begin_include_file.inc
+
+
+if (!$rpl_debug)
+{
+  --disable_query_log
+}
+
+source include/stop_slave_sql.inc;
+START SLAVE SQL_THREAD;
+source include/wait_for_slave_sql_to_start.inc;
+
+
+--let $include_filename= restart_slave.inc
+--source include/end_include_file.inc

=== added file 'mysql-test/include/rpl_connection_master.inc'
--- a/mysql-test/include/rpl_connection_master.inc	1970-01-01 00:00:00 +0000
+++ b/mysql-test/include/rpl_connection_master.inc	2010-12-22 15:46:46 +0000
@@ -0,0 +1,2 @@
+let $rpl_connection_name= master;
+source include/rpl_connection.inc;
\ No newline at end of file

=== added file 'mysql-test/include/rpl_connection_slave.inc'
--- a/mysql-test/include/rpl_connection_slave.inc	1970-01-01 00:00:00 +0000
+++ b/mysql-test/include/rpl_connection_slave.inc	2010-12-22 15:46:46 +0000
@@ -0,0 +1,2 @@
+let $rpl_connection_name= slave;
+source include/rpl_connection.inc;
\ No newline at end of file

=== added file 'mysql-test/include/rpl_connection_slave1.inc'
--- a/mysql-test/include/rpl_connection_slave1.inc	1970-01-01 00:00:00 +0000
+++ b/mysql-test/include/rpl_connection_slave1.inc	2010-12-22 15:46:46 +0000
@@ -0,0 +1,2 @@
+let $rpl_connection_name= slave1;
+source include/rpl_connection.inc;
\ No newline at end of file

=== modified file 'mysql-test/suite/rpl/r/rpl_stop_slave.result'
--- a/mysql-test/suite/rpl/r/rpl_stop_slave.result	2010-12-19 17:07:28 +0000
+++ b/mysql-test/suite/rpl/r/rpl_stop_slave.result	2010-12-22 15:46:46 +0000
@@ -128,5 +128,44 @@ START SLAVE SQL_THREAD;
 include/wait_for_slave_sql_to_start.inc
 # Test end
 SET GLOBAL debug= '$debug_save';
+include/restart_slave.inc
+[connection master]
 DROP TABLE t1, t2;
+
+# To verify that IO thread should replicate the whole transaction which
+# SQL thread is executing and cannot be rolled back safely.
+# ----------------------------------------------------------------------
+# STOP SLAVE stopped IO thread first and then stopped SQL thread. It was
+# possible that IO thread stopped after replicating part of a transaction
+# which SQL thread was executing. SQL thread would be hung if the
+# transaction could not be rolled back safely.
+# It caused some sporadic failures on PB2.
+CREATE TABLE t1 (c1 INT KEY, c2 INT) ENGINE=InnoDB;
+CREATE TABLE t2 (c1 INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES(1, 1);
+SET GLOBAL debug= 'd,dump_thread_block_before_XID';
+[connection slave]
+include/restart_slave.inc
+BEGIN;
+UPDATE t1 SET c2 = 2 WHERE c1 = 1;
+[connection master]
+BEGIN;
+INSERT INTO t1 VALUES(2, 2);
+INSERT INTO t2 VALUES(1);
+UPDATE t1 SET c2 = 3 WHERE c1 = 1;
+COMMIT;
+[connection slave1]
+STOP SLAVE;
+[connection slave]
+ROLLBACK;
+[connection master]
+SET DEBUG_SYNC= 'now SIGNAL signal.continue';
+SET DEBUG_SYNC= 'RESET';
+[connection slave]
+include/wait_for_slave_to_stop.inc
+[connection slave1]
+include/start_slave.inc
+[connection master]
+DROP TABLE t1, t2;
+SET GLOBAL debug= $debug_save;
 include/rpl_end.inc

=== modified file 'mysql-test/suite/rpl/t/rpl_packet.test'
--- a/mysql-test/suite/rpl/t/rpl_packet.test	2010-12-19 17:07:28 +0000
+++ b/mysql-test/suite/rpl/t/rpl_packet.test	2010-12-22 15:46:46 +0000
@@ -26,8 +26,8 @@ let $old_net_buffer_length= `SELECT @@gl
 SET @@global.max_allowed_packet=1024;
 SET @@global.net_buffer_length=1024;
 
+sync_slave_with_master;
 # Restart slave for setting to take effect
-connection slave;
 source include/stop_slave.inc;
 source include/start_slave.inc;
 

=== modified file 'mysql-test/suite/rpl/t/rpl_stop_slave.test'
--- a/mysql-test/suite/rpl/t/rpl_stop_slave.test	2010-12-19 17:07:28 +0000
+++ b/mysql-test/suite/rpl/t/rpl_stop_slave.test	2010-12-22 15:46:46 +0000
@@ -54,7 +54,66 @@ source extra/rpl_tests/rpl_stop_slave.te
 
 --echo # Test end
 SET GLOBAL debug= '$debug_save';
+source include/restart_slave_sql.inc;
 
-connection master;
+--source include/rpl_connection_master.inc
 DROP TABLE t1, t2;
+
+--echo
+--echo # To verify that IO thread should replicate the whole transaction which
+--echo # SQL thread is executing and cannot be rolled back safely.
+--echo # ----------------------------------------------------------------------
+--echo # STOP SLAVE stopped IO thread first and then stopped SQL thread. It was
+--echo # possible that IO thread stopped after replicating part of a transaction
+--echo # which SQL thread was executing. SQL thread would be hung if the
+--echo # transaction could not be rolled back safely.
+--echo # It caused some sporadic failures on PB2.
+
+CREATE TABLE t1 (c1 INT KEY, c2 INT) ENGINE=InnoDB;
+CREATE TABLE t2 (c1 INT) ENGINE=MyISAM;
+INSERT INTO t1 VALUES(1, 1);
+
+let $debug_save= `SELECT @@GLOBAL.debug`;
+SET GLOBAL debug= 'd,dump_thread_block_before_XID';
+
+sync_slave_with_master;
+
+--source include/rpl_connection_slave.inc
+source include/restart_slave_sql.inc;
+
+BEGIN;
+UPDATE t1 SET c2 = 2 WHERE c1 = 1;
+
+--source include/rpl_connection_master.inc
+BEGIN;
+INSERT INTO t1 VALUES(2, 2);
+INSERT INTO t2 VALUES(1);
+UPDATE t1 SET c2 = 3 WHERE c1 = 1;
+COMMIT;
+
+--source include/rpl_connection_slave1.inc
+let $show_statement= SHOW PROCESSLIST;
+let $field= Info;
+let $condition= = 'UPDATE t1 SET c2 = 3 WHERE c1 = 1';
+source include/wait_show_condition.inc;
+
+send STOP SLAVE;
+
+--source include/rpl_connection_slave.inc
+ROLLBACK;
+
+--source include/rpl_connection_master.inc
+SET DEBUG_SYNC= 'now SIGNAL signal.continue';
+SET DEBUG_SYNC= 'RESET';
+
+--source include/rpl_connection_slave.inc
+source include/wait_for_slave_to_stop.inc;
+
+--source include/rpl_connection_slave1.inc
+reap;
+source include/start_slave.inc;
+
+--source include/rpl_connection_master.inc
+DROP TABLE t1, t2;
+SET GLOBAL debug= $debug_save;
 --source include/rpl_end.inc

=== modified file 'sql/slave.cc'
--- a/sql/slave.cc	2010-11-30 23:32:51 +0000
+++ b/sql/slave.cc	2010-12-22 15:46:46 +0000
@@ -408,17 +408,6 @@ int terminate_slave_threads(Master_info*
   int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
   pthread_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
 
-  if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
-  {
-    DBUG_PRINT("info",("Terminating IO thread"));
-    mi->abort_slave=1;
-    if ((error=terminate_slave_thread(mi->io_thd, io_lock,
-                                      &mi->stop_cond,
-                                      &mi->slave_running,
-                                      skip_lock)) &&
-        !force_all)
-      DBUG_RETURN(error);
-  }
   if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
   {
     DBUG_PRINT("info",("Terminating SQL thread"));
@@ -429,6 +418,17 @@ int terminate_slave_threads(Master_info*
                                       skip_lock)) &&
         !force_all)
       DBUG_RETURN(error);
+  }
+  if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
+  {
+    DBUG_PRINT("info",("Terminating IO thread"));
+    mi->abort_slave=1;
+    if ((error=terminate_slave_thread(mi->io_thd, io_lock,
+                                      &mi->stop_cond,
+                                      &mi->slave_running,
+                                      skip_lock)) &&
+        !force_all)
+      DBUG_RETURN(error);
   }
   DBUG_RETURN(0);
 }

=== modified file 'sql/sql_repl.cc'
--- a/sql/sql_repl.cc	2010-07-26 09:56:30 +0000
+++ b/sql/sql_repl.cc	2010-12-22 15:46:46 +0000
@@ -21,6 +21,7 @@
 #include "log_event.h"
 #include "rpl_filter.h"
 #include <my_dir.h>
+#include "debug_sync.h"
 
 int max_binlog_dump_events = 0; // unlimited
 my_bool opt_sporadic_binlog_dump_fail = 0;
@@ -556,6 +557,20 @@ impossible position";
       }
 #endif
 
+      DBUG_EXECUTE_IF("dump_thread_block_before_XID",
+                      {
+                        if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
+                        {
+                          net_flush(net);
+                          const char act[]=
+                            "now "
+                            "wait_for signal.continue";
+                          DBUG_ASSERT(opt_debug_sync_timeout > 0);
+                          DBUG_ASSERT(!debug_sync_set_action(current_thd,
+                                                             STRING_WITH_LEN(act)));
+                        }
+                      });
+
       if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
       {
         binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] &
@@ -572,6 +587,14 @@ impossible position";
 	goto err;
       }
 
+      DBUG_EXECUTE_IF("dump_thread_block_before_XID",
+                      {
+                        if ((*packet)[EVENT_TYPE_OFFSET+1] == XID_EVENT)
+                        {
+                          net_flush(net);
+                        }
+                      });
+
       DBUG_PRINT("info", ("log event code %d",
 			  (*packet)[LOG_EVENT_OFFSET+1] ));
       if ((*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)


Attachment: [text/bzr-bundle] bzr/li-bing.song@sun.com-20101222154646-glgwf6f21ttq4q63.bundle
Thread
bzr commit into mysql-5.1-bugteam branch (Li-Bing.Song:3534) Bug#58546Li-Bing.Song22 Dec
  • Re: bzr commit into mysql-5.1-bugteam branch (Li-Bing.Song:3534)Bug#58546He Zhenxing23 Dec
    • Re: bzr commit into mysql-5.1-bugteam branch (Li-Bing.Song:3534)Bug#58546Libing Song24 Dec