List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:March 16 2011 2:44pm
Subject:bzr commit into mysql-trunk branch (andrei.elkin:3766) Bug#11765758
View as plain text  
#At file:///home/andrei/MySQL/BZR/2a-23May/mysql-trunk/ based on revid:chuck.bell@stripped

 3766 Andrei Elkin	2011-03-16
      Bug#11765758 - 58754 rpl_corruption fails.
      
      There are two issues: io thread error code mismatch and a crash.
      The mismatch is benign and is possible due to the current style of error reporting allowing
      multiple errors to be pushed.
      The crash is caused by concurrent access of @@global.debug reader (IO thread) and a user
      connection that updates the var at that time.
      
      Fixed with modifying rpl_corruption and few other potentially vulnerable tests, adding notes
      to the replication failure simulator writer.
      
      The rule of thumb for using @@global.debug in replication thread failure simulation:
      -----------------------------------------------------------------------------------
      Update the variable when the slave threads or the master dump thread are down or they 
      are guaranteed to stay idle e.g
      in waiting for an event. Changing the value while a replication thread is UP can cause
      reading a partial results of the change to end up in a crash.
      For the dump thread one has to consider that a possible small hearbeat period won't break
      waiting for an event in the binlog.
     @ mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test
        added a note explaining why @@global.debug is safe to update at that point.
     @ mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
        made set global debug safe with the explicit stopping the slave prior to the assignement.
     @ mysql-test/suite/rpl/r/rpl_corruption.result
        results updated.
     @ mysql-test/suite/rpl/r/rpl_init_slave_errors.result
        results updated.
     @ mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result
        results updated.
     @ mysql-test/suite/rpl/t/rpl_corruption.test
        added a note explaining how @@global.debug can be updated safely.
     @ mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test
        added a note explaining why @@global.debug is safe to update at that point.
     @ mysql-test/suite/rpl/t/rpl_init_slave_errors.test
        made set global debug safe with the explicit stopping the slave prior to the assignement.
     @ mysql-test/suite/rpl/t/rpl_show_slave_running.test
        made set global debug safe with syncing the slave prior to the assignement.

    modified:
      mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test
      mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
      mysql-test/suite/rpl/r/rpl_corruption.result
      mysql-test/suite/rpl/r/rpl_init_slave_errors.result
      mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result
      mysql-test/suite/rpl/t/rpl_corruption.test
      mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test
      mysql-test/suite/rpl/t/rpl_init_slave_errors.test
      mysql-test/suite/rpl/t/rpl_show_slave_running.test
      sql/rpl_slave.cc
=== modified file 'mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test'
--- a/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test	2010-12-19 17:22:30 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test	2011-03-16 14:44:30 +0000
@@ -71,6 +71,13 @@ source include/wait_for_slave_io_error.i
 # now to avoid restarting IO-thread to re-enter it.
 # There will be a new IO thread forked out with its @@session.debug
 # unset.
+
+#
+# Note, due to # Bug#11765758 - 58754
+# make sure the slave threads stand still (SQL thread in this context)
+# while @@global.debug is being updated.
+#
+
 eval set @@global.debug = "-d,$dbug_sync_point";
 
 --let $rpl_server_number= 1

=== modified file 'mysql-test/extra/rpl_tests/rpl_stop_middle_group.test'
--- a/mysql-test/extra/rpl_tests/rpl_stop_middle_group.test	2011-02-23 20:01:27 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_stop_middle_group.test	2011-03-16 14:44:30 +0000
@@ -47,6 +47,7 @@ source include/wait_for_slave_sql_to_sto
 --let $assert_cond= [SELECT COUNT(*) AS Val FROM ti, Val, 1] = 1
 --source include/assert.inc
 
+--source include/stop_slave.inc
 set @@global.debug="-d";
 
 #
@@ -104,6 +105,7 @@ let $slave_sql_errno= 1593; # ER_SLAVE_F
 --let $assert_cond= [SELECT COUNT(*) AS Val FROM ti, Val, 1] = 0
 --source include/assert.inc
 
+--source include/stop_slave.inc
 set @@global.debug="-d";
 
 #
@@ -146,6 +148,7 @@ let $slave_sql_errno= 1593; # ER_SLAVE_F
 --let $assert_cond= [SELECT MAX(a) AS Val FROM ti, Val, 1] = 1
 --source include/assert.inc
 
+--source include/stop_slave.inc
 set @@global.debug="-d";
 
 #

=== modified file 'mysql-test/suite/rpl/r/rpl_corruption.result'
--- a/mysql-test/suite/rpl/r/rpl_corruption.result	2011-03-13 19:56:40 +0000
+++ b/mysql-test/suite/rpl/r/rpl_corruption.result	2011-03-16 14:44:30 +0000
@@ -12,32 +12,32 @@ SET @old_master_verify_checksum = @@mast
 CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b VARCHAR(10), c VARCHAR(100));
 include/stop_slave.inc
 # 2. Corruption in master binlog and SHOW BINLOG EVENTS
-SET GLOBAL debug="d,corrupt_read_log_event_char";
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
 SHOW BINLOG EVENTS;
 ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Wrong offset or I/O error
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
 # 3. Master read a corrupted event from binlog and send the error to slave
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
 START SLAVE IO_THREAD;
 include/wait_for_slave_io_error.inc [errno=1236]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
 # 4. Master read a corrupted event from binlog and send it to slave
 SET GLOBAL master_verify_checksum=0;
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
 START SLAVE IO_THREAD;
 include/wait_for_slave_io_error.inc [errno=1595,1722]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
 SET GLOBAL master_verify_checksum=1;
 # 5. Slave. Corruption in network
-SET GLOBAL debug="d,corrupt_queue_event";
+SET GLOBAL debug="+d,corrupt_queue_event";
 START SLAVE IO_THREAD;
 include/wait_for_slave_io_error.inc [errno=1595,1722]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_queue_event";
 # 6. Slave. Corruption in relay log
-SET GLOBAL debug="d,corrupt_read_log_event_char";
-START SLAVE;
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
+START SLAVE SQL_THREAD;
 include/wait_for_slave_sql_error.inc [errno=1593]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
 # 7. Seek diff for tables on master and slave
 include/start_slave.inc
 include/diff_tables.inc [master:t1, slave:t1]

=== modified file 'mysql-test/suite/rpl/r/rpl_init_slave_errors.result'
--- a/mysql-test/suite/rpl/r/rpl_init_slave_errors.result	2011-02-23 09:31:37 +0000
+++ b/mysql-test/suite/rpl/r/rpl_init_slave_errors.result	2011-03-16 14:44:30 +0000
@@ -7,6 +7,9 @@ start slave;
 include/wait_for_slave_sql_error.inc [errno=1593]
 Last_SQL_Error = 'Failed during slave thread initialization'
 call mtr.add_suppression("Failed during slave.* thread initialization");
+include/stop_slave_io.inc
+Warnings:
+Note	1255	Slave already has been stopped
 SET GLOBAL debug= "";
 reset slave;
 SET GLOBAL init_slave= "garbage";

=== modified file 'mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result'
--- a/mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result	2011-02-23 20:01:27 +0000
+++ b/mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result	2011-03-16 14:44:30 +0000
@@ -18,6 +18,7 @@ include/check_slave_no_error.inc
 include/assert.inc [Everything that was read, was executed]
 include/assert.inc [There is one row in table tm]
 include/assert.inc [There is one row in table ti]
+include/stop_slave.inc
 set @@global.debug="-d";
 include/start_slave.inc
 truncate table tm;
@@ -35,8 +36,11 @@ Last_SQL_Error = 'Fatal error: ... The s
 include/assert.inc [Not everything that was read, was executed]
 include/assert.inc [There is one row in table tm]
 include/assert.inc [There is no row in table ti]
+include/stop_slave.inc
 set @@global.debug="-d";
 stop slave;
+Warnings:
+Note	1255	Slave already has been stopped
 truncate table tm;
 include/start_slave.inc
 set @@global.debug="+d,stop_slave_middle_group";
@@ -47,6 +51,7 @@ Last_SQL_Error = 'Fatal error: ... The s
 include/assert.inc [Not everything that was read, was executed]
 include/assert.inc [The max value for field 'a' is 2]
 include/assert.inc [The max value for field 'a' is 1]
+include/stop_slave.inc
 set @@global.debug="-d";
 include/rpl_reset.inc
 drop table tm, ti;

=== modified file 'mysql-test/suite/rpl/t/rpl_corruption.test'
--- a/mysql-test/suite/rpl/t/rpl_corruption.test	2011-03-13 19:56:40 +0000
+++ b/mysql-test/suite/rpl/t/rpl_corruption.test	2011-03-16 14:44:30 +0000
@@ -7,6 +7,14 @@
 #  - in relay log
 ############################################################
 
+#
+# The tests intensively utilize @@global.debug. Note,
+# Bug#11765758 - 58754,
+# updatating the global is crash-safe only if a. slave threads are down
+# or b. it's guaranteed they are standing still and won't execute
+# and DBUG-stack code.
+#
+
 --source include/have_debug.inc
 --source include/master-slave.inc
 
@@ -56,17 +64,10 @@ while ($i) {  
 }
 --enable_query_log
 
-#
-# Bug #58630 shows `+d' syntax has a side effect Andrei changed it to
-# unsigned `d' version which does not affect the tests logics.
-# todo:
-# restore *all* following `SET GLOBAL debug' back to `+|-d' when Bug
-# #58630, OBug#11765758 sorted out
-#
 
 # Emulate corruption in binlog file when SHOW BINLOG EVENTS is executing
 --echo # 2. Corruption in master binlog and SHOW BINLOG EVENTS
-SET GLOBAL debug="d,corrupt_read_log_event_char";
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
 --echo SHOW BINLOG EVENTS;
 --disable_query_log
 send_eval SHOW BINLOG EVENTS FROM $pos;
@@ -74,50 +75,50 @@ send_eval SHOW BINLOG EVENTS FROM $pos;
 --error ER_ERROR_WHEN_EXECUTING_COMMAND
 reap;
 
-# see above comments on `+d' syntax
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
 
 # Emulate corruption on master with crc checking on master
 --echo # 3. Master read a corrupted event from binlog and send the error to slave
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
 --connection slave
 START SLAVE IO_THREAD;
 let $slave_io_errno= 1236;
 --source include/wait_for_slave_io_error.inc
 --connection master
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
 
 # Emulate corruption on master without crc checking on master
 --echo # 4. Master read a corrupted event from binlog and send it to slave
 --connection master
 SET GLOBAL master_verify_checksum=0;
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
 --connection slave
 START SLAVE IO_THREAD;
 let $slave_io_errno= 1595,1722;
 --source include/wait_for_slave_io_error.inc
 --connection master
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
 SET GLOBAL master_verify_checksum=1;
 
 # Emulate corruption in network
 --echo # 5. Slave. Corruption in network
 --connection slave
-SET GLOBAL debug="d,corrupt_queue_event";
+SET GLOBAL debug="+d,corrupt_queue_event";
 START SLAVE IO_THREAD;
 let $slave_io_errno= 1595,1722;
 --source include/wait_for_slave_io_error.inc
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_queue_event";
 
 # Emulate corruption in relay log
 --echo # 6. Slave. Corruption in relay log
 
-SET GLOBAL debug="d,corrupt_read_log_event_char";
-START SLAVE;
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
+
+START SLAVE SQL_THREAD;
 let $slave_sql_errno= 1593;
 --source include/wait_for_slave_sql_error.inc
 
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
 
 # Start normal replication and compare same table on master
 # and slave

=== modified file 'mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test'
--- a/mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test	2010-12-19 17:22:30 +0000
+++ b/mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test	2011-03-16 14:44:30 +0000
@@ -47,6 +47,11 @@ let $dbug_sync_point= 'dbug.before_get_M
 let $debug_sync_action= 'now SIGNAL signal.get_master_uuid';
 source extra/rpl_tests/rpl_get_master_version_and_clock.test;
 
+#
+# Note, due to # Bug#11765758 - 58754
+# make sure the slave threads stand still (SQL thread in this context)
+# while @@global.debug is being updated.
+#
 eval set global debug= '$debug_saved';
 
 #Test case 4: This test checks that the slave I/O thread refuses to start

=== modified file 'mysql-test/suite/rpl/t/rpl_init_slave_errors.test'
--- a/mysql-test/suite/rpl/t/rpl_init_slave_errors.test	2011-02-23 09:31:37 +0000
+++ b/mysql-test/suite/rpl/t/rpl_init_slave_errors.test	2011-03-16 14:44:30 +0000
@@ -60,6 +60,10 @@ start slave;
 
 call mtr.add_suppression("Failed during slave.* thread initialization");
 
+# Bug#11765758 - 58754
+# make sure the dump thread has sent the queries and
+# is inactive to allow safe updating to @@global.debug
+--source include/stop_slave_io.inc
 SET GLOBAL debug= "";
 
 ######################################################################

=== modified file 'mysql-test/suite/rpl/t/rpl_show_slave_running.test'
--- a/mysql-test/suite/rpl/t/rpl_show_slave_running.test	2010-12-19 17:22:30 +0000
+++ b/mysql-test/suite/rpl/t/rpl_show_slave_running.test	2011-03-16 14:44:30 +0000
@@ -76,9 +76,16 @@ echo Slave_SQL_Running= $status;
 
 # cleanup
 
-connection slave;
+connection master;
+
+# Bug#11765758 - 58754
+# make sure the slave has processed all sent queries and
+# is idling to allow safe updating to @@global.debug
+#connection slave;
+sync_slave_with_master;
 
 eval set global debug= '$debug_saved';
+
 SET DEBUG_SYNC= 'RESET';
 --echo End of tests
 --source include/rpl_end.inc

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2011-03-14 13:55:44 +0000
+++ b/sql/rpl_slave.cc	2011-03-16 14:44:30 +0000
@@ -4372,7 +4372,7 @@ static int queue_event(Master_info* mi,c
       int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
       debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
       DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
-      DBUG_SET("");
+      DBUG_SET("-d,corrupt_queue_event");
     }
   );
                                               


Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20110316144430-2tcwmvoi3roiv7h6.bundle
Thread
bzr commit into mysql-trunk branch (andrei.elkin:3766) Bug#11765758Andrei Elkin16 Mar
  • Re: bzr commit into mysql-trunk branch (andrei.elkin:3766) Bug#11765758Sven Sandberg17 Mar