List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:March 15 2011 7:52pm
Subject:bzr commit into mysql-trunk branch (andrei.elkin:3766) Bug#11765758
View as plain text  
#At file:///home/andrei/MySQL/BZR/2a-23May/mysql-trunk/ based on revid:chuck.bell@stripped

 3766 Andrei Elkin	2011-03-15
      Bug#11765758 - 58754 rpl_corruption fails.
      
      There are two issues: io thread error code mismatch and a crash.
      The mismatch is benign and is possible due to the current style of error reporting allowing
      multiple errors to be pushed.
      The crash is caused by concurrent access of @@global.debug reader (IO thread) and a user
      connection that updates the var at that time.
      
      Fixed with modifying rpl_corruption and few other potentially vulnerable tests, adding notes
      to the replication failure simulator writer.
     @ mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test
        added a note explaining why @@global.debug is safe to update at that point.
     @ mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
        made set global debug safe with the explicit stopping the slave prior to the assignement.
     @ mysql-test/suite/rpl/r/rpl_corruption.result
        results updated.
     @ mysql-test/suite/rpl/r/rpl_init_slave_errors.result
        results updated.
     @ mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result
        results updated.
     @ mysql-test/suite/rpl/t/rpl_corruption.test
        added a note explaining how @@global.debug can be updated safely.
     @ mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test
        added a note explaining why @@global.debug is safe to update at that point.
     @ mysql-test/suite/rpl/t/rpl_init_slave_errors.test
        made set global debug safe with the explicit stopping the slave prior to the assignement.
     @ mysql-test/suite/rpl/t/rpl_show_slave_running.test
        made set global debug safe with syncing the slave prior to the assignement.

    modified:
      mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test
      mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
      mysql-test/suite/rpl/r/rpl_corruption.result
      mysql-test/suite/rpl/r/rpl_init_slave_errors.result
      mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result
      mysql-test/suite/rpl/t/rpl_corruption.test
      mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test
      mysql-test/suite/rpl/t/rpl_init_slave_errors.test
      mysql-test/suite/rpl/t/rpl_show_slave_running.test
      sql/rpl_slave.cc
=== modified file 'mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test'
--- a/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test	2010-12-19 17:22:30 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test	2011-03-15 19:51:55 +0000
@@ -71,6 +71,13 @@ source include/wait_for_slave_io_error.i
 # now to avoid restarting IO-thread to re-enter it.
 # There will be a new IO thread forked out with its @@session.debug
 # unset.
+
+#
+# Note, due to # Bug#11765758 - 58754
+# make sure the slave threads stand still (SQL thread in this context)
+# while @@global.debug is being updated.
+#
+
 eval set @@global.debug = "-d,$dbug_sync_point";
 
 --let $rpl_server_number= 1

=== modified file 'mysql-test/extra/rpl_tests/rpl_stop_middle_group.test'
--- a/mysql-test/extra/rpl_tests/rpl_stop_middle_group.test	2011-02-23 20:01:27 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_stop_middle_group.test	2011-03-15 19:51:55 +0000
@@ -47,6 +47,7 @@ source include/wait_for_slave_sql_to_sto
 --let $assert_cond= [SELECT COUNT(*) AS Val FROM ti, Val, 1] = 1
 --source include/assert.inc
 
+--source include/stop_slave.inc
 set @@global.debug="-d";
 
 #
@@ -104,6 +105,7 @@ let $slave_sql_errno= 1593; # ER_SLAVE_F
 --let $assert_cond= [SELECT COUNT(*) AS Val FROM ti, Val, 1] = 0
 --source include/assert.inc
 
+--source include/stop_slave.inc
 set @@global.debug="-d";
 
 #
@@ -146,6 +148,7 @@ let $slave_sql_errno= 1593; # ER_SLAVE_F
 --let $assert_cond= [SELECT MAX(a) AS Val FROM ti, Val, 1] = 1
 --source include/assert.inc
 
+--source include/stop_slave.inc
 set @@global.debug="-d";
 
 #

=== modified file 'mysql-test/suite/rpl/r/rpl_corruption.result'
--- a/mysql-test/suite/rpl/r/rpl_corruption.result	2011-03-13 19:56:40 +0000
+++ b/mysql-test/suite/rpl/r/rpl_corruption.result	2011-03-15 19:51:55 +0000
@@ -12,32 +12,32 @@ SET @old_master_verify_checksum = @@mast
 CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b VARCHAR(10), c VARCHAR(100));
 include/stop_slave.inc
 # 2. Corruption in master binlog and SHOW BINLOG EVENTS
-SET GLOBAL debug="d,corrupt_read_log_event_char";
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
 SHOW BINLOG EVENTS;
 ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Wrong offset or I/O error
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
 # 3. Master read a corrupted event from binlog and send the error to slave
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
 START SLAVE IO_THREAD;
 include/wait_for_slave_io_error.inc [errno=1236]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
 # 4. Master read a corrupted event from binlog and send it to slave
 SET GLOBAL master_verify_checksum=0;
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
 START SLAVE IO_THREAD;
 include/wait_for_slave_io_error.inc [errno=1595,1722]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
 SET GLOBAL master_verify_checksum=1;
 # 5. Slave. Corruption in network
-SET GLOBAL debug="d,corrupt_queue_event";
+SET GLOBAL debug="+d,corrupt_queue_event";
 START SLAVE IO_THREAD;
 include/wait_for_slave_io_error.inc [errno=1595,1722]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_queue_event";
 # 6. Slave. Corruption in relay log
-SET GLOBAL debug="d,corrupt_read_log_event_char";
-START SLAVE;
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
+START SLAVE SQL_THREAD;
 include/wait_for_slave_sql_error.inc [errno=1593]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
 # 7. Seek diff for tables on master and slave
 include/start_slave.inc
 include/diff_tables.inc [master:t1, slave:t1]

=== modified file 'mysql-test/suite/rpl/r/rpl_init_slave_errors.result'
--- a/mysql-test/suite/rpl/r/rpl_init_slave_errors.result	2011-02-23 09:31:37 +0000
+++ b/mysql-test/suite/rpl/r/rpl_init_slave_errors.result	2011-03-15 19:51:55 +0000
@@ -7,6 +7,9 @@ start slave;
 include/wait_for_slave_sql_error.inc [errno=1593]
 Last_SQL_Error = 'Failed during slave thread initialization'
 call mtr.add_suppression("Failed during slave.* thread initialization");
+include/stop_slave_io.inc
+Warnings:
+Note	1255	Slave already has been stopped
 SET GLOBAL debug= "";
 reset slave;
 SET GLOBAL init_slave= "garbage";

=== modified file 'mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result'
--- a/mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result	2011-02-23 20:01:27 +0000
+++ b/mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result	2011-03-15 19:51:55 +0000
@@ -18,6 +18,7 @@ include/check_slave_no_error.inc
 include/assert.inc [Everything that was read, was executed]
 include/assert.inc [There is one row in table tm]
 include/assert.inc [There is one row in table ti]
+include/stop_slave.inc
 set @@global.debug="-d";
 include/start_slave.inc
 truncate table tm;
@@ -35,8 +36,11 @@ Last_SQL_Error = 'Fatal error: ... The s
 include/assert.inc [Not everything that was read, was executed]
 include/assert.inc [There is one row in table tm]
 include/assert.inc [There is no row in table ti]
+include/stop_slave.inc
 set @@global.debug="-d";
 stop slave;
+Warnings:
+Note	1255	Slave already has been stopped
 truncate table tm;
 include/start_slave.inc
 set @@global.debug="+d,stop_slave_middle_group";
@@ -47,6 +51,7 @@ Last_SQL_Error = 'Fatal error: ... The s
 include/assert.inc [Not everything that was read, was executed]
 include/assert.inc [The max value for field 'a' is 2]
 include/assert.inc [The max value for field 'a' is 1]
+include/stop_slave.inc
 set @@global.debug="-d";
 include/rpl_reset.inc
 drop table tm, ti;

=== modified file 'mysql-test/suite/rpl/t/rpl_corruption.test'
--- a/mysql-test/suite/rpl/t/rpl_corruption.test	2011-03-13 19:56:40 +0000
+++ b/mysql-test/suite/rpl/t/rpl_corruption.test	2011-03-15 19:51:55 +0000
@@ -7,6 +7,14 @@
 #  - in relay log
 ############################################################
 
+#
+# The tests intensively utilize @@global.debug. Note,
+# Bug#11765758 - 58754,
+# updatating the global is crash-safe only if a. slave threads are down
+# or b. it's guaranteed they are standing still and won't execute
+# and DBUG-stack code.
+#
+
 --source include/have_debug.inc
 --source include/master-slave.inc
 
@@ -56,17 +64,10 @@ while ($i) {  
 }
 --enable_query_log
 
-#
-# Bug #58630 shows `+d' syntax has a side effect Andrei changed it to
-# unsigned `d' version which does not affect the tests logics.
-# todo:
-# restore *all* following `SET GLOBAL debug' back to `+|-d' when Bug
-# #58630, OBug#11765758 sorted out
-#
 
 # Emulate corruption in binlog file when SHOW BINLOG EVENTS is executing
 --echo # 2. Corruption in master binlog and SHOW BINLOG EVENTS
-SET GLOBAL debug="d,corrupt_read_log_event_char";
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
 --echo SHOW BINLOG EVENTS;
 --disable_query_log
 send_eval SHOW BINLOG EVENTS FROM $pos;
@@ -74,50 +75,50 @@ send_eval SHOW BINLOG EVENTS FROM $pos;
 --error ER_ERROR_WHEN_EXECUTING_COMMAND
 reap;
 
-# see above comments on `+d' syntax
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
 
 # Emulate corruption on master with crc checking on master
 --echo # 3. Master read a corrupted event from binlog and send the error to slave
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
 --connection slave
 START SLAVE IO_THREAD;
 let $slave_io_errno= 1236;
 --source include/wait_for_slave_io_error.inc
 --connection master
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
 
 # Emulate corruption on master without crc checking on master
 --echo # 4. Master read a corrupted event from binlog and send it to slave
 --connection master
 SET GLOBAL master_verify_checksum=0;
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
 --connection slave
 START SLAVE IO_THREAD;
 let $slave_io_errno= 1595,1722;
 --source include/wait_for_slave_io_error.inc
 --connection master
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
 SET GLOBAL master_verify_checksum=1;
 
 # Emulate corruption in network
 --echo # 5. Slave. Corruption in network
 --connection slave
-SET GLOBAL debug="d,corrupt_queue_event";
+SET GLOBAL debug="+d,corrupt_queue_event";
 START SLAVE IO_THREAD;
 let $slave_io_errno= 1595,1722;
 --source include/wait_for_slave_io_error.inc
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_queue_event";
 
 # Emulate corruption in relay log
 --echo # 6. Slave. Corruption in relay log
 
-SET GLOBAL debug="d,corrupt_read_log_event_char";
-START SLAVE;
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
+
+START SLAVE SQL_THREAD;
 let $slave_sql_errno= 1593;
 --source include/wait_for_slave_sql_error.inc
 
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
 
 # Start normal replication and compare same table on master
 # and slave

=== modified file 'mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test'
--- a/mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test	2010-12-19 17:22:30 +0000
+++ b/mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test	2011-03-15 19:51:55 +0000
@@ -47,6 +47,11 @@ let $dbug_sync_point= 'dbug.before_get_M
 let $debug_sync_action= 'now SIGNAL signal.get_master_uuid';
 source extra/rpl_tests/rpl_get_master_version_and_clock.test;
 
+#
+# Note, due to # Bug#11765758 - 58754
+# make sure the slave threads stand still (SQL thread in this context)
+# while @@global.debug is being updated.
+#
 eval set global debug= '$debug_saved';
 
 #Test case 4: This test checks that the slave I/O thread refuses to start

=== modified file 'mysql-test/suite/rpl/t/rpl_init_slave_errors.test'
--- a/mysql-test/suite/rpl/t/rpl_init_slave_errors.test	2011-02-23 09:31:37 +0000
+++ b/mysql-test/suite/rpl/t/rpl_init_slave_errors.test	2011-03-15 19:51:55 +0000
@@ -60,6 +60,10 @@ start slave;
 
 call mtr.add_suppression("Failed during slave.* thread initialization");
 
+# Bug#11765758 - 58754
+# make sure the dump thread has sent the queries and
+# is inactive to allow safe updating to @@global.debug
+--source include/stop_slave_io.inc
 SET GLOBAL debug= "";
 
 ######################################################################

=== modified file 'mysql-test/suite/rpl/t/rpl_show_slave_running.test'
--- a/mysql-test/suite/rpl/t/rpl_show_slave_running.test	2010-12-19 17:22:30 +0000
+++ b/mysql-test/suite/rpl/t/rpl_show_slave_running.test	2011-03-15 19:51:55 +0000
@@ -76,9 +76,16 @@ echo Slave_SQL_Running= $status;
 
 # cleanup
 
-connection slave;
+connection master;
+
+# Bug#11765758 - 58754
+# make sure the slave has processed all sent queries and
+# is idling to allow safe updating to @@global.debug
+#connection slave;
+sync_slave_with_master;
 
 eval set global debug= '$debug_saved';
+
 SET DEBUG_SYNC= 'RESET';
 --echo End of tests
 --source include/rpl_end.inc

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2011-03-14 13:55:44 +0000
+++ b/sql/rpl_slave.cc	2011-03-15 19:51:55 +0000
@@ -4372,7 +4372,7 @@ static int queue_event(Master_info* mi,c
       int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
       debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
       DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
-      DBUG_SET("");
+      DBUG_SET("-d,corrupt_queue_event");
     }
   );
                                               


Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20110315195155-hnmtv32sp5ht7p9j.bundle
Thread
bzr commit into mysql-trunk branch (andrei.elkin:3766) Bug#11765758Andrei Elkin15 Mar