#At file:///home/andrei/MySQL/BZR/2a-23May/mysql-trunk/ based on revid:chuck.bell@stripped
3766 Andrei Elkin 2011-03-15
Bug#11765758 - 58754 rpl_corruption fails.
There are two issues: io thread error code mismatch and a crash.
The mismatch is benign and is possible due to the current style of error reporting allowing
multiple errors to be pushed.
The crash is caused by concurrent access of @@global.debug reader (IO thread) and a user
connection that updates the var at that time.
Fixed with modifying rpl_corruption and few other potentially vulnerable tests, adding notes
to the replication failure simulator writer.
@ mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test
added a note explaining why @@global.debug is safe to update at that point.
@ mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
made set global debug safe with the explicit stopping the slave prior to the assignement.
@ mysql-test/suite/rpl/r/rpl_corruption.result
results updated.
@ mysql-test/suite/rpl/r/rpl_init_slave_errors.result
results updated.
@ mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result
results updated.
@ mysql-test/suite/rpl/t/rpl_corruption.test
added a note explaining how @@global.debug can be updated safely.
@ mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test
added a note explaining why @@global.debug is safe to update at that point.
@ mysql-test/suite/rpl/t/rpl_init_slave_errors.test
made set global debug safe with the explicit stopping the slave prior to the assignement.
@ mysql-test/suite/rpl/t/rpl_show_slave_running.test
made set global debug safe with syncing the slave prior to the assignement.
modified:
mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test
mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
mysql-test/suite/rpl/r/rpl_corruption.result
mysql-test/suite/rpl/r/rpl_init_slave_errors.result
mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result
mysql-test/suite/rpl/t/rpl_corruption.test
mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test
mysql-test/suite/rpl/t/rpl_init_slave_errors.test
mysql-test/suite/rpl/t/rpl_show_slave_running.test
sql/rpl_slave.cc
=== modified file 'mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test'
--- a/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test 2010-12-19 17:22:30 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test 2011-03-15 19:51:55 +0000
@@ -71,6 +71,13 @@ source include/wait_for_slave_io_error.i
# now to avoid restarting IO-thread to re-enter it.
# There will be a new IO thread forked out with its @@session.debug
# unset.
+
+#
+# Note, due to # Bug#11765758 - 58754
+# make sure the slave threads stand still (SQL thread in this context)
+# while @@global.debug is being updated.
+#
+
eval set @@global.debug = "-d,$dbug_sync_point";
--let $rpl_server_number= 1
=== modified file 'mysql-test/extra/rpl_tests/rpl_stop_middle_group.test'
--- a/mysql-test/extra/rpl_tests/rpl_stop_middle_group.test 2011-02-23 20:01:27 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_stop_middle_group.test 2011-03-15 19:51:55 +0000
@@ -47,6 +47,7 @@ source include/wait_for_slave_sql_to_sto
--let $assert_cond= [SELECT COUNT(*) AS Val FROM ti, Val, 1] = 1
--source include/assert.inc
+--source include/stop_slave.inc
set @@global.debug="-d";
#
@@ -104,6 +105,7 @@ let $slave_sql_errno= 1593; # ER_SLAVE_F
--let $assert_cond= [SELECT COUNT(*) AS Val FROM ti, Val, 1] = 0
--source include/assert.inc
+--source include/stop_slave.inc
set @@global.debug="-d";
#
@@ -146,6 +148,7 @@ let $slave_sql_errno= 1593; # ER_SLAVE_F
--let $assert_cond= [SELECT MAX(a) AS Val FROM ti, Val, 1] = 1
--source include/assert.inc
+--source include/stop_slave.inc
set @@global.debug="-d";
#
=== modified file 'mysql-test/suite/rpl/r/rpl_corruption.result'
--- a/mysql-test/suite/rpl/r/rpl_corruption.result 2011-03-13 19:56:40 +0000
+++ b/mysql-test/suite/rpl/r/rpl_corruption.result 2011-03-15 19:51:55 +0000
@@ -12,32 +12,32 @@ SET @old_master_verify_checksum = @@mast
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b VARCHAR(10), c VARCHAR(100));
include/stop_slave.inc
# 2. Corruption in master binlog and SHOW BINLOG EVENTS
-SET GLOBAL debug="d,corrupt_read_log_event_char";
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
SHOW BINLOG EVENTS;
ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Wrong offset or I/O error
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
# 3. Master read a corrupted event from binlog and send the error to slave
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
START SLAVE IO_THREAD;
include/wait_for_slave_io_error.inc [errno=1236]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
# 4. Master read a corrupted event from binlog and send it to slave
SET GLOBAL master_verify_checksum=0;
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
START SLAVE IO_THREAD;
include/wait_for_slave_io_error.inc [errno=1595,1722]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
SET GLOBAL master_verify_checksum=1;
# 5. Slave. Corruption in network
-SET GLOBAL debug="d,corrupt_queue_event";
+SET GLOBAL debug="+d,corrupt_queue_event";
START SLAVE IO_THREAD;
include/wait_for_slave_io_error.inc [errno=1595,1722]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_queue_event";
# 6. Slave. Corruption in relay log
-SET GLOBAL debug="d,corrupt_read_log_event_char";
-START SLAVE;
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
+START SLAVE SQL_THREAD;
include/wait_for_slave_sql_error.inc [errno=1593]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
# 7. Seek diff for tables on master and slave
include/start_slave.inc
include/diff_tables.inc [master:t1, slave:t1]
=== modified file 'mysql-test/suite/rpl/r/rpl_init_slave_errors.result'
--- a/mysql-test/suite/rpl/r/rpl_init_slave_errors.result 2011-02-23 09:31:37 +0000
+++ b/mysql-test/suite/rpl/r/rpl_init_slave_errors.result 2011-03-15 19:51:55 +0000
@@ -7,6 +7,9 @@ start slave;
include/wait_for_slave_sql_error.inc [errno=1593]
Last_SQL_Error = 'Failed during slave thread initialization'
call mtr.add_suppression("Failed during slave.* thread initialization");
+include/stop_slave_io.inc
+Warnings:
+Note 1255 Slave already has been stopped
SET GLOBAL debug= "";
reset slave;
SET GLOBAL init_slave= "garbage";
=== modified file 'mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result'
--- a/mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result 2011-02-23 20:01:27 +0000
+++ b/mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result 2011-03-15 19:51:55 +0000
@@ -18,6 +18,7 @@ include/check_slave_no_error.inc
include/assert.inc [Everything that was read, was executed]
include/assert.inc [There is one row in table tm]
include/assert.inc [There is one row in table ti]
+include/stop_slave.inc
set @@global.debug="-d";
include/start_slave.inc
truncate table tm;
@@ -35,8 +36,11 @@ Last_SQL_Error = 'Fatal error: ... The s
include/assert.inc [Not everything that was read, was executed]
include/assert.inc [There is one row in table tm]
include/assert.inc [There is no row in table ti]
+include/stop_slave.inc
set @@global.debug="-d";
stop slave;
+Warnings:
+Note 1255 Slave already has been stopped
truncate table tm;
include/start_slave.inc
set @@global.debug="+d,stop_slave_middle_group";
@@ -47,6 +51,7 @@ Last_SQL_Error = 'Fatal error: ... The s
include/assert.inc [Not everything that was read, was executed]
include/assert.inc [The max value for field 'a' is 2]
include/assert.inc [The max value for field 'a' is 1]
+include/stop_slave.inc
set @@global.debug="-d";
include/rpl_reset.inc
drop table tm, ti;
=== modified file 'mysql-test/suite/rpl/t/rpl_corruption.test'
--- a/mysql-test/suite/rpl/t/rpl_corruption.test 2011-03-13 19:56:40 +0000
+++ b/mysql-test/suite/rpl/t/rpl_corruption.test 2011-03-15 19:51:55 +0000
@@ -7,6 +7,14 @@
# - in relay log
############################################################
+#
+# The tests intensively utilize @@global.debug. Note,
+# Bug#11765758 - 58754,
+# updatating the global is crash-safe only if a. slave threads are down
+# or b. it's guaranteed they are standing still and won't execute
+# and DBUG-stack code.
+#
+
--source include/have_debug.inc
--source include/master-slave.inc
@@ -56,17 +64,10 @@ while ($i) {
}
--enable_query_log
-#
-# Bug #58630 shows `+d' syntax has a side effect Andrei changed it to
-# unsigned `d' version which does not affect the tests logics.
-# todo:
-# restore *all* following `SET GLOBAL debug' back to `+|-d' when Bug
-# #58630, OBug#11765758 sorted out
-#
# Emulate corruption in binlog file when SHOW BINLOG EVENTS is executing
--echo # 2. Corruption in master binlog and SHOW BINLOG EVENTS
-SET GLOBAL debug="d,corrupt_read_log_event_char";
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
--echo SHOW BINLOG EVENTS;
--disable_query_log
send_eval SHOW BINLOG EVENTS FROM $pos;
@@ -74,50 +75,50 @@ send_eval SHOW BINLOG EVENTS FROM $pos;
--error ER_ERROR_WHEN_EXECUTING_COMMAND
reap;
-# see above comments on `+d' syntax
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
# Emulate corruption on master with crc checking on master
--echo # 3. Master read a corrupted event from binlog and send the error to slave
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
--connection slave
START SLAVE IO_THREAD;
let $slave_io_errno= 1236;
--source include/wait_for_slave_io_error.inc
--connection master
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
# Emulate corruption on master without crc checking on master
--echo # 4. Master read a corrupted event from binlog and send it to slave
--connection master
SET GLOBAL master_verify_checksum=0;
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
--connection slave
START SLAVE IO_THREAD;
let $slave_io_errno= 1595,1722;
--source include/wait_for_slave_io_error.inc
--connection master
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
SET GLOBAL master_verify_checksum=1;
# Emulate corruption in network
--echo # 5. Slave. Corruption in network
--connection slave
-SET GLOBAL debug="d,corrupt_queue_event";
+SET GLOBAL debug="+d,corrupt_queue_event";
START SLAVE IO_THREAD;
let $slave_io_errno= 1595,1722;
--source include/wait_for_slave_io_error.inc
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_queue_event";
# Emulate corruption in relay log
--echo # 6. Slave. Corruption in relay log
-SET GLOBAL debug="d,corrupt_read_log_event_char";
-START SLAVE;
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
+
+START SLAVE SQL_THREAD;
let $slave_sql_errno= 1593;
--source include/wait_for_slave_sql_error.inc
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
# Start normal replication and compare same table on master
# and slave
=== modified file 'mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test'
--- a/mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test 2010-12-19 17:22:30 +0000
+++ b/mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test 2011-03-15 19:51:55 +0000
@@ -47,6 +47,11 @@ let $dbug_sync_point= 'dbug.before_get_M
let $debug_sync_action= 'now SIGNAL signal.get_master_uuid';
source extra/rpl_tests/rpl_get_master_version_and_clock.test;
+#
+# Note, due to # Bug#11765758 - 58754
+# make sure the slave threads stand still (SQL thread in this context)
+# while @@global.debug is being updated.
+#
eval set global debug= '$debug_saved';
#Test case 4: This test checks that the slave I/O thread refuses to start
=== modified file 'mysql-test/suite/rpl/t/rpl_init_slave_errors.test'
--- a/mysql-test/suite/rpl/t/rpl_init_slave_errors.test 2011-02-23 09:31:37 +0000
+++ b/mysql-test/suite/rpl/t/rpl_init_slave_errors.test 2011-03-15 19:51:55 +0000
@@ -60,6 +60,10 @@ start slave;
call mtr.add_suppression("Failed during slave.* thread initialization");
+# Bug#11765758 - 58754
+# make sure the dump thread has sent the queries and
+# is inactive to allow safe updating to @@global.debug
+--source include/stop_slave_io.inc
SET GLOBAL debug= "";
######################################################################
=== modified file 'mysql-test/suite/rpl/t/rpl_show_slave_running.test'
--- a/mysql-test/suite/rpl/t/rpl_show_slave_running.test 2010-12-19 17:22:30 +0000
+++ b/mysql-test/suite/rpl/t/rpl_show_slave_running.test 2011-03-15 19:51:55 +0000
@@ -76,9 +76,16 @@ echo Slave_SQL_Running= $status;
# cleanup
-connection slave;
+connection master;
+
+# Bug#11765758 - 58754
+# make sure the slave has processed all sent queries and
+# is idling to allow safe updating to @@global.debug
+#connection slave;
+sync_slave_with_master;
eval set global debug= '$debug_saved';
+
SET DEBUG_SYNC= 'RESET';
--echo End of tests
--source include/rpl_end.inc
=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc 2011-03-14 13:55:44 +0000
+++ b/sql/rpl_slave.cc 2011-03-15 19:51:55 +0000
@@ -4372,7 +4372,7 @@ static int queue_event(Master_info* mi,c
int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
- DBUG_SET("");
+ DBUG_SET("-d,corrupt_queue_event");
}
);
Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20110315195155-hnmtv32sp5ht7p9j.bundle
| Thread |
|---|
| • bzr commit into mysql-trunk branch (andrei.elkin:3766) Bug#11765758 | Andrei Elkin | 15 Mar |