#At file:///home/andrei/MySQL/BZR/2a-23May/mysql-trunk/ based on revid:chuck.bell@stripped
3766 Andrei Elkin 2011-03-17
Bug#11765758 - 58754 rpl_corruption fails.
There is a crash issue.
It is caused by concurrent access of @@global.debug reader (IO thread) and a user
connection that updates the var at that time.
When @@session.debug == "", it actually points to @@global.debug. So
if client X has @@session.debug == "", and then @@global.debug is set
to "foo", then the debug symbol "foo" is active also in client X.
The slave threads always have @@session.debug == "". Hence, the slave
threads are affected by the value of @@global.debug. So if we need to
set @@global.debug but do not want it to affect the slave threads,
then we have to ensure that the slave threads are either stopped or
synced.
Fixed with modifying rpl_corruption and few other potentially vulnerable tests, adding notes
to the replication failure simulator writer.
The rule of thumb for using @@global.debug in replication thread failure simulation:
-----------------------------------------------------------------------------------
Update the variable when the slave threads or the master dump thread are down or they
are guaranteed to stay idle e.g
in waiting for an event. Changing the value while a replication thread is UP can cause
reading a partial results of the change to end up in a crash.
For the dump thread one has to consider that a possible small hearbeat period won't break
waiting for an event in the binlog.
@ mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test
added a note explaining why @@global.debug is safe to update at that point.
@ mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
made set global debug safe with the explicit stopping the slave prior to the assignement.
@ mysql-test/suite/rpl/r/rpl_corruption.result
results updated.
@ mysql-test/suite/rpl/r/rpl_init_slave_errors.result
results updated.
@ mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result
results updated.
@ mysql-test/suite/rpl/t/rpl_corruption.test
added a note explaining how @@global.debug can be updated safely.
@ mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test
added a note explaining why @@global.debug is safe to update at that point.
@ mysql-test/suite/rpl/t/rpl_init_slave_errors.test
made set global debug safe with the explicit stopping the slave prior to the assignement.
@ mysql-test/suite/rpl/t/rpl_show_slave_running.test
made set global debug safe with syncing the slave prior to the assignement.
modified:
mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test
mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
mysql-test/suite/rpl/r/rpl_corruption.result
mysql-test/suite/rpl/r/rpl_init_slave_errors.result
mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result
mysql-test/suite/rpl/t/rpl_corruption.test
mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test
mysql-test/suite/rpl/t/rpl_init_slave_errors.test
mysql-test/suite/rpl/t/rpl_show_slave_running.test
sql/rpl_slave.cc
=== modified file 'mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test'
--- a/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test 2010-12-19 17:22:30 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_get_master_version_and_clock.test 2011-03-17 18:13:10 +0000
@@ -71,6 +71,13 @@ source include/wait_for_slave_io_error.i
# now to avoid restarting IO-thread to re-enter it.
# There will be a new IO thread forked out with its @@session.debug
# unset.
+
+#
+# Note, due to # Bug#11765758 - 58754
+# make sure the slave threads stand still (SQL thread in this context)
+# while @@global.debug is being updated.
+#
+
eval set @@global.debug = "-d,$dbug_sync_point";
--let $rpl_server_number= 1
=== modified file 'mysql-test/extra/rpl_tests/rpl_stop_middle_group.test'
--- a/mysql-test/extra/rpl_tests/rpl_stop_middle_group.test 2011-02-23 20:01:27 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_stop_middle_group.test 2011-03-17 18:13:10 +0000
@@ -47,6 +47,7 @@ source include/wait_for_slave_sql_to_sto
--let $assert_cond= [SELECT COUNT(*) AS Val FROM ti, Val, 1] = 1
--source include/assert.inc
+--source include/stop_slave.inc
set @@global.debug="-d";
#
@@ -104,6 +105,7 @@ let $slave_sql_errno= 1593; # ER_SLAVE_F
--let $assert_cond= [SELECT COUNT(*) AS Val FROM ti, Val, 1] = 0
--source include/assert.inc
+--source include/stop_slave.inc
set @@global.debug="-d";
#
@@ -146,6 +148,7 @@ let $slave_sql_errno= 1593; # ER_SLAVE_F
--let $assert_cond= [SELECT MAX(a) AS Val FROM ti, Val, 1] = 1
--source include/assert.inc
+--source include/stop_slave.inc
set @@global.debug="-d";
#
=== modified file 'mysql-test/suite/rpl/r/rpl_corruption.result'
--- a/mysql-test/suite/rpl/r/rpl_corruption.result 2011-03-13 19:56:40 +0000
+++ b/mysql-test/suite/rpl/r/rpl_corruption.result 2011-03-17 18:13:10 +0000
@@ -12,32 +12,32 @@ SET @old_master_verify_checksum = @@mast
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b VARCHAR(10), c VARCHAR(100));
include/stop_slave.inc
# 2. Corruption in master binlog and SHOW BINLOG EVENTS
-SET GLOBAL debug="d,corrupt_read_log_event_char";
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
SHOW BINLOG EVENTS;
ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Wrong offset or I/O error
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
# 3. Master read a corrupted event from binlog and send the error to slave
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
START SLAVE IO_THREAD;
include/wait_for_slave_io_error.inc [errno=1236]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
# 4. Master read a corrupted event from binlog and send it to slave
SET GLOBAL master_verify_checksum=0;
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
START SLAVE IO_THREAD;
include/wait_for_slave_io_error.inc [errno=1595,1722]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
SET GLOBAL master_verify_checksum=1;
# 5. Slave. Corruption in network
-SET GLOBAL debug="d,corrupt_queue_event";
+SET GLOBAL debug="+d,corrupt_queue_event";
START SLAVE IO_THREAD;
include/wait_for_slave_io_error.inc [errno=1595,1722]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_queue_event";
# 6. Slave. Corruption in relay log
-SET GLOBAL debug="d,corrupt_read_log_event_char";
-START SLAVE;
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
+START SLAVE SQL_THREAD;
include/wait_for_slave_sql_error.inc [errno=1593]
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
# 7. Seek diff for tables on master and slave
include/start_slave.inc
include/diff_tables.inc [master:t1, slave:t1]
=== modified file 'mysql-test/suite/rpl/r/rpl_init_slave_errors.result'
--- a/mysql-test/suite/rpl/r/rpl_init_slave_errors.result 2011-02-23 09:31:37 +0000
+++ b/mysql-test/suite/rpl/r/rpl_init_slave_errors.result 2011-03-17 18:13:10 +0000
@@ -7,6 +7,9 @@ start slave;
include/wait_for_slave_sql_error.inc [errno=1593]
Last_SQL_Error = 'Failed during slave thread initialization'
call mtr.add_suppression("Failed during slave.* thread initialization");
+include/stop_slave_io.inc
+Warnings:
+Note 1255 Slave already has been stopped
SET GLOBAL debug= "";
reset slave;
SET GLOBAL init_slave= "garbage";
=== modified file 'mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result'
--- a/mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result 2011-02-23 20:01:27 +0000
+++ b/mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result 2011-03-17 18:13:10 +0000
@@ -18,6 +18,7 @@ include/check_slave_no_error.inc
include/assert.inc [Everything that was read, was executed]
include/assert.inc [There is one row in table tm]
include/assert.inc [There is one row in table ti]
+include/stop_slave.inc
set @@global.debug="-d";
include/start_slave.inc
truncate table tm;
@@ -35,8 +36,11 @@ Last_SQL_Error = 'Fatal error: ... The s
include/assert.inc [Not everything that was read, was executed]
include/assert.inc [There is one row in table tm]
include/assert.inc [There is no row in table ti]
+include/stop_slave.inc
set @@global.debug="-d";
stop slave;
+Warnings:
+Note 1255 Slave already has been stopped
truncate table tm;
include/start_slave.inc
set @@global.debug="+d,stop_slave_middle_group";
@@ -47,6 +51,7 @@ Last_SQL_Error = 'Fatal error: ... The s
include/assert.inc [Not everything that was read, was executed]
include/assert.inc [The max value for field 'a' is 2]
include/assert.inc [The max value for field 'a' is 1]
+include/stop_slave.inc
set @@global.debug="-d";
include/rpl_reset.inc
drop table tm, ti;
=== modified file 'mysql-test/suite/rpl/t/rpl_corruption.test'
--- a/mysql-test/suite/rpl/t/rpl_corruption.test 2011-03-13 19:56:40 +0000
+++ b/mysql-test/suite/rpl/t/rpl_corruption.test 2011-03-17 18:13:10 +0000
@@ -7,6 +7,14 @@
# - in relay log
############################################################
+#
+# The tests intensively utilize @@global.debug. Note,
+# Bug#11765758 - 58754,
+# @@global.debug is read by the slave threads through dbug-interface.
+# Hence, before a client thread set @@global.debug we have to ensure that:
+# (a) the slave threads are stopped, or (b) the slave threads are in
+# sync and waiting.
+
--source include/have_debug.inc
--source include/master-slave.inc
@@ -56,17 +64,10 @@ while ($i) {
}
--enable_query_log
-#
-# Bug #58630 shows `+d' syntax has a side effect Andrei changed it to
-# unsigned `d' version which does not affect the tests logics.
-# todo:
-# restore *all* following `SET GLOBAL debug' back to `+|-d' when Bug
-# #58630, OBug#11765758 sorted out
-#
# Emulate corruption in binlog file when SHOW BINLOG EVENTS is executing
--echo # 2. Corruption in master binlog and SHOW BINLOG EVENTS
-SET GLOBAL debug="d,corrupt_read_log_event_char";
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
--echo SHOW BINLOG EVENTS;
--disable_query_log
send_eval SHOW BINLOG EVENTS FROM $pos;
@@ -74,50 +75,50 @@ send_eval SHOW BINLOG EVENTS FROM $pos;
--error ER_ERROR_WHEN_EXECUTING_COMMAND
reap;
-# see above comments on `+d' syntax
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
# Emulate corruption on master with crc checking on master
--echo # 3. Master read a corrupted event from binlog and send the error to slave
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
--connection slave
START SLAVE IO_THREAD;
let $slave_io_errno= 1236;
--source include/wait_for_slave_io_error.inc
--connection master
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
# Emulate corruption on master without crc checking on master
--echo # 4. Master read a corrupted event from binlog and send it to slave
--connection master
SET GLOBAL master_verify_checksum=0;
-SET GLOBAL debug="d,corrupt_read_log_event";
+SET GLOBAL debug="+d,corrupt_read_log_event";
--connection slave
START SLAVE IO_THREAD;
let $slave_io_errno= 1595,1722;
--source include/wait_for_slave_io_error.inc
--connection master
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event";
SET GLOBAL master_verify_checksum=1;
# Emulate corruption in network
--echo # 5. Slave. Corruption in network
--connection slave
-SET GLOBAL debug="d,corrupt_queue_event";
+SET GLOBAL debug="+d,corrupt_queue_event";
START SLAVE IO_THREAD;
let $slave_io_errno= 1595,1722;
--source include/wait_for_slave_io_error.inc
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_queue_event";
# Emulate corruption in relay log
--echo # 6. Slave. Corruption in relay log
-SET GLOBAL debug="d,corrupt_read_log_event_char";
-START SLAVE;
+SET GLOBAL debug="+d,corrupt_read_log_event_char";
+
+START SLAVE SQL_THREAD;
let $slave_sql_errno= 1593;
--source include/wait_for_slave_sql_error.inc
-SET GLOBAL debug="";
+SET GLOBAL debug="-d,corrupt_read_log_event_char";
# Start normal replication and compare same table on master
# and slave
=== modified file 'mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test'
--- a/mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test 2010-12-19 17:22:30 +0000
+++ b/mysql-test/suite/rpl/t/rpl_get_master_version_and_clock.test 2011-03-17 18:13:10 +0000
@@ -47,6 +47,11 @@ let $dbug_sync_point= 'dbug.before_get_M
let $debug_sync_action= 'now SIGNAL signal.get_master_uuid';
source extra/rpl_tests/rpl_get_master_version_and_clock.test;
+#
+# Note, due to # Bug#11765758 - 58754
+# make sure the slave threads stand still (SQL thread in this context)
+# while @@global.debug is being updated.
+#
eval set global debug= '$debug_saved';
#Test case 4: This test checks that the slave I/O thread refuses to start
=== modified file 'mysql-test/suite/rpl/t/rpl_init_slave_errors.test'
--- a/mysql-test/suite/rpl/t/rpl_init_slave_errors.test 2011-02-23 09:31:37 +0000
+++ b/mysql-test/suite/rpl/t/rpl_init_slave_errors.test 2011-03-17 18:13:10 +0000
@@ -60,6 +60,9 @@ start slave;
call mtr.add_suppression("Failed during slave.* thread initialization");
+# Bug#11765758 - 58754
+# make sure the IO thread is down to allow safe updating to @@global.debug
+--source include/stop_slave_io.inc
SET GLOBAL debug= "";
######################################################################
=== modified file 'mysql-test/suite/rpl/t/rpl_show_slave_running.test'
--- a/mysql-test/suite/rpl/t/rpl_show_slave_running.test 2010-12-19 17:22:30 +0000
+++ b/mysql-test/suite/rpl/t/rpl_show_slave_running.test 2011-03-17 18:13:10 +0000
@@ -76,9 +76,17 @@ echo Slave_SQL_Running= $status;
# cleanup
-connection slave;
+connection master;
+
+# Bug#11765758 - 58754
+# @@global.debug is read by the slave threads through dbug-interface.
+# Hence, before a client thread set @@global.debug we have to ensure that:
+# (a) the slave threads are stopped, or (b) the slave threads are in
+# sync and waiting.
+sync_slave_with_master;
eval set global debug= '$debug_saved';
+
SET DEBUG_SYNC= 'RESET';
--echo End of tests
--source include/rpl_end.inc
=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc 2011-03-14 13:55:44 +0000
+++ b/sql/rpl_slave.cc 2011-03-17 18:13:10 +0000
@@ -4372,7 +4372,7 @@ static int queue_event(Master_info* mi,c
int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
- DBUG_SET("");
+ DBUG_SET("-d,corrupt_queue_event");
}
);
Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20110317181310-ekvwso1nrtua16dn.bundle
| Thread |
|---|
| • bzr commit into mysql-trunk branch (andrei.elkin:3766) Bug#11765758 | Andrei Elkin | 17 Mar |