Hi Serge,
sync_with_master is still around, which causes problems. Please
replace it with:
-- connection master
-- sync_slave_with_master
Also, there are some indentation issues; see my in-line comments below.
Given that these are such small changes, I will approve the patch
anyway.
Regards,
Luís Soares
On 10/31/2010 08:06 PM, Serge Kozlov wrote:
> #At file:///home/ksm/oracle/repo/WL5064/mysql-next-mr-wl2540-commit-2/ based on
> revid:andrei.elkin@stripped
>
> 3188 Serge Kozlov 2010-10-31
> WL#5064 Emulate the corruption of events for replication at vary stages:
> reading from binlog/relay log, receiving by slave IO thread from network
>
> added:
> mysql-test/suite/rpl/r/rpl_corruption.result
> mysql-test/suite/rpl/t/rpl_corruption-master.opt
> mysql-test/suite/rpl/t/rpl_corruption-slave.opt
> mysql-test/suite/rpl/t/rpl_corruption.test
> modified:
> sql/log_event.cc
> sql/mysqld.cc
> sql/rpl_slave.cc
> === added file 'mysql-test/suite/rpl/r/rpl_corruption.result'
> --- a/mysql-test/suite/rpl/r/rpl_corruption.result 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/suite/rpl/r/rpl_corruption.result 2010-10-31 20:05:57 +0000
> @@ -0,0 +1,46 @@
> +stop slave;
> +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
> +reset master;
> +reset slave;
> +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
> +start slave;
> +call mtr.add_suppression('Found invalid event in binary log');
> +call mtr.add_suppression('Slave I/O: Relay log write failure: could not queue event
> from master');
> +call mtr.add_suppression('event read from binlog did not pass crc check');
> +call mtr.add_suppression('Replication event checksum verification failed');
> +SET @old_master_verify_checksum = @@master_verify_checksum;
> +# 1. Creating test table/data and set corruption position for testing
> +* insert/update/delete rows in table t1 *
> +CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b VARCHAR(10), c TEXT);
> +include/stop_slave.inc
> +# 2. Corruption in master binlog and SHOW BINLOG EVENTS
> +SET GLOBAL debug="+d,corrupt_read_log_event_char";
> +SHOW BINLOG EVENTS;
> +ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Wrong offset or I/O
> error
> +SET GLOBAL debug="-d,corrupt_read_log_event_char";
> +# 3. Master read a corrupted event from binlog and send the error to slave
> +SET GLOBAL debug="+d,corrupt_read_log_event";
> +START SLAVE IO_THREAD;
> +SET GLOBAL debug="-d,corrupt_read_log_event";
> +# 4. Master read a corrupted event from binlog and send it to slave
> +SET GLOBAL master_verify_checksum=0;
> +SET GLOBAL debug="+d,corrupt_read_log_event";
> +START SLAVE IO_THREAD;
> +SET GLOBAL debug="-d,corrupt_read_log_event";
> +SET GLOBAL master_verify_checksum=1;
> +# 5. Slave. Corruption in network
> +SET GLOBAL debug="+d,corrupt_queue_event";
> +START SLAVE IO_THREAD;
> +SET GLOBAL debug="-d,corrupt_queue_event";
> +# 6. Slave. Corruption in relay log
> +SET GLOBAL debug="+d,corrupt_read_log_event_char";
> +START SLAVE;
> +SET GLOBAL debug="-d,corrupt_read_log_event_char";
> +# 7. Seek diff for tables on master and slave
> +include/start_slave.inc
> +Comparing tables master:test.t1 and slave:test.t1
> +# 8. Clean up
> +SET GLOBAL debug= "";
> +SET GLOBAL master_verify_checksum = @old_master_verify_checksum;
> +DROP TABLE t1;
> +SET GLOBAL debug= "";
>
> === added file 'mysql-test/suite/rpl/t/rpl_corruption-master.opt'
> --- a/mysql-test/suite/rpl/t/rpl_corruption-master.opt 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/suite/rpl/t/rpl_corruption-master.opt 2010-10-31 20:05:57 +0000
> @@ -0,0 +1 @@
> +--binlog-checksum=CRC32 --master-verify-checksum=1
>
> === added file 'mysql-test/suite/rpl/t/rpl_corruption-slave.opt'
> --- a/mysql-test/suite/rpl/t/rpl_corruption-slave.opt 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/suite/rpl/t/rpl_corruption-slave.opt 2010-10-31 20:05:57 +0000
> @@ -0,0 +1 @@
> +--binlog-checksum=CRC32 --slave-sql-verify-checksum=1
>
> === added file 'mysql-test/suite/rpl/t/rpl_corruption.test'
> --- a/mysql-test/suite/rpl/t/rpl_corruption.test 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/suite/rpl/t/rpl_corruption.test 2010-10-31 20:05:57 +0000
> @@ -0,0 +1,127 @@
> +############################################################
> +# Author: Serge Kozlov<serge.kozlov@stripped>
> +# Date: 17 Oct 2010
> +# Purpose: WL#5064 Testing with corrupted events.
> +# The test emulates the corruption at the vary stages
> +# of replication:
> +# - in binlog file
> +# - in network
> +# - in relay log
> +############################################################
> +
> +--source include/have_debug.inc
> +--source include/master-slave.inc
> +
> +# Block legal errors for MTR
> +call mtr.add_suppression('Found invalid event in binary log');
> +call mtr.add_suppression('Slave I/O: Relay log write failure: could not queue event
> from master');
> +call mtr.add_suppression('event read from binlog did not pass crc check');
> +call mtr.add_suppression('Replication event checksum verification failed');
> +
> +SET @old_master_verify_checksum = @@master_verify_checksum;
> +
> +# Creating test table/data and set corruption position for testing
> +--echo # 1. Creating test table/data and set corruption position for testing
> +--connection master
> +--echo * insert/update/delete rows in table t1 *
> +# Corruption algorithm modifies only the first event and
> +# then will be reset. To avoid checking always the first event
> +# from binlog (usually it is FD) we randomly execute different
> +# statements and set position for corruption inside events.
> +
> +CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b VARCHAR(10), c TEXT);
> +--disable_query_log
> +let $i=`SELECT 3+CEILING(10*RAND())`;
> +let $j=1;
> +let $pos=0;
> +while ($i) {
> + eval INSERT INTO t1 VALUES ($j, 'a', NULL);
> + if (`SELECT RAND()> 0.7`)
> + {
> + eval UPDATE t1 SET c = REPEAT('a', 20) WHERE a = $j;
> + }
> + if (`SELECT RAND()> 0.8`)
> + {
> + eval DELETE FROM t1 WHERE a = $j;
> + }
> + if (!$pos) {
> + let $pos= query_get_value(SHOW MASTER STATUS, Position, 1);
> + --sync_slave_with_master
> + --source include/stop_slave.inc
> + --disable_query_log
> + --connection master
> + }
> + dec $i;
> + inc $j;
> +}
> +--enable_query_log
> +
> +# Emulate corruption in binlog file when SHOW BINLOG EVENTS is executing
> +--echo # 2. Corruption in master binlog and SHOW BINLOG EVENTS
> +SET GLOBAL debug="+d,corrupt_read_log_event_char";
> +--echo SHOW BINLOG EVENTS;
> +--disable_query_log
> +send_eval SHOW BINLOG EVENTS FROM $pos;
> +--enable_query_log
> +--error ER_ERROR_WHEN_EXECUTING_COMMAND
> +reap;
> +SET GLOBAL debug="-d,corrupt_read_log_event_char";
> +
> +# Emulate corruption on master with crc checking on master
> +--echo # 3. Master read a corrupted event from binlog and send the error to slave
> +SET GLOBAL debug="+d,corrupt_read_log_event";
> +--connection slave
> +START SLAVE IO_THREAD;
> +let $slave_io_errno= 1236;
> +--source include/wait_for_slave_io_error.inc
> +--connection master
> +SET GLOBAL debug="-d,corrupt_read_log_event";
> +
> +# Emulate corruption on master without crc checking on master
> +--echo # 4. Master read a corrupted event from binlog and send it to slave
> +--connection master
> +SET GLOBAL master_verify_checksum=0;
> +SET GLOBAL debug="+d,corrupt_read_log_event";
> +--connection slave
> +START SLAVE IO_THREAD;
> +let $slave_io_errno= 1595;
> +--source include/wait_for_slave_io_error.inc
> +--connection master
> +SET GLOBAL debug="-d,corrupt_read_log_event";
> +SET GLOBAL master_verify_checksum=1;
> +
> +# Emulate corruption in network
> +--echo # 5. Slave. Corruption in network
> +--connection slave
> +SET GLOBAL debug="+d,corrupt_queue_event";
> +START SLAVE IO_THREAD;
> +let $slave_io_errno= 1595;
> +--source include/wait_for_slave_io_error.inc
> +SET GLOBAL debug="-d,corrupt_queue_event";
> +
> +# Emulate corruption in relay log
> +--echo # 6. Slave. Corruption in relay log
> +SET GLOBAL debug="+d,corrupt_read_log_event_char";
> +START SLAVE;
> +let $slave_sql_errno= 1593;
> +--source include/wait_for_slave_sql_error.inc
> +SET GLOBAL debug="-d,corrupt_read_log_event_char";
> +
> +# Start normal replication and compare same table on master
> +# and slave
> +--echo # 7. Seek diff for tables on master and slave
> +--connection slave
> +--source include/start_slave.inc
> +--sync_with_master
-- connection master
-- sync_slave_with_master
> +let $diff_table_1= master:test.t1;
> +let $diff_table_2= slave:test.t1;
> +--source include/diff_tables.inc
> +
> +# Clean up
> +--echo # 8. Clean up
> +--connection master
> +SET GLOBAL debug= "";
> +SET GLOBAL master_verify_checksum = @old_master_verify_checksum;
> +DROP TABLE t1;
> +--sync_slave_with_master
> +SET GLOBAL debug= "";
>
> === modified file 'sql/log_event.cc'
> --- a/sql/log_event.cc 2010-10-25 19:02:24 +0000
> +++ b/sql/log_event.cc 2010-10-31 20:05:57 +0000
> @@ -1240,6 +1240,17 @@ int Log_event::read_log_event(IO_CACHE*
> }
> else
> {
> + /* Corrupt the event for Dump thread*/
> + DBUG_EXECUTE_IF("corrupt_read_log_event",
> + uchar *debug_event_buf_c = (uchar*) packet->ptr() + ev_offset;
> + if (debug_event_buf_c[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT)
> + {
> + int debug_cor_pos = rand() % (data_len + sizeof(buf) -
> BINLOG_CHECKSUM_LEN);
> + debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
> + DBUG_PRINT("info", ("Corrupt the event at Log_event::read_log_event: byte on
> position %d", debug_cor_pos));
> + DBUG_SET("-d,corrupt_read_log_event");
Please fix the indentation.
> + }
> + );
> /*
> CRC verification of the Dump thread
> */
> @@ -1425,7 +1436,17 @@ Log_event* Log_event::read_log_event(con
> */
> alg= (event_type != FORMAT_DESCRIPTION_EVENT) ?
> description_event->checksum_alg : get_checksum_alg(buf, event_len);
> -
> + // Emulate the corruption during reading an event
> + DBUG_EXECUTE_IF("corrupt_read_log_event_char",
> + if (event_type != FORMAT_DESCRIPTION_EVENT)
> + {
> + char *debug_event_buf_c = (char *)buf;
> + int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
> + debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
> + DBUG_PRINT("info", ("Corrupt the event at
> Log_event::read_log_event(char*,...): byte on position %d", debug_cor_pos));
> + DBUG_SET("-d,corrupt_read_log_event_char");
> + }
> + );
> if (crc_check&&
> event_checksum_test((uchar *) buf, event_len, alg))
> {
>
> === modified file 'sql/mysqld.cc'
> --- a/sql/mysqld.cc 2010-10-25 19:02:24 +0000
> +++ b/sql/mysqld.cc 2010-10-31 20:05:57 +0000
> @@ -4572,6 +4572,7 @@ int mysqld_main(int argc, char **argv)
>
> #ifndef DBUG_OFF
> test_lc_time_sz();
> + srand(time(NULL));
> #endif
>
> /*
>
> === modified file 'sql/rpl_slave.cc'
> --- a/sql/rpl_slave.cc 2010-10-27 10:23:24 +0000
> +++ b/sql/rpl_slave.cc 2010-10-31 20:05:57 +0000
> @@ -4333,7 +4333,19 @@ static int queue_event(Master_info* mi,c
> // will have to refine the clause.
> DBUG_ASSERT(mi->rli->relay_log.relay_log_checksum_alg !=
> BINLOG_CHECKSUM_ALG_UNDEF);
> -
> +
> + // Emulate the network corruption
> + DBUG_EXECUTE_IF("corrupt_queue_event",
> + if (buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT)
> + {
> + char *debug_event_buf_c = (char*) buf;
> + int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
> + debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
> + DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d",
> debug_cor_pos));
> + DBUG_SET("-d,corrupt_queue_event");
> + }
> + );
> +
> if (event_checksum_test((uchar *) buf, event_len, checksum_alg))
> {
> error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE;
>
>
>
>
>