Hi Libing,
Nice work. See comments in-line.
STATUS
------------------
Not approved.
Best Regards,
Daogang
2010-11-17 17:55, Li-Bing.Song@stripped wrote:
> #At file:///home/anders/work/bzrwork1/wt2/mysql-next-mr-bugfixing/ based on
> revid:alexander.nozdrin@stripped
>
> 3384 Li-Bing.Song@stripped 2010-11-17
> Bug#21437 server_errno=29 error message flood mysqld error log
>
> If an error happens while dumping a binary log from the master,
> the master sends one of the specific set of error messages and
> the slave I/O thread stops immediately.
>
> However, when a slave requests a binlog file which does not exist
> on master, the master sends 'EE_FILENOTFOUND' error (i.e. error code 29)
> to the slave. The 'EE_FILENOTFOUND' is a low level error and the slave
> I/O thread will still retry to request the binlog file, thus flooding
> the mysqld error log.
>
> This happens because the master just sends the first error message set in
> the diagnostic area and as such the I/O thread does not stop. To fix
> the problem, we reset the diagnostic area before setting the
> ER_MASTER_FATAL_ERROR_READING_BINLOG in order to notify and stop the slave
> I/O thread immediately.
>
> modified:
> mysql-test/suite/rpl/r/rpl_manual_change_index_file.result
> mysql-test/suite/rpl/t/rpl_manual_change_index_file.test
> sql/rpl_master.cc
> === modified file 'mysql-test/suite/rpl/r/rpl_manual_change_index_file.result'
> --- a/mysql-test/suite/rpl/r/rpl_manual_change_index_file.result 2010-01-08 05:42:23
> +0000
> +++ b/mysql-test/suite/rpl/r/rpl_manual_change_index_file.result 2010-11-17 09:55:31
> +0000
> @@ -23,3 +23,49 @@ t2
> t3
> t4
> DROP TABLE t1, t2, t3, t4;
> +
> +# BUG#21437 server_errno=29 error message flood mysqld error log
> +# --------------------------------------------------------------------------
> +# This test verifies if the ER_MASTER_FATAL_ERROR_READING_BINLOG which
> +# insteads of EE_FILENOTFOUND error is sent to slave, so that the slave
> +# I/O thread stops immediately.
> +stop slave;
> +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
> +reset master;
> +reset slave;
> +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
> +start slave;
> +[ on master ]
> +call mtr.add_suppression("Got fatal error 1236 from master when reading data from
> binary log:");
> +CREATE TABLE t1(c1 int);
> +DROP TABLE t1;
> +FLUSH LOGS;
> +FLUSH LOGS;
> +CREATE TABLE t2(c1 int);
> +DROP TABLE t2;
> +[ on slave ]
> +include/stop_slave.inc
> +[ on master ]
> +# Remove master-bin.000001 and master-bin.000002 from index file.
> +PURGE MASTER LOGS TO 'master-bin.000003';
> +# Re-insert master-bin.000002 into index file manually.
> +# PURGE is used to update IO_CACHE of index file. After this statement,
> +# master knows that master-bin.000002 is in index file.
> +PURGE MASTER LOGS TO 'master-bin.000002';
> +[ on slave ]
> +CHANGE MASTER TO master_host='127.0.0.1', master_log_file='master-bin.000002';
> +START SLAVE IO_THREAD;
> +# Instead of EE_FILENOTFOUND, ER_MASTER_FATAL_ERROR_READING_BINLOG and the
> +# specific information are sent to slave.
> +Last_IO_Error = Got fatal error 1236 from master when reading data from binary log:
> 'Could not open log file'
> +CHANGE MASTER TO master_host='127.0.0.1', master_log_file='master-bin.000003';
> +include/start_slave.inc
> +[ on master ]
> +# Restore the correct index file.
> +FLUSH LOGS;
> +PURGE MASTER LOGS TO 'master-bin.000004';
> +CREATE TABLE t1(c1 INT);
> +[ on slave ]
> +SELECT * FROM t1;
> +c1
> +DROP TABLE t1;
>
> === modified file 'mysql-test/suite/rpl/t/rpl_manual_change_index_file.test'
> --- a/mysql-test/suite/rpl/t/rpl_manual_change_index_file.test 2010-04-21 17:22:00
> +0000
> +++ b/mysql-test/suite/rpl/t/rpl_manual_change_index_file.test 2010-11-17 09:55:31
> +0000
> @@ -104,4 +104,101 @@ SHOW TABLES;
>
> connection master;
> DROP TABLE t1, t2, t3, t4;
> -source include/master-slave-end.inc;
> +sync_slave_with_master;
> +
> +--echo
> +--echo # BUG#21437 server_errno=29 error message flood mysqld error log
> +--echo # --------------------------------------------------------------------------
> +--echo # This test verifies if the ER_MASTER_FATAL_ERROR_READING_BINLOG which
> +--echo # insteads of EE_FILENOTFOUND error is sent to slave, so that the slave
> +--echo # I/O thread stops immediately.
> +
> +source include/master-slave-reset.inc;
> +--echo [ on master ]
> +connection master;
> +
> +let $MASTER_DATADIR= `SELECT @@DATADIR`;
> +
> +call mtr.add_suppression("Got fatal error 1236 from master when reading data from
> binary log:");
> +CREATE TABLE t1(c1 int);
> +DROP TABLE t1;
> +
> +FLUSH LOGS;
> +FLUSH LOGS;
> +
> +CREATE TABLE t2(c1 int);
> +DROP TABLE t2;
> +sync_slave_with_master;
> +
> +--echo [ on slave ]
> +source include/stop_slave.inc;
> +
> +--echo [ on master ]
> +connection master;
> +--echo # Remove master-bin.000001 and master-bin.000002 from index file.
> +PURGE MASTER LOGS TO 'master-bin.000003';
> +
> +--echo # Re-insert master-bin.000002 into index file manually.
> +if (`SELECT CONVERT(@@VERSION_COMPILE_OS USING latin1) NOT IN ('Win32', 'Win64',
> 'Windows')`)
> +{
> +append_file $MASTER_DATADIR/master-bin.index;
> +./master-bin.000002
> +EOF
> +sleep 0.00000001;
> +}
> +if (`SELECT CONVERT(@@VERSION_COMPILE_OS USING latin1) IN ('Win32', 'Win64',
> 'Windows')`)
> +{
> +append_file $MASTER_DATADIR/master-bin.index;
> +.\master-bin.000002
> +EOF
> +sleep 0.00000001;
>
This is duplicated. Don't the NOT IN and IN conditions together cover all platforms?
> +}
>
> +--echo # PURGE is used to update IO_CACHE of index file. After this statement,
> +--echo # master knows that master-bin.000002 is in index file.
> +PURGE MASTER LOGS TO 'master-bin.000002';
> +
> +--echo [ on slave ]
> +connection slave;
> +CHANGE MASTER TO master_host='127.0.0.1', master_log_file='master-bin.000002';
> +START SLAVE IO_THREAD;
> +
> +--echo # Instead of EE_FILENOTFOUND, ER_MASTER_FATAL_ERROR_READING_BINLOG and the
> +--echo # specific information are sent to slave.
> +let $slave_io_errno= 1236;
> +let $show_slave_io_error= 1;
> +source include/wait_for_slave_io_error.inc;
> +
> +CHANGE MASTER TO master_host='127.0.0.1', master_log_file='master-bin.000003';
> +source include/start_slave.inc;
> +
> +
> +--echo [ on master ]
> +connection master;
> +--echo # Restore the correct index file.
> +let $file= $MASTER_DATADIR/master-bin.index;
> +source include/truncate_file.inc;
> +if (`SELECT CONVERT(@@VERSION_COMPILE_OS USING latin1) NOT IN ('Win32', 'Win64',
> 'Windows')`)
> +{
> +append_file $MASTER_DATADIR/master-bin.index;
> +./master-bin.000003
> +EOF
> +sleep 0.00000001;
> +}
> +if (`SELECT CONVERT(@@VERSION_COMPILE_OS USING latin1) IN ('Win32', 'Win64',
> 'Windows')`)
> +{
> +append_file $MASTER_DATADIR/master-bin.index;
> +.\master-bin.000003
> +EOF
> +sleep 0.00000001;
>
See above.
> +}
> +FLUSH LOGS;
> +PURGE MASTER LOGS TO 'master-bin.000004';
> +
> +CREATE TABLE t1(c1 INT);
> +sync_slave_with_master;
> +--echo [ on slave ]
> +SELECT * FROM t1;
> +
> +connection master;
> +DROP TABLE t1;
> +source include/master-slave-end.inc;
> \ No newline at end of file
>
> === modified file 'sql/rpl_master.cc'
> --- a/sql/rpl_master.cc 2010-09-28 15:17:29 +0000
> +++ b/sql/rpl_master.cc 2010-11-17 09:55:31 +0000
> @@ -1069,6 +1069,15 @@ err:
> mysql_file_close(file, MYF(MY_WME));
> thd->variables.max_allowed_packet= old_max_allowed_packet;
>
> + /*
> + thd->stmt_da will not accept any other error after an error has been
> assigned
> + to it. A low level error (e.g. EE_FILENOTFOUND) has sometimes been set into
> + thd->main_da before a high level error is set. thd->main_da should be
> + cleaned before the high level error is set into main_da and then is sent to
> + slave.
> + */
> + thd->stmt_da->reset_diagnostics_area();
> +
> my_message(my_errno, errmsg, MYF(0));
> DBUG_VOID_RETURN;
> }
>
>
>
>
>
>