From: Andrei Elkin Date: September 4 2009 1:43pm Subject: Re: bzr commit into mysql-5.1-bugteam branch (Li-Bing.Song:3069) Bug#42914 List-Archive: http://lists.mysql.com/commits/82472 Message-Id: <87pra63jw2.fsf@mysql1000.dsl.inet.fi> MIME-Version: 1.0 Content-Type: text/plain; CHARSET=US-ASCII Content-Transfer-Encoding: 7BIT Li-Bing, hello. > #At file:///home/anders/work/bzrwork/bug42914/mysql-5.1-bugteam/ based on revid:davi.arnaut@stripped > > 3069 Li-Bing.Song@stripped 2009-08-25 > Bug #42914 Log event that larger than max_allowed_packet results in stop of slave I/O thread, But there is no Last_IO_Error reported. > > On master, if replicated event larger than max_allowed_packet, > master sends an error message ER_MASTER_FATAL_ERROR_READING_BINLOG to slave. > This message results in stop of slave I/O thread. > On slave, slave I/O thread will stop when receiveing a packet larger than max_allowed_packet. > In both the cases, there is no Last_IO_Error reported. > ok, almost pretty clear. Just one point, we agreed on repliation meeting that the slave will show the exact reason of stopping. Further, I (actually we, both developers in consencus) suggest how to improve implementation of this idea. > This patch adds code to report Last_IO_Error befor slave I/O thread stopping. > There is another case which results in stop of slave I/O thread, but dose not report a Last_IO_Error. > This patch fixs it too. > > added: > mysql-test/suite/rpl/r/rpl_packet_row.result > mysql-test/suite/rpl/t/rpl_packet_row.cnf > mysql-test/suite/rpl/t/rpl_packet_row.test > modified: > mysql-test/suite/rpl/r/rpl_packet.result > mysql-test/suite/rpl/t/rpl_packet.test > sql/slave.cc > === modified file 'mysql-test/suite/rpl/r/rpl_packet.result' > --- a/mysql-test/suite/rpl/r/rpl_packet.result 2009-01-23 12:22:05 +0000 > +++ b/mysql-test/suite/rpl/r/rpl_packet.result 2009-08-25 14:17:34 +0000 > @@ -32,6 +32,9 @@ include/start_slave.inc > CREATE TABLE `t1` (`f1` LONGTEXT) ENGINE=MyISAM; > INSERT INTO `t1`(`f1`) VALUES ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa! aa! > aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa! aa! > aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa! > aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa2048'); > Slave_IO_Running = No (expect No) > +SELECT "Got a packet bigger than 'max_allowed_packet' bytes" AS Last_IO_Error; > +Last_IO_Error > +Got a packet bigger than 'max_allowed_packet' bytes The improvement is in that to display a clarifying clause why the slave has stopped, like in the above ^. > ==== clean up ==== > DROP TABLE t1; > SET @@global.max_allowed_packet= 1024; > > === added file 'mysql-test/suite/rpl/r/rpl_packet_row.result' > --- a/mysql-test/suite/rpl/r/rpl_packet_row.result 1970-01-01 00:00:00 +0000 > +++ b/mysql-test/suite/rpl/r/rpl_packet_row.result 2009-08-25 14:17:34 +0000 > @@ -0,0 +1,19 @@ > +stop slave; > +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; > +reset master; > +reset slave; > +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; > +start slave; > +DROP TABLE IF EXISTS t1; > +CREATE TABLE t1 (f1 int PRIMARY KEY, f2 LONGTEXT, f3 LONGTEXT) ENGINE=MyISAM; > +SELECT @@global.max_allowed_packet; > +@@global.max_allowed_packet > +2048 > +INSERT INTO t1(f1, f2, f3) VALUES(1, REPEAT('a', @@global.max_allowed_packet), REPEAT('b', @@global.max_allowed_packet)); > +Slave_IO_Running = No (expect No) > +SELECT "Got fatal error 1236: 'error reading log entry' from master when reading data from binary log" AS Last_IO_Error; > +Last_IO_Error > +Got fatal error 1236: 'error reading log entry' from master when > reading data from binary log Compare the above `Got a packet bigger than' with the current to see the latter is definetely opaque. It's rather easy to fix that. Plz read on. > +==== clean up ==== > +DROP TABLE t1; > +DROP TABLE t1; > > === modified file 'mysql-test/suite/rpl/t/rpl_packet.test' > --- a/mysql-test/suite/rpl/t/rpl_packet.test 2009-01-23 12:22:05 +0000 > +++ b/mysql-test/suite/rpl/t/rpl_packet.test 2009-08-25 14:17:34 +0000 > @@ -86,6 +86,12 @@ connection slave; > --source include/wait_for_slave_io_to_stop.inc > let $slave_io_running= query_get_value(SHOW SLAVE STATUS, Slave_IO_Running, 1); > --echo Slave_IO_Running = $slave_io_running (expect No) > +# > +# Bug42914: The slave I/O thread must stop after trying to read the above > +# event, but there is no Last_IO_Error reported. > +# > +let $last_io_error= query_get_value(SHOW SLAVE STATUS, Last_IO_Error, 1); > +eval SELECT "$last_io_error" AS Last_IO_Error; > > --echo ==== clean up ==== > connection master; > > === added file 'mysql-test/suite/rpl/t/rpl_packet_row.cnf' > --- a/mysql-test/suite/rpl/t/rpl_packet_row.cnf 1970-01-01 00:00:00 +0000 > +++ b/mysql-test/suite/rpl/t/rpl_packet_row.cnf 2009-08-25 14:17:34 +0000 > @@ -0,0 +1,5 @@ > +!include ../my.cnf > +[mysqld.1] > +max-allowed-packet=2048 > +[mysqld.2] > +max-allowed-packet=2048 > Btw, the new test file should be executable with any format as our problem is format-value-agnostic. Could you please relocate the content of the new file into the existing rpl_packet.test? > === added file 'mysql-test/suite/rpl/t/rpl_packet_row.test' > --- a/mysql-test/suite/rpl/t/rpl_packet_row.test 1970-01-01 00:00:00 +0000 > +++ b/mysql-test/suite/rpl/t/rpl_packet_row.test 2009-08-25 14:17:34 +0000 > @@ -0,0 +1,33 @@ > +# > +# Bug#42914: On master, if replicated event larger than max_allowed_packet, > +# master sends an error message ER_MASTER_FATAL_ERROR_READING_BINLOG to slave. > +# This message results in stop of slave I/O thread. But There is no > +# Last_IO_Error report. > +# > + > +source include/master-slave.inc; > +source include/have_binlog_format_row.inc; > +disable_warnings; > +DROP TABLE IF EXISTS t1; > +enable_warnings; > +CREATE TABLE t1 (f1 int PRIMARY KEY, f2 LONGTEXT, f3 LONGTEXT) ENGINE=MyISAM; > + > +SELECT @@global.max_allowed_packet; > +INSERT INTO t1(f1, f2, f3) VALUES(1, REPEAT('a', @@global.max_allowed_packet), REPEAT('b', @@global.max_allowed_packet)); > + > +connection slave; > +# The slave I/O thread must stop after receiving > +# ER_MASTER_FATAL_ERROR_READING_BINLOG error message from master. > +--source include/wait_for_slave_io_to_stop.inc > +let $slave_io_running= query_get_value(SHOW SLAVE STATUS, Slave_IO_Running, 1); > +--echo Slave_IO_Running = $slave_io_running (expect No) > +let $last_io_error= query_get_value(SHOW SLAVE STATUS, Last_IO_Error, 1); > +eval SELECT "$last_io_error" AS Last_IO_Error; > + > +--echo ==== clean up ==== > +connection master; > +DROP TABLE t1; > +# slave is stopped > +connection slave; > +DROP TABLE t1; > +# End of tests > > === modified file 'sql/slave.cc' > --- a/sql/slave.cc 2009-08-13 20:45:01 +0000 > +++ b/sql/slave.cc 2009-08-25 14:17:34 +0000 > @@ -2681,15 +2681,22 @@ Log entry on master is longer than max_a > slave. If the entry is correct, restart the server with a higher value of \ > max_allowed_packet", > thd->variables.max_allowed_packet); > + mi->report(ERROR_LEVEL, ER_NET_PACKET_TOO_LARGE, > + ER(ER_NET_PACKET_TOO_LARGE)); very good! > goto err; > case ER_MASTER_FATAL_ERROR_READING_BINLOG: > - sql_print_error(ER(mysql_error_number), mysql_error_number, > - mysql_error(mysql)); > + mi->report(ERROR_LEVEL, ER_MASTER_FATAL_ERROR_READING_BINLOG, > + ER(ER_MASTER_FATAL_ERROR_READING_BINLOG), > + mysql_error_number, mysql_error(mysql)); But here we don't say what exactly happened on the master. And we could. Look inside mysql_binlog_send() to find test_for_non_eof_log_read_errors() which internally qualifies the exact reason. That's an omission of the existing implementation that the error message corresponding to `ER_MASTER_FATAL_ERROR_READING_BINLOG' is always the same. So we suggest to fix that. Namely, 1. To extend the format line for ER_MASTER_FATAL_ERROR_READING_BINLOG with %s to hold clarifying clause. E.g ER_MASTER_FATAL_ERROR_READING_BINLOG eng "Got fatal error %d: '%-.128s' from master when reading data from binary log" to append "specific reason: %s". 2. Append a specific reason string arg to the error message generator e.g my_error(ER_MASTER_FATAL_ERROR_READING_BINLOG..., spec_reason). Check with test_for_non_eof_log_read_errors() as well as with lines of mysql_binlog_send() like errmsg = "Binary log is not open"; my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; what kind of specific reason. After making 1,2 the master shall see some fully qualified error message. > goto err; > case EE_OUTOFMEMORY: > case ER_OUTOFMEMORY: > sql_print_error("\ > Stopping slave I/O thread due to out-of-memory error from master"); > + > + mi->report(ERROR_LEVEL, ER_OUTOFMEMORY, > + ER(ER_OUTOFMEMORY), "\ > +Stopping slave I/O thread due to out-of-memory error from master"); good. > goto err; > } > if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings, > cheers, Andrei