Hi Andrei!
Thanks for fixing this.
As we discussed on IRC, please replace 'while (!io_slave_killed)' by 'if
(!io_slave_killed)' in the outer loop of handle_slave_io.
There is one place where errors are cleared redundantly. I would also
like more explicit comments in the test case, and a test case for
Last_SQL_Err*. See below.
/Sven
Andrei Elkin wrote:
> Below is the list of changes that have just been committed into a local
> 6.0 repository of aelkin. When aelkin does a push these changes
> will be propagated to the main repository and, within 24 hours after the
> push, to the public repository.
> For information on how to access the public repository
> see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
>
> ChangeSet@stripped, 2008-05-01 16:26:05+03:00, aelkin@stripped +5 -0
> Bug #34654 RESET SLAVE and START SLAVE does not clear Last_IO_Errno
>
> There were no resetting of the io thread's error status at reset_slave() and
> at the beginning of handle_slave_io() that corresponds to start slave.
>
> Fixed with deploying clean-up at necessary places of the code.
>
> mysql-test/extra/rpl_tests/rpl_reset_slave.test@stripped, 2008-05-01 16:26:01+03:00,
> aelkin@stripped +51 -0
> adding regression test for bug#34654 that tests clearing the IO error status
> for start slave and reset slave.
>
> mysql-test/suite/rpl/r/rpl_row_reset_slave.result@stripped, 2008-05-01 16:26:01+03:00,
> aelkin@stripped +21 -0
> results changed
>
> mysql-test/suite/rpl/r/rpl_stm_reset_slave.result@stripped, 2008-05-01 16:26:01+03:00,
> aelkin@stripped +21 -0
> results changed
>
> sql/slave.cc@stripped, 2008-05-01 16:26:01+03:00, aelkin@stripped +3 -0
> explicitly clearing the last mi's error at
>
> 1. entering handle_slave_io
> 2. getting to the read loop after passing successfully
> connecting, checking master version etc, registering and
> requesting for the dump
> 3. right after successful connecting
>
> sql/sql_repl.cc@stripped, 2008-05-01 16:26:01+03:00, aelkin@stripped +1
> -0
> reset slave now resets the io thread error.
>
> diff -Nrup a/mysql-test/extra/rpl_tests/rpl_reset_slave.test
> b/mysql-test/extra/rpl_tests/rpl_reset_slave.test
> --- a/mysql-test/extra/rpl_tests/rpl_reset_slave.test 2008-02-05 15:29:36 +02:00
> +++ b/mysql-test/extra/rpl_tests/rpl_reset_slave.test 2008-05-01 16:26:01 +03:00
> @@ -42,3 +42,54 @@ reset slave;
> start slave;
> sync_with_master;
> show status like 'slave_open_temp_tables';
> +
> +#
> +#Bug#34654 RESET SLAVE does not clear LAST_IO_Err*
> +#
> +
> +# clearing the status
> +stop slave;
> +reset slave;
> +let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
> +echo *** errno must be zero: $last_io_errno ***;
> +
> +#
> +# verifying start slave resets
> +#
Please be more explicit in the comment describing the purpose of the
test. You can write something like:
# Verify that "START SLAVE" resets Last_IO_Error and Last_IO_Errno.
> +
> +change master to master_user='impossible_user_name';
> +start slave;
> +source include/wait_for_slave_io_to_stop.inc;
> +let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
> +--disable_query_log
> +eval SELECT $last_io_errno > 0 as ONE;
> +--enable_query_log
> +
> +stop slave;
> +change master to master_user='root';
> +start slave;
> +source include/wait_for_slave_to_start.inc;
> +let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
> +let $last_io_error= query_get_value(SHOW SLAVE STATUS, Last_IO_Error, 1);
> +--echo *** last errno must be zero: $last_io_errno ***
> +--echo *** last error must be blank: $last_io_error ***
> +
> +#
> +# verifying reset slave resets
> +#
Please be more explicit in the comment (see above).
> +
> +stop slave;
> +change master to master_user='impossible_user_name';
> +start slave;
> +source include/wait_for_slave_io_to_stop.inc;
> +let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
> +--disable_query_log
> +eval SELECT $last_io_errno > 0 as ONE;
> +--enable_query_log
> +
> +stop slave;
> +reset slave;
> +let $last_io_errno= query_get_value(SHOW SLAVE STATUS, Last_IO_Errno, 1);
> +let $last_io_error= query_get_value(SHOW SLAVE STATUS, Last_IO_Error, 1);
> +--echo *** last errno must be zero: $last_io_errno ***
> +--echo *** last error must be blank: $last_io_error ***
I think the test should also verify that "RESET SLAVE" resets Last_SQL_Err*.
> diff -Nrup a/mysql-test/suite/rpl/r/rpl_row_reset_slave.result
> b/mysql-test/suite/rpl/r/rpl_row_reset_slave.result
> --- a/mysql-test/suite/rpl/r/rpl_row_reset_slave.result 2008-02-05 15:29:36 +02:00
> +++ b/mysql-test/suite/rpl/r/rpl_row_reset_slave.result 2008-05-01 16:26:01 +03:00
> @@ -175,3 +175,24 @@ start slave;
> show status like 'slave_open_temp_tables';
> Variable_name Value
> Slave_open_temp_tables 0
> +stop slave;
> +reset slave;
> +*** errno must be zero: 0 ***
> +change master to master_user='impossible_user_name';
> +start slave;
> +ONE
> +1
> +stop slave;
> +change master to master_user='root';
> +start slave;
> +*** last errno must be zero: 0 ***
> +*** last error must be blank: ***
> +stop slave;
> +change master to master_user='impossible_user_name';
> +start slave;
> +ONE
> +1
> +stop slave;
> +reset slave;
> +*** last errno must be zero: 0 ***
> +*** last error must be blank: ***
> diff -Nrup a/mysql-test/suite/rpl/r/rpl_stm_reset_slave.result
> b/mysql-test/suite/rpl/r/rpl_stm_reset_slave.result
> --- a/mysql-test/suite/rpl/r/rpl_stm_reset_slave.result 2008-02-05 15:29:36 +02:00
> +++ b/mysql-test/suite/rpl/r/rpl_stm_reset_slave.result 2008-05-01 16:26:01 +03:00
> @@ -175,3 +175,24 @@ start slave;
> show status like 'slave_open_temp_tables';
> Variable_name Value
> Slave_open_temp_tables 1
> +stop slave;
> +reset slave;
> +*** errno must be zero: 0 ***
> +change master to master_user='impossible_user_name';
> +start slave;
> +ONE
> +1
> +stop slave;
> +change master to master_user='root';
> +start slave;
> +*** last errno must be zero: 0 ***
> +*** last error must be blank: ***
> +stop slave;
> +change master to master_user='impossible_user_name';
> +start slave;
> +ONE
> +1
> +stop slave;
> +reset slave;
> +*** last errno must be zero: 0 ***
> +*** last error must be blank: ***
> diff -Nrup a/sql/slave.cc b/sql/slave.cc
> --- a/sql/slave.cc 2008-04-01 13:56:42 +03:00
> +++ b/sql/slave.cc 2008-05-01 16:26:01 +03:00
> @@ -2114,6 +2114,7 @@ pthread_handler_t handle_slave_io(void *
>
> pthread_detach_this_thread();
> thd->thread_stack= (char*) &thd; // remember where our stack is
> + mi->clear_error();
> if (init_slave_thread(thd, SLAVE_THD_IO))
> {
> pthread_cond_broadcast(&mi->start_cond);
> @@ -2228,6 +2229,7 @@ requesting master dump") ||
> goto connected;
> });
>
> + mi->clear_error(); // getting into read loop with clear sheet
This place is actually redundant: the error is already cleared at this
point.
The error was cleared in the beginning of handle_slave_io(). If new
errors are reported after that, either the errors will be cleared again
before reaching the read loop, or the read loop will not be reached:
here is the list of all possible errors reported in handle_slave_io()
before the read loop:
- in handle_slave_io() if mysql_init() fails, but then we execute
'goto err'.
- in get_master_version_and_clock(), but then we return from
handle_slave_io().
- in register_slave_on_master(), but only if it fails. In this case,
either 'goto err' is executed or try_to_reconnect() succeeds and clears
the error (actually, try_to_reconnect calls safe_reconnect which calls
connect_to_master which resets the error).
Argh, the reconnect logic of handle_slave_io is so horrible :-( . I'd
like to see this refactored in the future (not in this bug, of course).
> while (!io_slave_killed(thd,mi))
> {
> ulong event_len;
> @@ -3411,6 +3413,7 @@ static int connect_to_master(THD* thd, M
>
> if (!slave_was_killed)
> {
> + mi->clear_error();
> if (reconnect)
> {
> if (!suppress_warnings && global_system_variables.log_warnings)
> diff -Nrup a/sql/sql_repl.cc b/sql/sql_repl.cc
> --- a/sql/sql_repl.cc 2008-03-08 12:14:45 +02:00
> +++ b/sql/sql_repl.cc 2008-05-01 16:26:01 +03:00
> @@ -1114,6 +1114,7 @@ int reset_slave(THD *thd, Master_info* m
> Reset errors (the idea is that we forget about the
> old master).
> */
> + mi->clear_error();
> mi->rli.clear_error();
> mi->rli.clear_until_condition();
>
>
--
Sven Sandberg, Software Engineer
MySQL AB, www.mysql.com