Hi Alfranio,
Nice work, thanks for your work!
I have no comment on the code, but I think the test need to provide more
test cases. Currently, only the error ER_LOCK_WAIT_TIMEOUT is tested. In
general, I think each of the errors in function concurrency_error_code()
should have a test case for it.
Alfranio Correia wrote:
> #At
> file:///home/acorreia/workspace.sun/repository.mysql/bzrwork/bug-44581/mysql-5.1-bugteam/
> based on revid:davi.arnaut@stripped
>
> 2936 Alfranio Correia 2009-06-12
> BUG#44581 Slave stops when transaction with non-transactional table gets lock
> wait timeout
>
> In STMT and MIXED modes, when both non-transactional and transactional tables
> are changed on behalf of a transaction, upon commit/rollback its statements
> must
> be written to the binlog. On the master, a failed statement that updates both
> non-transactional and transactional tables are annotated with the error number
> and also written to the binlog. On the slave, while applying such statements,
> it is expected the same failure.
>
> Unfortunately, statements that fail due to concurrency issues (e.g. deadlocks,
> timeouts) are logged in the same way causing the slave to stop as the
> statements
> are applied sequentially by the SQL Thread. To fix this bug, we automatically
> ignore concurrency failures on the slave.
>
> modified:
> sql/log_event.cc
> sql/slave.cc
> sql/slave.h
> === modified file 'sql/log_event.cc'
> --- a/sql/log_event.cc 2009-06-09 16:44:26 +0000
> +++ b/sql/log_event.cc 2009-06-12 00:07:18 +0000
> @@ -369,6 +369,40 @@ int convert_handler_error(int error, THD
> return (actual_error);
> }
>
> +inline bool concurrency_error_code(int error)
> +{
> + switch (error)
> + {
> + case ER_CANT_LOCK:
> + case ER_FILE_USED:
> + case ER_DELAYED_CANT_CHANGE_LOCK:
> + case ER_DELAYED_INSERT_TABLE_LOCKED:
> + case ER_TABLE_NOT_LOCKED_FOR_WRITE:
> + case ER_LOCK_WAIT_TIMEOUT:
> + case ER_LOCK_OR_ACTIVE_TRANSACTION:
> + case ER_LOCK_DEADLOCK:
> + case ER_XA_RBDEADLOCK:
> + return TRUE;
> + default:
> + return (FALSE);
> + }
> +}
> +
> +inline bool check_expected_error(int expected_error)
> +{
> + switch (expected_error)
> + {
> + case ER_NET_READ_ERROR:
> + case ER_NET_ERROR_ON_WRITE:
> + case ER_QUERY_INTERRUPTED:
> + case ER_SERVER_SHUTDOWN:
> + case ER_NEW_ABORTING_CONNECTION:
> + return(TRUE);
> + default:
> + return(FALSE);
> + }
> +}
> +
> /*
> pretty_print_str()
> */
> @@ -3006,7 +3040,7 @@ int Query_log_event::do_apply_event(Rela
> DBUG_PRINT("query",("%s",thd->query));
>
> if (ignored_error_code((expected_error= error_code)) ||
> - !check_expected_error(thd,rli,expected_error))
> + !check_expected_error(expected_error))
> {
> if (flags2_inited)
> /*
> @@ -3138,8 +3172,8 @@ compare_errors:
> actual_error= thd->is_error() ? thd->main_da.sql_errno() : 0;
> DBUG_PRINT("info",("expected_error: %d sql_errno: %d",
> expected_error, actual_error));
> - if ((expected_error != actual_error) &&
> - expected_error &&
> + if ((expected_error && expected_error != actual_error &&
> + !concurrency_error_code(expected_error)) &&
> !ignored_error_code(actual_error) &&
> !ignored_error_code(expected_error))
> {
> @@ -3158,7 +3192,8 @@ Default database: '%s'. Query: '%s'",
> /*
> If we get the same error code as expected, or they should be ignored.
> */
> - else if (expected_error == actual_error ||
> + else if ((expected_error == actual_error &&
> + !concurrency_error_code(expected_error)) ||
> ignored_error_code(actual_error))
> {
> DBUG_PRINT("info",("error ignored"));
>
> === modified file 'sql/slave.cc'
> --- a/sql/slave.cc 2009-06-09 16:44:26 +0000
> +++ b/sql/slave.cc 2009-06-12 00:07:18 +0000
> @@ -1863,25 +1863,6 @@ static ulong read_event(MYSQL* mysql, Ma
> DBUG_RETURN(len - 1);
> }
>
> -
> -int check_expected_error(THD* thd, Relay_log_info const *rli,
> - int expected_error)
> -{
> - DBUG_ENTER("check_expected_error");
> -
> - switch (expected_error) {
> - case ER_NET_READ_ERROR:
> - case ER_NET_ERROR_ON_WRITE:
> - case ER_QUERY_INTERRUPTED:
> - case ER_SERVER_SHUTDOWN:
> - case ER_NEW_ABORTING_CONNECTION:
> - DBUG_RETURN(1);
> - default:
> - DBUG_RETURN(0);
> - }
> -}
> -
> -
> /*
> Check if the current error is of temporary nature of not.
> Some errors are temporary in nature, such as
>
> === modified file 'sql/slave.h'
> --- a/sql/slave.h 2009-04-30 13:17:46 +0000
> +++ b/sql/slave.h 2009-06-12 00:07:18 +0000
> @@ -171,7 +171,6 @@ bool rpl_master_has_bug(const Relay_log_
> bool rpl_master_erroneous_autoinc(THD* thd);
>
> const char *print_slave_db_safe(const char *db);
> -int check_expected_error(THD* thd, Relay_log_info const *rli, int error_code);
> void skip_load_data_infile(NET* net);
>
> void end_slave(); /* clean up */
>