List:Commits« Previous MessageNext Message »
From:Alfranio Correia Date:March 28 2011 9:31am
Subject:Re: bzr commit into mysql-trunk branch (andrei.elkin:3311) Bug#11748510
View as plain text  
Hi Andrei,

Great work !!!
Thanks for addressing my comments.
Patch approved.

I have just one suggestion that you may do if you agree on it.
Please, make "has_temporary_error" a static member function in
Slave_reporting_capability.

Cheers.


On 03/27/2011 07:16 PM, Andrei Elkin wrote:
> #At file:///home/andrei/MySQL/BZR/2a-23May/FIXES/bug11748510_36524_less_drama/ based
> on revid:magne.mahre@stripped
> 
>  3311 Andrei Elkin	2011-03-27
>       bug#11748510 incident of deadlock on slave is overdramatized
>       
>       In cases of temporary errors in replication event execution (deadlock,
>       timeout) the error log gained an overreacting error message whereas
>       just a warning would be fine.
>       
>       Fixed with checking of the temporary status of the error inside
>       Slave_reporting_capability::report()  to demote the error to the
>       warning level if needed. 
>       The warning gets converted into the error
>       whenever number of successive attempts to re-apply the event(s) gets
>       equal to @@global.slave_trans_retries.
>      @ mysql-test/extra/rpl_tests/rpl_deadlock.test
>         regression test for bug#11748510 is added.
>      @ mysql-test/suite/rpl/r/rpl_deadlock_innodb.result
>         results get updated.
>      @ sql/rpl_reporting.cc
>         implementing error to warning demotion and refining stderr report func:s
> invocation
>         with prior checking of global_system_variables.log_warnings.
>      @ sql/rpl_slave.cc
>         has_temporary_error() is extended with a new argument and a new rule
>         to return 0 if there has been the fatal error flag raised.
>         A minor refinement is done to not regard an error as temporary in case
>         there is no retry.
>      @ sql/rpl_slave.h
>         providing has_temporary_error() interface for rpl_reporting.cc.
> 
>     modified:
>       mysql-test/extra/rpl_tests/rpl_deadlock.test
>       mysql-test/suite/rpl/r/rpl_deadlock_innodb.result
>       sql/rpl_reporting.cc
>       sql/rpl_slave.cc
>       sql/rpl_slave.h
> === modified file 'mysql-test/extra/rpl_tests/rpl_deadlock.test'
> --- a/mysql-test/extra/rpl_tests/rpl_deadlock.test	2010-12-19 17:07:28 +0000
> +++ b/mysql-test/extra/rpl_tests/rpl_deadlock.test	2011-03-27 18:16:32 +0000
> @@ -123,6 +123,57 @@ SELECT * FROM t3;
>  source include/check_slave_is_running.inc;
>  --echo
>  
> +#
> +# bug#11748510/36524 incident of deadlock on slave is overdramatized
> +#
> +# Observe that the slave stopped when the number of transation retries 
> +# exceeds @@global.slave_transaction_retries
> +#
> +connection master;
> +
> +--echo *** Test the deadlock warning to be escalated into the error ***
> +
> +delete from t1;
> +delete from t2;
> +delete from t3;
> +
> +sync_slave_with_master;
> +
> +# make sure slave's unilateral row gone as well
> +delete from t1;
> +delete from t2;
> +delete from t3;
> +
> +# the first attempt to run a deadlock scenario of p 1) leads to the error
> +set @save.slave_transaction_retries= @@global.slave_transaction_retries;
> +set @@global.slave_transaction_retries= 0;
> +source include/stop_slave.inc;
> +
> +connection master;
> +
> +BEGIN;
> +INSERT INTO t1 VALUES (1);
> +# We make a long transaction here
> +INSERT INTO t2 VALUES (2), (2), (2), (2), (2), (2), (2), (2), (2), (2);
> +INSERT INTO t3 VALUES (3);
> +COMMIT;
> +
> +connection slave;
> +BEGIN;
> +SELECT count(*) as zero FROM t1 FOR UPDATE;
> +
> +start slave;
> +
> +--echo *** Now the slave must be stopped due to timeout ***
> +
> +let $slave_sql_errno= 1205; # ER_LOCK_TIMEOUT
> +let $show_slave_sql_error= 0;
> +source include/wait_for_slave_sql_error.inc;
> +
> +rollback;
> +
> +set @@global.slave_transaction_retries= @save.slave_transaction_retries;
> +source include/start_slave.inc;
>  # Clean up
>  --echo *** Clean up ***
>  connection master;
> 
> === modified file 'mysql-test/suite/rpl/r/rpl_deadlock_innodb.result'
> --- a/mysql-test/suite/rpl/r/rpl_deadlock_innodb.result	2010-12-19 17:22:30 +0000
> +++ b/mysql-test/suite/rpl/r/rpl_deadlock_innodb.result	2011-03-27 18:16:32 +0000
> @@ -103,6 +103,31 @@ a
>  3
>  include/check_slave_is_running.inc
>  
> +*** Test the deadlock warning to be escalated into the error ***
> +delete from t1;
> +delete from t2;
> +delete from t3;
> +delete from t1;
> +delete from t2;
> +delete from t3;
> +set @save.slave_transaction_retries= @@global.slave_transaction_retries;
> +set @@global.slave_transaction_retries= 0;
> +include/stop_slave.inc
> +BEGIN;
> +INSERT INTO t1 VALUES (1);
> +INSERT INTO t2 VALUES (2), (2), (2), (2), (2), (2), (2), (2), (2), (2);
> +INSERT INTO t3 VALUES (3);
> +COMMIT;
> +BEGIN;
> +SELECT count(*) as zero FROM t1 FOR UPDATE;
> +zero
> +0
> +start slave;
> +*** Now the slave must be stopped due to timeout ***
> +include/wait_for_slave_sql_error.inc [errno=1205]
> +rollback;
> +set @@global.slave_transaction_retries= @save.slave_transaction_retries;
> +include/start_slave.inc
>  *** Clean up ***
>  DROP TABLE t1,t2,t3;
>  SET global max_relay_log_size= @my_max_relay_log_size;
> 
> === modified file 'sql/rpl_reporting.cc'
> --- a/sql/rpl_reporting.cc	2010-08-05 17:45:25 +0000
> +++ b/sql/rpl_reporting.cc	2011-03-27 18:16:32 +0000
> @@ -17,6 +17,7 @@
>  #include "rpl_reporting.h"
>  #include "log.h" // sql_print_error, sql_print_warning,
>                   // sql_print_information
> +#include "rpl_slave.h"
>  
>  Slave_reporting_capability::Slave_reporting_capability(char const *thread_name)
>    : m_thread_name(thread_name)
> @@ -29,11 +30,17 @@ void
>  Slave_reporting_capability::report(loglevel level, int err_code,
>                                     const char *msg, ...) const
>  {
> +#if !defined(EMBEDDED_LIBRARY)
> +  THD *thd= current_thd;
>    void (*report_function)(const char *, ...);
>    char buff[MAX_SLAVE_ERRMSG];
>    char *pbuff= buff;
>    uint pbuffsize= sizeof(buff);
>    va_list args;
> +
> +  if (level == ERROR_LEVEL && has_temporary_error(thd, err_code))
> +    level= WARNING_LEVEL;
> +
>    va_start(args, msg);
>  
>    mysql_mutex_lock(&err_lock);
> @@ -51,10 +58,12 @@ Slave_reporting_capability::report(logle
>      report_function= sql_print_error;
>      break;
>    case WARNING_LEVEL:
> -    report_function= sql_print_warning;
> +    report_function= global_system_variables.log_warnings?
> +      sql_print_warning : NULL;
>      break;
>    case INFORMATION_LEVEL:
> -    report_function= sql_print_information;
> +    report_function= global_system_variables.log_warnings?
> +      sql_print_information : NULL;
>      break;
>    default:
>      DBUG_ASSERT(0);                            // should not come here
> @@ -71,6 +80,7 @@ Slave_reporting_capability::report(logle
>                    m_thread_name, pbuff,
>                    (pbuff[0] && *(strend(pbuff)-1) == '.') ? "" : ",",
>                    err_code);
> +#endif  
>  }
>  
>  Slave_reporting_capability::~Slave_reporting_capability()
> 
> === modified file 'sql/rpl_slave.cc'
> --- a/sql/rpl_slave.cc	2011-03-17 13:20:36 +0000
> +++ b/sql/rpl_slave.cc	2011-03-27 18:16:32 +0000
> @@ -2514,15 +2514,23 @@ static ulong read_event(MYSQL* mysql, Ma
>    DBUG_RETURN(len - 1);
>  }
>  
> -/*
> -  Check if the current error is of temporary nature of not.
> +/**
> +  Check if the current error is of temporary nature or not.
>    Some errors are temporary in nature, such as
>    ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT.  Ndb also signals
>    that the error is temporary by pushing a warning with the error code
>    ER_GET_TEMPORARY_ERRMSG, if the originating error is temporary.
> +
> +  @param      thd  a THD instance, typically of the slave SQL thread's.
> +  @error_arg  the error code for assessment. 
> +              defaults to zero which makes the function check the top
> +              of the reported errors stack.
> +
> +  @return 1 as the positive and 0 as the negative verdict
>  */
> -static int has_temporary_error(THD *thd)
> +int has_temporary_error(THD *thd, uint error_arg)
>  {
> +  uint error;
>    DBUG_ENTER("has_temporary_error");
>  
>    DBUG_EXECUTE_IF("all_errors_are_temporary_errors",
> @@ -2533,20 +2541,25 @@ static int has_temporary_error(THD *thd)
>                    });
>  
>    /*
> -    If there is no message in THD, we can't say if it's a temporary
> +    The state of the slave thread can't be regarded as
> +    experiencing a temporary failure in cases of @c is_slave_error was set TRUE,
> +    or if there is no message in THD, we can't say if it's a temporary
>      error or not. This is currently the case for Incident_log_event,
> -    which sets no message. Return FALSE.
> +    which sets no message.
>    */
> -  if (!thd->is_error())
> +  if (thd->is_fatal_error || !thd->is_error())
>      DBUG_RETURN(0);
>  
> +  error= (error_arg == 0)? thd->stmt_da->sql_errno() : error_arg;
> +
>    /*
>      Temporary error codes:
>      currently, InnoDB deadlock detected by InnoDB or lock
> -    wait timeout (innodb_lock_wait_timeout exceeded
> +    wait timeout (innodb_lock_wait_timeout exceeded).
> +    Notice, the temporary error requires slave_trans_retries != 0)
>    */
> -  if (thd->stmt_da->sql_errno() == ER_LOCK_DEADLOCK ||
> -      thd->stmt_da->sql_errno() == ER_LOCK_WAIT_TIMEOUT)
> +  if (slave_trans_retries &&
> +      (error == ER_LOCK_DEADLOCK || error == ER_LOCK_WAIT_TIMEOUT))
>      DBUG_RETURN(1);
>  
>  #ifdef HAVE_NDB_BINLOG
> @@ -3007,10 +3020,13 @@ static int exec_relay_log_event(THD* thd
>            }
>          }
>          else
> -          sql_print_error("Slave SQL thread retried transaction %lu time(s) "
> -                          "in vain, giving up. Consider raising the value of "
> -                          "the slave_transaction_retries variable.",
> -                          slave_trans_retries);
> +        {
> +          thd->is_fatal_error= 1;
> +          rli->report(ERROR_LEVEL, thd->stmt_da->sql_errno(),
> +                      "Slave SQL thread retried transaction %lu time(s) "
> +                      "in vain, giving up. Consider raising the value of "
> +                      "the slave_transaction_retries variable.",
> rli->trans_retries);
> +        }
>        }
>        else if ((exec_res && !temp_err) ||
>                 (opt_using_transactions &&
> 
> === modified file 'sql/rpl_slave.h'
> --- a/sql/rpl_slave.h	2011-02-16 17:13:30 +0000
> +++ b/sql/rpl_slave.h	2011-03-27 18:16:32 +0000
> @@ -213,6 +213,7 @@ void set_slave_thread_options(THD* thd);
>  void set_slave_thread_default_charset(THD *thd, Relay_log_info const *rli);
>  int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli);
>  int rotate_relay_log(Master_info* mi);
> +int has_temporary_error(THD *thd, uint err= 0);
>  
>  pthread_handler_t handle_slave_io(void *arg);
>  pthread_handler_t handle_slave_sql(void *arg);
> 
> 
> 
> 
> 

Thread
bzr commit into mysql-trunk branch (andrei.elkin:3311) Bug#11748510Andrei Elkin27 Mar
  • Re: bzr commit into mysql-trunk branch (andrei.elkin:3311) Bug#11748510Alfranio Correia28 Mar