List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:March 27 2011 6:16pm
Subject:bzr commit into mysql-trunk branch (andrei.elkin:3311) Bug#11748510
View as plain text  
#At file:///home/andrei/MySQL/BZR/2a-23May/FIXES/bug11748510_36524_less_drama/ based on revid:magne.mahre@stripped

 3311 Andrei Elkin	2011-03-27
      bug#11748510 incident of deadlock on slave is overdramatized
      
      In cases of temporary errors in replication event execution (deadlock,
      timeout) the error log gained an overreacting error message whereas
      just a warning would be fine.
      
      Fixed with checking of the temporary status of the error inside
      Slave_reporting_capability::report()  to demote the error to the
      warning level if needed. 
      The warning gets converted into the error
      whenever number of successive attempts to re-apply the event(s) gets
      equal to @@global.slave_trans_retries.
     @ mysql-test/extra/rpl_tests/rpl_deadlock.test
        regression test for bug#11748510 is added.
     @ mysql-test/suite/rpl/r/rpl_deadlock_innodb.result
        results get updated.
     @ sql/rpl_reporting.cc
        implementing error to warning demotion and refining stderr report func:s invocation
        with prior checking of global_system_variables.log_warnings.
     @ sql/rpl_slave.cc
        has_temporary_error() is extended with a new argument and a new rule
        to return 0 if there has been the fatal error flag raised.
        A minor refinement is done to not regard an error as temporary in case
        there is no retry.
     @ sql/rpl_slave.h
        providing has_temporary_error() interface for rpl_reporting.cc.

    modified:
      mysql-test/extra/rpl_tests/rpl_deadlock.test
      mysql-test/suite/rpl/r/rpl_deadlock_innodb.result
      sql/rpl_reporting.cc
      sql/rpl_slave.cc
      sql/rpl_slave.h
=== modified file 'mysql-test/extra/rpl_tests/rpl_deadlock.test'
--- a/mysql-test/extra/rpl_tests/rpl_deadlock.test	2010-12-19 17:07:28 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_deadlock.test	2011-03-27 18:16:32 +0000
@@ -123,6 +123,57 @@ SELECT * FROM t3;
 source include/check_slave_is_running.inc;
 --echo
 
+#
+# bug#11748510/36524 incident of deadlock on slave is overdramatized
+#
+# Observe that the slave stopped when the number of transation retries 
+# exceeds @@global.slave_transaction_retries
+#
+connection master;
+
+--echo *** Test the deadlock warning to be escalated into the error ***
+
+delete from t1;
+delete from t2;
+delete from t3;
+
+sync_slave_with_master;
+
+# make sure slave's unilateral row gone as well
+delete from t1;
+delete from t2;
+delete from t3;
+
+# the first attempt to run a deadlock scenario of p 1) leads to the error
+set @save.slave_transaction_retries= @@global.slave_transaction_retries;
+set @@global.slave_transaction_retries= 0;
+source include/stop_slave.inc;
+
+connection master;
+
+BEGIN;
+INSERT INTO t1 VALUES (1);
+# We make a long transaction here
+INSERT INTO t2 VALUES (2), (2), (2), (2), (2), (2), (2), (2), (2), (2);
+INSERT INTO t3 VALUES (3);
+COMMIT;
+
+connection slave;
+BEGIN;
+SELECT count(*) as zero FROM t1 FOR UPDATE;
+
+start slave;
+
+--echo *** Now the slave must be stopped due to timeout ***
+
+let $slave_sql_errno= 1205; # ER_LOCK_TIMEOUT
+let $show_slave_sql_error= 0;
+source include/wait_for_slave_sql_error.inc;
+
+rollback;
+
+set @@global.slave_transaction_retries= @save.slave_transaction_retries;
+source include/start_slave.inc;
 # Clean up
 --echo *** Clean up ***
 connection master;

=== modified file 'mysql-test/suite/rpl/r/rpl_deadlock_innodb.result'
--- a/mysql-test/suite/rpl/r/rpl_deadlock_innodb.result	2010-12-19 17:22:30 +0000
+++ b/mysql-test/suite/rpl/r/rpl_deadlock_innodb.result	2011-03-27 18:16:32 +0000
@@ -103,6 +103,31 @@ a
 3
 include/check_slave_is_running.inc
 
+*** Test the deadlock warning to be escalated into the error ***
+delete from t1;
+delete from t2;
+delete from t3;
+delete from t1;
+delete from t2;
+delete from t3;
+set @save.slave_transaction_retries= @@global.slave_transaction_retries;
+set @@global.slave_transaction_retries= 0;
+include/stop_slave.inc
+BEGIN;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (2), (2), (2), (2), (2), (2), (2), (2), (2), (2);
+INSERT INTO t3 VALUES (3);
+COMMIT;
+BEGIN;
+SELECT count(*) as zero FROM t1 FOR UPDATE;
+zero
+0
+start slave;
+*** Now the slave must be stopped due to timeout ***
+include/wait_for_slave_sql_error.inc [errno=1205]
+rollback;
+set @@global.slave_transaction_retries= @save.slave_transaction_retries;
+include/start_slave.inc
 *** Clean up ***
 DROP TABLE t1,t2,t3;
 SET global max_relay_log_size= @my_max_relay_log_size;

=== modified file 'sql/rpl_reporting.cc'
--- a/sql/rpl_reporting.cc	2010-08-05 17:45:25 +0000
+++ b/sql/rpl_reporting.cc	2011-03-27 18:16:32 +0000
@@ -17,6 +17,7 @@
 #include "rpl_reporting.h"
 #include "log.h" // sql_print_error, sql_print_warning,
                  // sql_print_information
+#include "rpl_slave.h"
 
 Slave_reporting_capability::Slave_reporting_capability(char const *thread_name)
   : m_thread_name(thread_name)
@@ -29,11 +30,17 @@ void
 Slave_reporting_capability::report(loglevel level, int err_code,
                                    const char *msg, ...) const
 {
+#if !defined(EMBEDDED_LIBRARY)
+  THD *thd= current_thd;
   void (*report_function)(const char *, ...);
   char buff[MAX_SLAVE_ERRMSG];
   char *pbuff= buff;
   uint pbuffsize= sizeof(buff);
   va_list args;
+
+  if (level == ERROR_LEVEL && has_temporary_error(thd, err_code))
+    level= WARNING_LEVEL;
+
   va_start(args, msg);
 
   mysql_mutex_lock(&err_lock);
@@ -51,10 +58,12 @@ Slave_reporting_capability::report(logle
     report_function= sql_print_error;
     break;
   case WARNING_LEVEL:
-    report_function= sql_print_warning;
+    report_function= global_system_variables.log_warnings?
+      sql_print_warning : NULL;
     break;
   case INFORMATION_LEVEL:
-    report_function= sql_print_information;
+    report_function= global_system_variables.log_warnings?
+      sql_print_information : NULL;
     break;
   default:
     DBUG_ASSERT(0);                            // should not come here
@@ -71,6 +80,7 @@ Slave_reporting_capability::report(logle
                   m_thread_name, pbuff,
                   (pbuff[0] && *(strend(pbuff)-1) == '.') ? "" : ",",
                   err_code);
+#endif  
 }
 
 Slave_reporting_capability::~Slave_reporting_capability()

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2011-03-17 13:20:36 +0000
+++ b/sql/rpl_slave.cc	2011-03-27 18:16:32 +0000
@@ -2514,15 +2514,23 @@ static ulong read_event(MYSQL* mysql, Ma
   DBUG_RETURN(len - 1);
 }
 
-/*
-  Check if the current error is of temporary nature of not.
+/**
+  Check if the current error is of temporary nature or not.
   Some errors are temporary in nature, such as
   ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT.  Ndb also signals
   that the error is temporary by pushing a warning with the error code
   ER_GET_TEMPORARY_ERRMSG, if the originating error is temporary.
+
+  @param      thd  a THD instance, typically of the slave SQL thread's.
+  @error_arg  the error code for assessment. 
+              defaults to zero which makes the function check the top
+              of the reported errors stack.
+
+  @return 1 as the positive and 0 as the negative verdict
 */
-static int has_temporary_error(THD *thd)
+int has_temporary_error(THD *thd, uint error_arg)
 {
+  uint error;
   DBUG_ENTER("has_temporary_error");
 
   DBUG_EXECUTE_IF("all_errors_are_temporary_errors",
@@ -2533,20 +2541,25 @@ static int has_temporary_error(THD *thd)
                   });
 
   /*
-    If there is no message in THD, we can't say if it's a temporary
+    The state of the slave thread can't be regarded as
+    experiencing a temporary failure in cases of @c is_slave_error was set TRUE,
+    or if there is no message in THD, we can't say if it's a temporary
     error or not. This is currently the case for Incident_log_event,
-    which sets no message. Return FALSE.
+    which sets no message.
   */
-  if (!thd->is_error())
+  if (thd->is_fatal_error || !thd->is_error())
     DBUG_RETURN(0);
 
+  error= (error_arg == 0)? thd->stmt_da->sql_errno() : error_arg;
+
   /*
     Temporary error codes:
     currently, InnoDB deadlock detected by InnoDB or lock
-    wait timeout (innodb_lock_wait_timeout exceeded
+    wait timeout (innodb_lock_wait_timeout exceeded).
+    Notice, the temporary error requires slave_trans_retries != 0)
   */
-  if (thd->stmt_da->sql_errno() == ER_LOCK_DEADLOCK ||
-      thd->stmt_da->sql_errno() == ER_LOCK_WAIT_TIMEOUT)
+  if (slave_trans_retries &&
+      (error == ER_LOCK_DEADLOCK || error == ER_LOCK_WAIT_TIMEOUT))
     DBUG_RETURN(1);
 
 #ifdef HAVE_NDB_BINLOG
@@ -3007,10 +3020,13 @@ static int exec_relay_log_event(THD* thd
           }
         }
         else
-          sql_print_error("Slave SQL thread retried transaction %lu time(s) "
-                          "in vain, giving up. Consider raising the value of "
-                          "the slave_transaction_retries variable.",
-                          slave_trans_retries);
+        {
+          thd->is_fatal_error= 1;
+          rli->report(ERROR_LEVEL, thd->stmt_da->sql_errno(),
+                      "Slave SQL thread retried transaction %lu time(s) "
+                      "in vain, giving up. Consider raising the value of "
+                      "the slave_transaction_retries variable.", rli->trans_retries);
+        }
       }
       else if ((exec_res && !temp_err) ||
                (opt_using_transactions &&

=== modified file 'sql/rpl_slave.h'
--- a/sql/rpl_slave.h	2011-02-16 17:13:30 +0000
+++ b/sql/rpl_slave.h	2011-03-27 18:16:32 +0000
@@ -213,6 +213,7 @@ void set_slave_thread_options(THD* thd);
 void set_slave_thread_default_charset(THD *thd, Relay_log_info const *rli);
 int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli);
 int rotate_relay_log(Master_info* mi);
+int has_temporary_error(THD *thd, uint err= 0);
 
 pthread_handler_t handle_slave_io(void *arg);
 pthread_handler_t handle_slave_sql(void *arg);


Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20110327181632-zcv44cu2cawtepw5.bundle
Thread
bzr commit into mysql-trunk branch (andrei.elkin:3311) Bug#11748510Andrei Elkin27 Mar
  • Re: bzr commit into mysql-trunk branch (andrei.elkin:3311) Bug#11748510Alfranio Correia28 Mar