MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Mats Kindahl Date:October 20 2007 6:16pm
Subject:bk commit into 5.1 tree (mats:1.2579) BUG#24860
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of mats. When mats does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-10-20 20:16:12+02:00, mats@stripped +4 -0
  BUG#24860 (Incorrect SLAVE_TRANSACTION_RETRIES code can result in slave stuck):
  
  If a temporary error occured inside a group on an event that was not the first
  event of the group, the slave could get stuck because the retry counter is reset
  whenever an event was executed successfully.
  
  This patch only reset the retry counter when an entire group has been successfully
  executed, or failed with a non-transient error.

  mysql-test/suite/rpl/r/rpl_temporary_errors.result@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +81 -0
    New BitKeeper file ``mysql-test/suite/rpl/r/rpl_temporary_errors.result''

  mysql-test/suite/rpl/r/rpl_temporary_errors.result@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +0 -0

  mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +3 -0
    New BitKeeper file ``mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt''

  mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +0 -0

  mysql-test/suite/rpl/t/rpl_temporary_errors.test@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +29 -0
    New BitKeeper file ``mysql-test/suite/rpl/t/rpl_temporary_errors.test''

  mysql-test/suite/rpl/t/rpl_temporary_errors.test@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +0 -0

  sql/slave.cc@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +21 -5
    Adding debug printouts to every place where Relay_log_info::trans_retries
    is changed and to has_temporary_error() to print the error when returning.
    
    Adding debug variable all_errors_are_temporary_errors to make all errors
    for slave thread temporary errors.
    
    Adding code so that the Relay_log_info::trans_retries is only reset when
    an entire group was sucessfully executed, or a non-temporary error occured.

diff -Nrup a/mysql-test/suite/rpl/r/rpl_temporary_errors.result b/mysql-test/suite/rpl/r/rpl_temporary_errors.result
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/mysql-test/suite/rpl/r/rpl_temporary_errors.result	2007-10-20 20:16:08 +02:00
@@ -0,0 +1,81 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+**** On Master ****
+SET SESSION BINLOG_FORMAT=ROW;
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4);
+**** On Slave ****
+SHOW STATUS LIKE 'Slave_retried_transactions';
+Variable_name	Value
+Slave_retried_transactions	0
+UPDATE t1 SET a = 5, b = 47 WHERE a = 1;
+SELECT * FROM t1;
+a	b
+5	47
+2	2
+3	3
+4	4
+**** On Master ****
+UPDATE t1 SET a = 5, b = 5 WHERE a = 1;
+SELECT * FROM t1;
+a	b
+5	5
+2	2
+3	3
+4	4
+**** On Slave ****
+SHOW STATUS LIKE 'Slave_retried_transactions';
+Variable_name	Value
+Slave_retried_transactions	2
+SELECT * FROM t1;
+a	b
+5	47
+2	2
+3	3
+4	4
+SHOW SLAVE STATUS;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	MASTER_PORT
+Connect_Retry	1
+Master_Log_File	master-bin.000001
+Read_Master_Log_Pos	408
+Relay_Log_File	#
+Relay_Log_Pos	#
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	Yes
+Slave_SQL_Running	No
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	#
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	1032
+Last_Error	Error in Update_rows event: error during transaction execution on table test.t1. Can't find record in 't1'
+Skip_Counter	0
+Exec_Master_Log_Pos	318
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+Master_SSL_Verify_Server_Cert	No
+Last_IO_Errno	#
+Last_IO_Error	#
+Last_SQL_Errno	1032
+Last_SQL_Error	Error in Update_rows event: error during transaction execution on table test.t1. Can't find record in 't1'
+DROP TABLE t1;
+**** On Master ****
+DROP TABLE t1;
diff -Nrup a/mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt b/mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt	2007-10-20 20:16:08 +02:00
@@ -0,0 +1,3 @@
+--loose-debug="+d,all_errors_are_temporary_errors" --slave-transaction-retries=2
+
+
diff -Nrup a/mysql-test/suite/rpl/t/rpl_temporary_errors.test b/mysql-test/suite/rpl/t/rpl_temporary_errors.test
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/mysql-test/suite/rpl/t/rpl_temporary_errors.test	2007-10-20 20:16:08 +02:00
@@ -0,0 +1,29 @@
+source include/master-slave.inc;
+
+--echo **** On Master ****
+connection master;
+SET SESSION BINLOG_FORMAT=ROW;
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4);
+--echo **** On Slave ****
+sync_slave_with_master;
+SHOW STATUS LIKE 'Slave_retried_transactions';
+UPDATE t1 SET a = 5, b = 47 WHERE a = 1;
+SELECT * FROM t1;
+--echo **** On Master ****
+connection master;
+UPDATE t1 SET a = 5, b = 5 WHERE a = 1;
+save_master_pos;
+SELECT * FROM t1;
+#SHOW BINLOG EVENTS;
+--echo **** On Slave ****
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+SHOW STATUS LIKE 'Slave_retried_transactions';
+SELECT * FROM t1;
+source include/show_slave_status.inc;
+DROP TABLE t1;
+
+--echo **** On Master ****
+connection master;
+DROP TABLE t1;
diff -Nrup a/sql/slave.cc b/sql/slave.cc
--- a/sql/slave.cc	2007-10-13 14:58:06 +02:00
+++ b/sql/slave.cc	2007-10-20 20:16:08 +02:00
@@ -1714,7 +1714,14 @@ static int has_temporary_error(THD *thd)
   DBUG_ENTER("has_temporary_error");
 
   if (thd->is_fatal_error)
+  {
+    DBUG_PRINT("info", ("thd->net.last_errno: %s", ER(thd->net.last_errno)));
     DBUG_RETURN(0);
+  }
+
+  DBUG_EXECUTE_IF("all_errors_are_temporary_errors",
+                  if (thd->net.last_errno)
+                    thd->net.last_errno= ER_LOCK_DEADLOCK;);
 
   /*
     Temporary error codes:
@@ -1723,7 +1730,10 @@ static int has_temporary_error(THD *thd)
   */
   if (thd->net.last_errno == ER_LOCK_DEADLOCK ||
       thd->net.last_errno == ER_LOCK_WAIT_TIMEOUT)
+  {
+    DBUG_PRINT("info", ("thd->net.last_errno: %s", ER(thd->net.last_errno)));
     DBUG_RETURN(1);
+  }
 
 #ifdef HAVE_NDB_BINLOG
   /*
@@ -1905,7 +1915,8 @@ static int exec_relay_log_event(THD* thd
     }
     if (slave_trans_retries)
     {
-      if (exec_res && has_temporary_error(thd))
+      int temp_err;
+      if (exec_res && (temp_err= has_temporary_error(thd)))
       {
         const char *errmsg;
         /*
@@ -1953,15 +1964,19 @@ static int exec_relay_log_event(THD* thd
                           "the slave_transaction_retries variable.",
                           slave_trans_retries);
       }
-      else if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
+      else if (exec_res && !temp_err ||
+               (opt_using_transactions &&
+                rli->group_relay_log_pos == rli->event_relay_log_pos))
       {
         /*
-          Only reset the retry counter if the event succeeded or
-          failed with a non-transient error.  On a successful event,
-          the execution will proceed as usual; in the case of a
+          Only reset the retry counter if the entire group succeeded
+          or failed with a non-transient error.  On a successful
+          event, the execution will proceed as usual; in the case of a
           non-transient error, the slave will stop with an error.
          */
         rli->trans_retries= 0; // restart from fresh
+        DBUG_PRINT("info", ("Resetting retry counter, rli->trans_retries: %d",
+                            rli->trans_retries));
       }
     }
     DBUG_RETURN(exec_res);
@@ -2450,6 +2465,7 @@ pthread_handler_t handle_slave_sql(void 
   rli->ignore_log_space_limit= 0;
   pthread_mutex_unlock(&rli->log_space_lock);
   rli->trans_retries= 0; // start from "no error"
+  DBUG_PRINT("info", ("rli->trans_retries: %d", rli->trans_retries));
 
   if (init_relay_log_pos(rli,
                          rli->group_relay_log_name,
Thread
bk commit into 5.1 tree (mats:1.2579) BUG#24860Mats Kindahl20 Oct
  • Re: bk commit into 5.1 tree (mats:1.2579) BUG#24860Sven Sandberg22 Oct
    • Re: bk commit into 5.1 tree (mats:1.2579) BUG#24860Mats Kindahl22 Oct
      • Re: bk commit into 5.1 tree (mats:1.2579) BUG#24860Sven Sandberg22 Oct
  • Re: bk commit into 5.1 tree (mats:1.2579) BUG#24860Sven Sandberg23 Oct
  • Re: bk commit into 5.1 tree (mats:1.2579) BUG#24860Andrei Elkin23 Oct
    • Re: bk commit into 5.1 tree (mats:1.2579) BUG#24860Mats Kindahl23 Oct