List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:October 6 2006 9:21pm
Subject:bk commit into 5.0 tree (aelkin:1.2295) BUG#16228
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of elkin. When elkin does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-10-07 00:21:16+03:00, aelkin@stripped +3 -0
  BUG#20697 slave fails to roll back a replicated transaction that hangs past innodb_lock_wait_timeout
  
  A transaction on the slave SQL thread got blocked by a lock held by a slave-local
  transaction. Since slave-transaction-retries was at its default of 10, the replicated
  transaction was replayed, and the replay failed because 5.0's policy of rolling back
  a timed-out transaction has been changed since 5.0.13.
  
  It was decided to backport the existing method, already working in 5.1 and implemented
  for bug #16228, that handles the symmetrical deadlock problem.
  Note that in practice this solution can be suboptimal only when replicated transactions
  time out at a high rate.
  Upon the release of the latter

  mysql-test/r/rpl_deadlock.result@stripped, 2006-10-07 00:21:13+03:00, aelkin@stripped +86 -0
    results changed

  mysql-test/t/rpl_deadlock.test@stripped, 2006-10-07 00:21:13+03:00, aelkin@stripped +50 -0
    Provoking a timeout while the slave SQL thread waits for a transactional lock,
    then checking the target table and the slave status.

  sql/slave.cc@stripped, 2006-10-07 00:21:13+03:00, aelkin@stripped +13 -4
    Applying the bug#16228 fix, already proven for the deadlock use case in 5.1, almost verbatim.
    The alternative of replaying only the offending
    statement would require significant effort, including design work.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	aelkin
# Host:	dsl-hkigw8-feb9fb00-191.dhcp.inet.fi
# Root:	/home/elkin/MySQL/TEAM/FIXES/5.0/bug20697_slave_msta_retry

--- 1.279/sql/slave.cc	2006-10-07 00:21:23 +03:00
+++ 1.280/sql/slave.cc	2006-10-07 00:21:23 +03:00
@@ -3369,6 +3369,7 @@ static int exec_relay_log_event(THD* thd
           else
           {
             exec_res= 0;
+	    end_trans(thd, ROLLBACK);
 	    /* chance for concurrent connection to get more locks */
             safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
 		       (CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli);
@@ -3386,9 +3387,17 @@ static int exec_relay_log_event(THD* thd
                           "the slave_transaction_retries variable.",
                           slave_trans_retries);
       }
-      if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
-         rli->trans_retries= 0; // restart from fresh
-     }
+      else if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
+      {
+        /*
+          Only reset the retry counter if the event succeeded or
+          failed with a non-transient error.  On a successful event,
+          the execution will proceed as usual; in the case of a
+          non-transient error, the slave will stop with an error.
+	*/
+        rli->trans_retries= 0; // restart from fresh
+      }
+    }
     return exec_res;
   }
   else
@@ -4613,7 +4622,7 @@ static int connect_to_master(THD* thd, M
       suppress_warnings= 0;
       sql_print_error("Slave I/O thread: error %s to master \
 '%s@%s:%d': \
-Error: '%s'  errno: %d  retry-time: %d  retries: %d",
+Error: '%s'  errno: %d  retry-time: %d  retries: %lu",
 		      (reconnect ? "reconnecting" : "connecting"),
 		      mi->user,mi->host,mi->port,
 		      mysql_error(mysql), last_errno,

--- 1.10/mysql-test/r/rpl_deadlock.result	2006-10-07 00:21:23 +03:00
+++ 1.11/mysql-test/r/rpl_deadlock.result	2006-10-07 00:21:23 +03:00
@@ -177,4 +177,90 @@ Master_SSL_Cert	
 Master_SSL_Cipher	
 Master_SSL_Key	
 Seconds_Behind_Master	#
+drop table if exists t1;
+create table t1 (f int unique) engine=innodb;
+insert into t1  values (0);
+begin;
+select * from t1 where f = 0 for update;
+f
+0
+begin;
+insert into t1 values (1);
+update t1 set f=-1 where f = 0;
+commit;
+insert into t1 values (2);
+show slave status;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	MASTER_MYPORT
+Connect_Retry	1
+Master_Log_File	master-bin.000001
+Read_Master_Log_Pos	19611
+Relay_Log_File	#
+Relay_Log_Pos	#
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	#
+Slave_SQL_Running	No
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	1205
+Last_Error	Error 'Lock wait timeout exceeded; try restarting transaction' on query. Default database: 'test'. Query: 'update t1 set f=-1 where f = 0'
+Skip_Counter	0
+Exec_Master_Log_Pos	19220
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+set @@global.sql_slave_skip_counter = 4;
+start slave;
+show slave status;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	MASTER_MYPORT
+Connect_Retry	1
+Master_Log_File	master-bin.000001
+Read_Master_Log_Pos	19611
+Relay_Log_File	#
+Relay_Log_Pos	#
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	#
+Slave_SQL_Running	Yes
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	19611
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+select * from t1;
+f
+0
+2
 drop table t1,t2,t3,t4;

--- 1.12/mysql-test/t/rpl_deadlock.test	2006-10-07 00:21:23 +03:00
+++ 1.13/mysql-test/t/rpl_deadlock.test	2006-10-07 00:21:23 +03:00
@@ -112,6 +112,56 @@ select * from t2;
 show slave status;
 --horizontal_results
 
+# BUG20697 timeout to wait a lock by slave sql. The latter rolls back
+# and restarts its ta
+
+connection master;
+# prepare
+drop table if exists t1; 
+create table t1 (f int unique) engine=innodb; 
+insert into t1  values (0);
+
+sync_slave_with_master;
+# connection slave;
+# to block up slave sql later
+begin; select * from t1 where f = 0 for update;
+
+connection master;
+# to offend 
+begin;
+insert into t1 values (1);
+update t1 set f=-1 where f = 0;
+commit;
+insert into t1 values (2); # to succeed
+save_master_pos;
+
+connection slave;
+wait_for_slave_to_stop;
+###sync_slave_with_master;
+
+--replace_column 1 # 8 # 9 # 11 # 23 # 33 #
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+--vertical_results
+show slave status;
+--horizontal_results
+
+set @@global.sql_slave_skip_counter = 4;
+start slave;
+
+sync_with_master;
+#--real_sleep 3
+
+--replace_column 1 # 8 # 9 # 11 # 23 # 33 #
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+--vertical_results
+show slave status;
+--horizontal_results
+
+#sync_slave_with_master;
+select * from t1;
+commit;
+
+
 connection master;
 drop table t1,t2,t3,t4;
 sync_slave_with_master;
Thread
bk commit into 5.0 tree (aelkin:1.2295) BUG#16228Andrei Elkin6 Oct