From: Date: October 6 2006 11:08pm Subject: bk commit into 5.0 tree (aelkin:1.2295) BUG#16228 List-Archive: http://lists.mysql.com/commits/13284 X-Bug: 16228 Message-Id: <200610062108.k96L8wbn024320@dsl-hkigw8-feb9fb00-191.dhcp.inet.fi> Below is the list of changes that have just been committed into a local 5.0 repository of elkin. When elkin does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet@stripped, 2006-10-07 00:08:52+03:00, aelkin@stripped +3 -0 BUG#20697 A transaction on the slave sql thread got blocked by a lock held by a slave-local transaction (ta). With the default slave-transaction-retries=10, the replicated ta was replayed, but the replay failed because 5.0's policy of rolling back a timed-out transaction has been changed since 5.0.13. It was decided to backport the already existing method that works in 5.1, implemented in bug #16228, for handling the symmetrical deadlock problem. Note that this solution can be suboptimal in practice only with a high rate of replicated transactions timing out. Upon the release of the latter mysql-test/r/rpl_deadlock.result@stripped, 2006-10-07 00:08:46+03:00, aelkin@stripped +86 -0 results changed mysql-test/t/rpl_deadlock.test@stripped, 2006-10-07 00:08:47+03:00, aelkin@stripped +50 -0 provoking a timeout while the slave sql thread waits for a transactional lock. Checking the target table and slave status. sql/slave.cc@stripped, 2006-10-07 00:08:49+03:00, aelkin@stripped +13 -4 applying the bug#16228 fix, already proven for the deadlock use case in 5.1, almost verbatim. The alternative of replaying only the offending statement requires significant effort, including design work. # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. 
# User: aelkin # Host: dsl-hkigw8-feb9fb00-191.dhcp.inet.fi # Root: /home/elkin/MySQL/TEAM/FIXES/5.0/bug20697_slave_msta_retry --- 1.279/sql/slave.cc 2006-10-07 00:08:58 +03:00 +++ 1.280/sql/slave.cc 2006-10-07 00:08:58 +03:00 @@ -3369,6 +3369,7 @@ static int exec_relay_log_event(THD* thd else { exec_res= 0; + end_trans(thd, ROLLBACK); /* chance for concurrent connection to get more locks */ safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE), (CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli); @@ -3386,9 +3387,17 @@ static int exec_relay_log_event(THD* thd "the slave_transaction_retries variable.", slave_trans_retries); } - if (!((thd->options & OPTION_BEGIN) && opt_using_transactions)) - rli->trans_retries= 0; // restart from fresh - } + else if (!((thd->options & OPTION_BEGIN) && opt_using_transactions)) + { + /* + Only reset the retry counter if the event succeeded or + failed with a non-transient error. On a successful event, + the execution will proceed as usual; in the case of a + non-transient error, the slave will stop with an error. + */ + rli->trans_retries= 0; // restart from fresh + } + } return exec_res; } else @@ -4613,7 +4622,7 @@ static int connect_to_master(THD* thd, M suppress_warnings= 0; sql_print_error("Slave I/O thread: error %s to master \ '%s@%s:%d': \ -Error: '%s' errno: %d retry-time: %d retries: %d", +Error: '%s' errno: %d retry-time: %d retries: %lu", (reconnect ? 
"reconnecting" : "connecting"), mi->user,mi->host,mi->port, mysql_error(mysql), last_errno, --- 1.10/mysql-test/r/rpl_deadlock.result 2006-10-07 00:08:58 +03:00 +++ 1.11/mysql-test/r/rpl_deadlock.result 2006-10-07 00:08:58 +03:00 @@ -177,4 +177,90 @@ Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master # +drop table if exists t1; +create table t1 (f int unique) engine=innodb; +insert into t1 values (0); +begin; +select * from t1 where f = 0 for update; +f +0 +begin; +insert into t1 values (1); +update t1 set f=-1 where f = 0; +commit; +insert into t1 values (2); +show slave status; +Slave_IO_State # +Master_Host 127.0.0.1 +Master_User root +Master_Port MASTER_MYPORT +Connect_Retry 1 +Master_Log_File master-bin.000001 +Read_Master_Log_Pos 19611 +Relay_Log_File # +Relay_Log_Pos # +Relay_Master_Log_File master-bin.000001 +Slave_IO_Running # +Slave_SQL_Running No +Replicate_Do_DB +Replicate_Ignore_DB +Replicate_Do_Table +Replicate_Ignore_Table +Replicate_Wild_Do_Table +Replicate_Wild_Ignore_Table +Last_Errno 1205 +Last_Error Error 'Lock wait timeout exceeded; try restarting transaction' on query. Default database: 'test'. 
Query: 'update t1 set f=-1 where f = 0' +Skip_Counter 0 +Exec_Master_Log_Pos 19220 +Relay_Log_Space # +Until_Condition None +Until_Log_File +Until_Log_Pos 0 +Master_SSL_Allowed No +Master_SSL_CA_File +Master_SSL_CA_Path +Master_SSL_Cert +Master_SSL_Cipher +Master_SSL_Key +Seconds_Behind_Master # +set @@global.sql_slave_skip_counter = 4; +start slave; +show slave status; +Slave_IO_State # +Master_Host 127.0.0.1 +Master_User root +Master_Port MASTER_MYPORT +Connect_Retry 1 +Master_Log_File master-bin.000001 +Read_Master_Log_Pos 19611 +Relay_Log_File # +Relay_Log_Pos # +Relay_Master_Log_File master-bin.000001 +Slave_IO_Running # +Slave_SQL_Running Yes +Replicate_Do_DB +Replicate_Ignore_DB +Replicate_Do_Table +Replicate_Ignore_Table +Replicate_Wild_Do_Table +Replicate_Wild_Ignore_Table +Last_Errno 0 +Last_Error +Skip_Counter 0 +Exec_Master_Log_Pos 19611 +Relay_Log_Space # +Until_Condition None +Until_Log_File +Until_Log_Pos 0 +Master_SSL_Allowed No +Master_SSL_CA_File +Master_SSL_CA_Path +Master_SSL_Cert +Master_SSL_Cipher +Master_SSL_Key +Seconds_Behind_Master # +select * from t1; +f +0 +2 drop table t1,t2,t3,t4; --- 1.12/mysql-test/t/rpl_deadlock.test 2006-10-07 00:08:58 +03:00 +++ 1.13/mysql-test/t/rpl_deadlock.test 2006-10-07 00:08:58 +03:00 @@ -112,6 +112,56 @@ select * from t2; show slave status; --horizontal_results +# BUG20697 timeout to wait a lock by slave sql. 
The latter rolls back +# and restarts its ta + +connection master; +# prepare +drop table if exists t1; +create table t1 (f int unique) engine=innodb; +insert into t1 values (0); + +sync_slave_with_master; +# connection slave; +# to block up slave sql later +begin; select * from t1 where f = 0 for update; + +connection master; +# to offend +begin; +insert into t1 values (1); +update t1 set f=-1 where f = 0; +commit; +insert into t1 values (2); # to succeed +save_master_pos; + +connection slave; +wait_for_slave_to_stop; +###sync_slave_with_master; + +--replace_column 1 # 8 # 9 # 11 # 23 # 33 # +--replace_result $MASTER_MYPORT MASTER_MYPORT +--vertical_results +show slave status; +--horizontal_results + +set @@global.sql_slave_skip_counter = 4; +start slave; + +sync_with_master; +#--real_sleep 3 + +--replace_column 1 # 8 # 9 # 11 # 23 # 33 # +--replace_result $MASTER_MYPORT MASTER_MYPORT +--vertical_results +show slave status; +--horizontal_results + +#sync_slave_with_master; +select * from t1; +commit; + + connection master; drop table t1,t2,t3,t4; sync_slave_with_master;