Below is the list of changes that have just been committed into a local
5.0 repository of elkin. When elkin does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2006-10-07 00:21:16+03:00, aelkin@stripped
+3 -0
BUG#20697: slave fails to roll back a replicated transaction that hangs past
innodb_lock_wait_timeout
A transaction on the slave SQL thread got blocked by a lock held by a local
transaction on the slave. Since slave-transaction-retries was at its default of 10,
the replicated transaction was replayed, and the replay failed because the 5.0
policy of rolling back a timed-out transaction has been changed
since 5.0.13.
It was decided to backport the existing method, implemented for bug #16228 and
already working in 5.1, for handling the symmetrical deadlock problem.
Note that this solution is suboptimal in practice only when
replicated transactions time out
at a high rate.
Upon the release of the latter
mysql-test/r/rpl_deadlock.result@stripped, 2006-10-07 00:21:13+03:00,
aelkin@stripped +86 -0
results changed
mysql-test/t/rpl_deadlock.test@stripped, 2006-10-07 00:21:13+03:00,
aelkin@stripped +50 -0
provoking a timeout while the slave SQL thread waits for a transactional lock.
Checking the target table and slave status.
sql/slave.cc@stripped, 2006-10-07 00:21:13+03:00,
aelkin@stripped +13 -4
applying the bug#16228 fix, already proven for the deadlock use case in 5.1, almost verbatim.
The alternative of replaying only the offending
statement would require significant effort, including design work.
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: aelkin
# Host: dsl-hkigw8-feb9fb00-191.dhcp.inet.fi
# Root: /home/elkin/MySQL/TEAM/FIXES/5.0/bug20697_slave_msta_retry
--- 1.279/sql/slave.cc 2006-10-07 00:21:23 +03:00
+++ 1.280/sql/slave.cc 2006-10-07 00:21:23 +03:00
@@ -3369,6 +3369,7 @@ static int exec_relay_log_event(THD* thd
else
{
exec_res= 0;
+ end_trans(thd, ROLLBACK);
/* chance for concurrent connection to get more locks */
safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
(CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli);
@@ -3386,9 +3387,17 @@ static int exec_relay_log_event(THD* thd
"the slave_transaction_retries variable.",
slave_trans_retries);
}
- if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
- rli->trans_retries= 0; // restart from fresh
- }
+ else if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
+ {
+ /*
+ Only reset the retry counter if the event succeeded or
+ failed with a non-transient error. On a successful event,
+ the execution will proceed as usual; in the case of a
+ non-transient error, the slave will stop with an error.
+ */
+ rli->trans_retries= 0; // restart from fresh
+ }
+ }
return exec_res;
}
else
@@ -4613,7 +4622,7 @@ static int connect_to_master(THD* thd, M
suppress_warnings= 0;
sql_print_error("Slave I/O thread: error %s to master \
'%s@%s:%d': \
-Error: '%s' errno: %d retry-time: %d retries: %d",
+Error: '%s' errno: %d retry-time: %d retries: %lu",
(reconnect ? "reconnecting" : "connecting"),
mi->user,mi->host,mi->port,
mysql_error(mysql), last_errno,
--- 1.10/mysql-test/r/rpl_deadlock.result 2006-10-07 00:21:23 +03:00
+++ 1.11/mysql-test/r/rpl_deadlock.result 2006-10-07 00:21:23 +03:00
@@ -177,4 +177,90 @@ Master_SSL_Cert
Master_SSL_Cipher
Master_SSL_Key
Seconds_Behind_Master #
+drop table if exists t1;
+create table t1 (f int unique) engine=innodb;
+insert into t1 values (0);
+begin;
+select * from t1 where f = 0 for update;
+f
+0
+begin;
+insert into t1 values (1);
+update t1 set f=-1 where f = 0;
+commit;
+insert into t1 values (2);
+show slave status;
+Slave_IO_State #
+Master_Host 127.0.0.1
+Master_User root
+Master_Port MASTER_MYPORT
+Connect_Retry 1
+Master_Log_File master-bin.000001
+Read_Master_Log_Pos 19611
+Relay_Log_File #
+Relay_Log_Pos #
+Relay_Master_Log_File master-bin.000001
+Slave_IO_Running #
+Slave_SQL_Running No
+Replicate_Do_DB
+Replicate_Ignore_DB
+Replicate_Do_Table
+Replicate_Ignore_Table
+Replicate_Wild_Do_Table
+Replicate_Wild_Ignore_Table
+Last_Errno 1205
+Last_Error Error 'Lock wait timeout exceeded; try restarting transaction' on query.
Default database: 'test'. Query: 'update t1 set f=-1 where f = 0'
+Skip_Counter 0
+Exec_Master_Log_Pos 19220
+Relay_Log_Space #
+Until_Condition None
+Until_Log_File
+Until_Log_Pos 0
+Master_SSL_Allowed No
+Master_SSL_CA_File
+Master_SSL_CA_Path
+Master_SSL_Cert
+Master_SSL_Cipher
+Master_SSL_Key
+Seconds_Behind_Master #
+set @@global.sql_slave_skip_counter = 4;
+start slave;
+show slave status;
+Slave_IO_State #
+Master_Host 127.0.0.1
+Master_User root
+Master_Port MASTER_MYPORT
+Connect_Retry 1
+Master_Log_File master-bin.000001
+Read_Master_Log_Pos 19611
+Relay_Log_File #
+Relay_Log_Pos #
+Relay_Master_Log_File master-bin.000001
+Slave_IO_Running #
+Slave_SQL_Running Yes
+Replicate_Do_DB
+Replicate_Ignore_DB
+Replicate_Do_Table
+Replicate_Ignore_Table
+Replicate_Wild_Do_Table
+Replicate_Wild_Ignore_Table
+Last_Errno 0
+Last_Error
+Skip_Counter 0
+Exec_Master_Log_Pos 19611
+Relay_Log_Space #
+Until_Condition None
+Until_Log_File
+Until_Log_Pos 0
+Master_SSL_Allowed No
+Master_SSL_CA_File
+Master_SSL_CA_Path
+Master_SSL_Cert
+Master_SSL_Cipher
+Master_SSL_Key
+Seconds_Behind_Master #
+select * from t1;
+f
+0
+2
drop table t1,t2,t3,t4;
--- 1.12/mysql-test/t/rpl_deadlock.test 2006-10-07 00:21:23 +03:00
+++ 1.13/mysql-test/t/rpl_deadlock.test 2006-10-07 00:21:23 +03:00
@@ -112,6 +112,56 @@ select * from t2;
show slave status;
--horizontal_results
+# BUG20697 timeout to wait a lock by slave sql. The latter rolls back
+# and restarts its ta
+
+connection master;
+# prepare
+drop table if exists t1;
+create table t1 (f int unique) engine=innodb;
+insert into t1 values (0);
+
+sync_slave_with_master;
+# connection slave;
+# to block up slave sql later
+begin; select * from t1 where f = 0 for update;
+
+connection master;
+# to offend
+begin;
+insert into t1 values (1);
+update t1 set f=-1 where f = 0;
+commit;
+insert into t1 values (2); # to succeed
+save_master_pos;
+
+connection slave;
+wait_for_slave_to_stop;
+###sync_slave_with_master;
+
+--replace_column 1 # 8 # 9 # 11 # 23 # 33 #
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+--vertical_results
+show slave status;
+--horizontal_results
+
+set @@global.sql_slave_skip_counter = 4;
+start slave;
+
+sync_with_master;
+#--real_sleep 3
+
+--replace_column 1 # 8 # 9 # 11 # 23 # 33 #
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+--vertical_results
+show slave status;
+--horizontal_results
+
+#sync_slave_with_master;
+select * from t1;
+commit;
+
+
connection master;
drop table t1,t2,t3,t4;
sync_slave_with_master;
| Thread |
|---|
| • bk commit into 5.0 tree (aelkin:1.2295) BUG#16228 | Andrei Elkin | 6 Oct |