Below is the list of changes that have just been committed into a local
5.1 repository of mats. When mats does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-10-20 20:16:12+02:00, mats@stripped +4 -0
BUG#24860 (Incorrect SLAVE_TRANSACTION_RETRIES code can result in slave stuck):
If a temporary error occured inside a group on an event that was not the first
event of the group, the slave could get stuck because the retry counter is reset
whenever an event was executed successfully.
This patch only reset the retry counter when an entire group has been successfully
executed, or failed with a non-transient error.
mysql-test/suite/rpl/r/rpl_temporary_errors.result@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +81 -0
New BitKeeper file ``mysql-test/suite/rpl/r/rpl_temporary_errors.result''
mysql-test/suite/rpl/r/rpl_temporary_errors.result@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +0 -0
mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +3 -0
New BitKeeper file ``mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt''
mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +0 -0
mysql-test/suite/rpl/t/rpl_temporary_errors.test@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +29 -0
New BitKeeper file ``mysql-test/suite/rpl/t/rpl_temporary_errors.test''
mysql-test/suite/rpl/t/rpl_temporary_errors.test@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +0 -0
sql/slave.cc@stripped, 2007-10-20 20:16:08+02:00, mats@stripped +21 -5
Adding debug printouts to every place where Relay_log_info::trans_retries
is changed and to has_temporary_error() to print the error when returning.
Adding debug variable all_errors_are_temporary_errors to make all errors
for slave thread temporary errors.
Adding code so that the Relay_log_info::trans_retries is only reset when
an entire group was sucessfully executed, or a non-temporary error occured.
diff -Nrup a/mysql-test/suite/rpl/r/rpl_temporary_errors.result b/mysql-test/suite/rpl/r/rpl_temporary_errors.result
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/mysql-test/suite/rpl/r/rpl_temporary_errors.result 2007-10-20 20:16:08 +02:00
@@ -0,0 +1,81 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+**** On Master ****
+SET SESSION BINLOG_FORMAT=ROW;
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4);
+**** On Slave ****
+SHOW STATUS LIKE 'Slave_retried_transactions';
+Variable_name Value
+Slave_retried_transactions 0
+UPDATE t1 SET a = 5, b = 47 WHERE a = 1;
+SELECT * FROM t1;
+a b
+5 47
+2 2
+3 3
+4 4
+**** On Master ****
+UPDATE t1 SET a = 5, b = 5 WHERE a = 1;
+SELECT * FROM t1;
+a b
+5 5
+2 2
+3 3
+4 4
+**** On Slave ****
+SHOW STATUS LIKE 'Slave_retried_transactions';
+Variable_name Value
+Slave_retried_transactions 2
+SELECT * FROM t1;
+a b
+5 47
+2 2
+3 3
+4 4
+SHOW SLAVE STATUS;
+Slave_IO_State #
+Master_Host 127.0.0.1
+Master_User root
+Master_Port MASTER_PORT
+Connect_Retry 1
+Master_Log_File master-bin.000001
+Read_Master_Log_Pos 408
+Relay_Log_File #
+Relay_Log_Pos #
+Relay_Master_Log_File master-bin.000001
+Slave_IO_Running Yes
+Slave_SQL_Running No
+Replicate_Do_DB
+Replicate_Ignore_DB
+Replicate_Do_Table
+Replicate_Ignore_Table #
+Replicate_Wild_Do_Table
+Replicate_Wild_Ignore_Table
+Last_Errno 1032
+Last_Error Error in Update_rows event: error during transaction execution on table test.t1. Can't find record in 't1'
+Skip_Counter 0
+Exec_Master_Log_Pos 318
+Relay_Log_Space #
+Until_Condition None
+Until_Log_File
+Until_Log_Pos 0
+Master_SSL_Allowed No
+Master_SSL_CA_File
+Master_SSL_CA_Path
+Master_SSL_Cert
+Master_SSL_Cipher
+Master_SSL_Key
+Seconds_Behind_Master #
+Master_SSL_Verify_Server_Cert No
+Last_IO_Errno #
+Last_IO_Error #
+Last_SQL_Errno 1032
+Last_SQL_Error Error in Update_rows event: error during transaction execution on table test.t1. Can't find record in 't1'
+DROP TABLE t1;
+**** On Master ****
+DROP TABLE t1;
diff -Nrup a/mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt b/mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/mysql-test/suite/rpl/t/rpl_temporary_errors-slave.opt 2007-10-20 20:16:08 +02:00
@@ -0,0 +1,3 @@
+--loose-debug="+d,all_errors_are_temporary_errors" --slave-transaction-retries=2
+
+
diff -Nrup a/mysql-test/suite/rpl/t/rpl_temporary_errors.test b/mysql-test/suite/rpl/t/rpl_temporary_errors.test
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/mysql-test/suite/rpl/t/rpl_temporary_errors.test 2007-10-20 20:16:08 +02:00
@@ -0,0 +1,29 @@
+source include/master-slave.inc;
+
+--echo **** On Master ****
+connection master;
+SET SESSION BINLOG_FORMAT=ROW;
+CREATE TABLE t1 (a INT PRIMARY KEY, b INT);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,3), (4,4);
+--echo **** On Slave ****
+sync_slave_with_master;
+SHOW STATUS LIKE 'Slave_retried_transactions';
+UPDATE t1 SET a = 5, b = 47 WHERE a = 1;
+SELECT * FROM t1;
+--echo **** On Master ****
+connection master;
+UPDATE t1 SET a = 5, b = 5 WHERE a = 1;
+save_master_pos;
+SELECT * FROM t1;
+#SHOW BINLOG EVENTS;
+--echo **** On Slave ****
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+SHOW STATUS LIKE 'Slave_retried_transactions';
+SELECT * FROM t1;
+source include/show_slave_status.inc;
+DROP TABLE t1;
+
+--echo **** On Master ****
+connection master;
+DROP TABLE t1;
diff -Nrup a/sql/slave.cc b/sql/slave.cc
--- a/sql/slave.cc 2007-10-13 14:58:06 +02:00
+++ b/sql/slave.cc 2007-10-20 20:16:08 +02:00
@@ -1714,7 +1714,14 @@ static int has_temporary_error(THD *thd)
DBUG_ENTER("has_temporary_error");
if (thd->is_fatal_error)
+ {
+ DBUG_PRINT("info", ("thd->net.last_errno: %s", ER(thd->net.last_errno)));
DBUG_RETURN(0);
+ }
+
+ DBUG_EXECUTE_IF("all_errors_are_temporary_errors",
+ if (thd->net.last_errno)
+ thd->net.last_errno= ER_LOCK_DEADLOCK;);
/*
Temporary error codes:
@@ -1723,7 +1730,10 @@ static int has_temporary_error(THD *thd)
*/
if (thd->net.last_errno == ER_LOCK_DEADLOCK ||
thd->net.last_errno == ER_LOCK_WAIT_TIMEOUT)
+ {
+ DBUG_PRINT("info", ("thd->net.last_errno: %s", ER(thd->net.last_errno)));
DBUG_RETURN(1);
+ }
#ifdef HAVE_NDB_BINLOG
/*
@@ -1905,7 +1915,8 @@ static int exec_relay_log_event(THD* thd
}
if (slave_trans_retries)
{
- if (exec_res && has_temporary_error(thd))
+ int temp_err;
+ if (exec_res && (temp_err= has_temporary_error(thd)))
{
const char *errmsg;
/*
@@ -1953,15 +1964,19 @@ static int exec_relay_log_event(THD* thd
"the slave_transaction_retries variable.",
slave_trans_retries);
}
- else if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
+ else if (exec_res && !temp_err ||
+ (opt_using_transactions &&
+ rli->group_relay_log_pos == rli->event_relay_log_pos))
{
/*
- Only reset the retry counter if the event succeeded or
- failed with a non-transient error. On a successful event,
- the execution will proceed as usual; in the case of a
+ Only reset the retry counter if the entire group succeeded
+ or failed with a non-transient error. On a successful
+ event, the execution will proceed as usual; in the case of a
non-transient error, the slave will stop with an error.
*/
rli->trans_retries= 0; // restart from fresh
+ DBUG_PRINT("info", ("Resetting retry counter, rli->trans_retries: %d",
+ rli->trans_retries));
}
}
DBUG_RETURN(exec_res);
@@ -2450,6 +2465,7 @@ pthread_handler_t handle_slave_sql(void
rli->ignore_log_space_limit= 0;
pthread_mutex_unlock(&rli->log_space_lock);
rli->trans_retries= 0; // start from "no error"
+ DBUG_PRINT("info", ("rli->trans_retries: %d", rli->trans_retries));
if (init_relay_log_pos(rli,
rli->group_relay_log_name,