3881 Andrei Elkin 2012-03-29
Bug 13893363 - MTS IS MISSING THE ABILITY TO STOP A SLAVE AFTER PROCESSING ALL GAPS
The new UNTIL condition is an important feature to have
because --relay-log-recovery=1 and Change-Master can run in the presence of gaps.
The user has to execute
START SLAVE SQL_THREAD UNTIL SQL_AFTER_MTS_GAPS
if they need to switch from the parallel to the sequential execution mode
after the slave SQL thread or Worker threads have errored out in the parallel mode.
Also, a separate issue is fixed: DEADLOCK/WAIT_FOR_LOCK errors were incorrectly
demoted to warnings, although the slave does not retry at Worker execution.
A TODO is also added to relocate the checking of SQL_AFTER_MTS_GAPS and other
post-exec/schedule UNTIL options to the end of the read-execute loop (instead of
having it right after the read phase, which can lead to unnecessary hanging when
a condition is actually met).
@ mysql-test/suite/rpl/r/rpl_parallel_start_stop.result
results are updated.
@ mysql-test/suite/rpl/t/rpl_parallel_start_stop.test
until SQL_AFTER_MTS_GAPS tests are added.
@ mysql-test/suite/rpl/t/rpl_stm_until.test
A memo to relocate SLAVE UNTIL SQL_AFTER_MTS_GAPS is added.
@ sql/lex.h
a new lex symbol for SQL_AFTER_MTS_GAPS is added.
@ sql/rpl_rli.cc
UNTIL_SQL_AFTER_MTS_GAPS case is added to Relay_log_info::is_until_satisfied().
@ sql/rpl_rli.h
UNTIL_SQL_AFTER_MTS_GAPS is added to enum conditions.
@ sql/rpl_slave.cc
the cause of the demotion of Worker DEADLOCK/WAIT_FOR_LOCK errors to warnings is fixed;
change_master() improves a warning instructing how to proceed if MTS gaps are met;
SQL_AFTER_MTS_GAPS (as well as UNTIL_SQL_AFTER_GTIDS, which was added by another fix)
is added to the Until_Condition values displayed by Show-Slave-Status.
@ sql/sql_lex.h
struct st_lex_master_info is extended to record that
SQL_AFTER_MTS_GAPS is requested, so that the request is propagated into start_slave().
@ sql/sql_yacc.yy
SQL_AFTER_MTS_GAPS handling in parser is added.
modified:
mysql-test/suite/rpl/r/rpl_parallel_start_stop.result
mysql-test/suite/rpl/t/rpl_parallel_start_stop.test
mysql-test/suite/rpl/t/rpl_stm_until.test
sql/lex.h
sql/rpl_rli.cc
sql/rpl_rli.h
sql/rpl_slave.cc
sql/sql_lex.h
sql/sql_yacc.yy
3880 Tor Didriksen 2012-03-29 [merge]
merge 5.5 => trunk
modified:
mysql-test/r/ps.result
mysql-test/r/sp.result
mysql-test/t/ps.test
mysql-test/t/sp.test
=== modified file 'mysql-test/suite/rpl/r/rpl_parallel_start_stop.result'
--- a/mysql-test/suite/rpl/r/rpl_parallel_start_stop.result 2012-03-15 09:55:15 +0000
+++ b/mysql-test/suite/rpl/r/rpl_parallel_start_stop.result 2012-03-29 15:10:31 +0000
@@ -78,6 +78,50 @@ drop table t2m;
drop table t2;
drop view worker_proc_list;
drop view coord_proc_list;
-set @@global.slave_parallel_workers= @save.slave_parallel_workers;
set @@global.slave_transaction_retries= @save.slave_transaction_retries;
+include/stop_slave.inc
+start slave until sql_after_mts_gaps relay_log_file='dummy';
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'relay_log_file='dummy'' at line 1
+start slave until sql_after_mts_gaps relay_log_pos=0;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'relay_log_pos=0' at line 1
+start slave until sql_after_mts_gaps master_log_file='dummy';
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'master_log_file='dummy'' at line 1
+start slave until sql_after_mts_gaps master_log_pos=0;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'master_log_pos=0' at line 1
+start slave until sql_after_mts_gaps SQL_BEFORE_GTIDS='dummy';
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'SQL_BEFORE_GTIDS='dummy'' at line 1
+start slave until sql_after_mts_gaps;
+show warnings;
+Level Code Message
+Error 1064 You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'SQL_BEFORE_GTIDS='dummy'' at line 1
+call mtr.add_suppression('Slave SQL: Could not execute Update_rows event on table d1.t1; Deadlock found when trying to get lock');
+include/start_slave.inc
+create database d1;
+create database d2;
+create table d1.t1 (a int primary key) engine=innodb;
+create table d2.t1 (a int primary key) engine=innodb;
+create table d1.t2m (a int) engine=myisam;
+insert into d1.t1 values (1),(2),(3);
+insert into d2.t1 values (1),(2),(3);
+insert into d1.t2m values (1);
+begin;
+update d1.t1 set a=31 where a=3;
+insert into d1.t1 values (5),(6),(7);
+begin;
+update d1.t1 set a=20 where a=2;
+insert into d1.t2m values (2);
+update d1.t1 set a=30 where a=3;
+insert into d1.t1 values (4);
+commit;
+delete from d2.t1;
+delete from d1.t1;
+update d1.t1 set a=21 where a=2;
+include/wait_for_slave_sql_error.inc [errno=1213, 1205]
+rollback;
+start slave until sql_after_mts_gaps;
+include/wait_for_slave_sql_to_stop.inc
+include/start_slave.inc
+drop database d1;
+drop database d2;
+set @@global.slave_parallel_workers= @save.slave_parallel_workers;
include/rpl_end.inc
=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_start_stop.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test 2012-03-15 09:55:15 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test 2012-03-29 15:10:31 +0000
@@ -237,7 +237,8 @@ if (`select $a - max(a) from t1`)
}
#
-# UNTIL condition is not supported by Parallel slave to reject
+# UNTIL condition othen than SQL_AFTER_MTS_GAPS
+# is not supported by Parallel slave to reject
# with a warning and no Worker thread is started
#
@@ -278,8 +279,143 @@ sync_slave_with_master;
drop view worker_proc_list;
drop view coord_proc_list;
-set @@global.slave_parallel_workers= @save.slave_parallel_workers;
+# reuse non-zero Workers value in the follwing tests
+#set @@global.slave_parallel_workers= @save.slave_parallel_workers;
set @@global.slave_transaction_retries= @save.slave_transaction_retries;
---source include/rpl_end.inc
+#
+# START SLAVE UNTIL SQL_AFTER_MTS_GAPS
+#
+# A new UNTIL condition is introduced as a tool to
+# fill gaps in the sequence of executed transaction started
+# at Exec_Master_Log_Pos.
+# The gaps could be caused by the previous slave session stop
+# with an error, or it was killed, or the server crashed.
+#
+
+connection slave;
+
+source include/stop_slave.inc;
+
+# Show SQL_AFTER_MTS_GAPS is meaningless in combination with coordinates
+# related options
+--error 1064
+start slave until sql_after_mts_gaps relay_log_file='dummy';
+--error 1064
+start slave until sql_after_mts_gaps relay_log_pos=0;
+--error 1064
+start slave until sql_after_mts_gaps master_log_file='dummy';
+--error 1064
+start slave until sql_after_mts_gaps master_log_pos=0;
+--error 1064
+start slave until sql_after_mts_gaps SQL_BEFORE_GTIDS='dummy';
+
+# check out a "blank" run which leaves an info line into error-log
+start slave until sql_after_mts_gaps;
+
+show warnings;
+call mtr.add_suppression('Slave SQL: Could not execute Update_rows event on table d1.t1; Deadlock found when trying to get lock');
+
+# regular start now
+source include/start_slave.inc;
+
+# set up gaps when slave sql errors out
+connection master;
+
+create database d1;
+create database d2;
+create table d1.t1 (a int primary key) engine=innodb;
+create table d2.t1 (a int primary key) engine=innodb;
+create table d1.t2m (a int) engine=myisam; # non-trans engine to detect deadlock
+insert into d1.t1 values (1),(2),(3);
+insert into d2.t1 values (1),(2),(3);
+insert into d1.t2m values (1);
+
+sync_slave_with_master;
+#connection slave;
+begin; # the blocker
+# set up a deadlock at the 1st job
+update d1.t1 set a=31 where a=3;
+insert into d1.t1 values (5),(6),(7);
+
+# create the 1st job to get blocked on the slave
+connection master;
+begin;
+update d1.t1 set a=20 where a=2;
+insert into d1.t2m values (2);
+update d1.t1 set a=30 where a=3;
+insert into d1.t1 values (4);
+commit;
+
+# create the 2nd job for another worker
+delete from d2.t1;
+
+# create the 3nd job to help UNTIL SQL_AFTER_MTS_GAPS be reached
+# (todo: if the last gap event ends the relay-log SQL thread will
+# hang - to be fixed with MTS support for regular UNTIL:s)
+delete from d1.t1;
+
+# wait till the 2nd job will be done
+connection slave1;
+let $count= 0;
+let $table= d2.t1;
+source include/wait_until_rows_count.inc;
+
+# proceed with 1st job into its middle
+connection slave;
+let $count= 2;
+let $table= d1.t2m;
+source include/wait_until_rows_count.inc;
+# must victimize the master trans because of deadlock or timeout
+update d1.t1 set a=21 where a=2;
+
+# slave is stopped
+# setting timeout to be as twice as greater than innodb's.
+--let $slave_timeout=`select 2*@@global.innodb_lock_wait_timeout`
+
+let $slave_sql_errno= 1213, 1205;
+source include/wait_for_slave_sql_error.inc;
+
+rollback; # the blocker
+
+# find out Exec_Master_Log_Pos to store it in Exec_0
+let $exec_pos_0= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
+
+start slave until sql_after_mts_gaps;
+
+# Efficiency of UNTIL proof:
+source include/wait_for_slave_sql_to_stop.inc;
+
+# Consistency proof:
+
+if (`select count(*) <> 4 from d1.t1`)
+{
+ --echo *** Something is wrong in recovery ***
+ --die
+}
+
+# Efficiency of gap filling proof:
+# find out Exec_Master_Log_Pos and compare with Exec_0
+let $exec_pos_1= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
+
+if (`select $exec_pos_1 - $exec_pos_0 <= 0`)
+{
+ --echo *** No gap transaction is executed as expected ***
+ --connection slave
+ show slave status;
+ --connection master
+ show master status;
+ --die
+}
+
+# UNTIL SQL_AFTER_MTS_GAPS cleanup
+source include/start_slave.inc;
+connection master;
+drop database d1;
+drop database d2;
+
+sync_slave_with_master;
+set @@global.slave_parallel_workers= @save.slave_parallel_workers;
+
+--source include/rpl_end.inc
=== modified file 'mysql-test/suite/rpl/t/rpl_stm_until.test'
--- a/mysql-test/suite/rpl/t/rpl_stm_until.test 2011-08-19 13:04:28 +0000
+++ b/mysql-test/suite/rpl/t/rpl_stm_until.test 2012-03-29 15:10:31 +0000
@@ -124,6 +124,9 @@ start slave sql_thread;
--replace_result 776 MASTER_LOG_POS
start slave until master_log_file='master-bin.000001', master_log_pos=776;
+--echo *** TODO: consider to relocate SLAVE UNTIL SQL_AFTER_MTS_GAPS testing ***
+--echo *** from rpl_parallel_start_stop when this test gets enabled ***
+
#
# bug#47210 first execution of "start slave until" stops too early
#
=== modified file 'sql/lex.h'
--- a/sql/lex.h 2012-03-06 14:29:42 +0000
+++ b/sql/lex.h 2012-03-29 15:10:31 +0000
@@ -530,7 +530,7 @@ static SYMBOL symbols[] = {
{ "SQLSTATE", SYM(SQLSTATE_SYM)},
{ "SQLWARNING", SYM(SQLWARNING_SYM)},
{ "SQL_AFTER_GTIDS", SYM(SQL_AFTER_GTIDS)},
- { "SQL_BEFORE_GTIDS", SYM(SQL_BEFORE_GTIDS)},
+ { "SQL_AFTER_MTS_GAPS", SYM(SQL_AFTER_MTS_GAPS)},
{ "SQL_BIG_RESULT", SYM(SQL_BIG_RESULT)},
{ "SQL_BUFFER_RESULT", SYM(SQL_BUFFER_RESULT)},
{ "SQL_CACHE", SYM(SQL_CACHE_SYM)},
=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc 2012-03-28 18:01:14 +0000
+++ b/sql/rpl_rli.cc 2012-03-29 15:10:31 +0000
@@ -1251,6 +1251,29 @@ bool Relay_log_info::is_until_satisfied(
DBUG_RETURN(false);
break;
+ case UNTIL_SQL_AFTER_MTS_GAPS:
+ /*
+ TODO: this condition is actually post-execution or post-scheduling
+ so the proper place to check it before SQL thread goes
+ into next_event() where it can wait while the condition
+ has been satisfied already.
+ It's deployed here temporarily to be fixed along the regular UNTIL
+ support for MTS is provided.
+ */
+ if (mts_recovery_group_cnt == 0)
+ {
+ sql_print_information("Slave SQL thread stopped according to "
+ "UNTIL SQL_AFTER_MTS_GAPS as it has "
+ "processed all gap transactions left from "
+ "the previous slave session.");
+ DBUG_RETURN(true);
+ }
+ else
+ {
+ DBUG_RETURN(false);
+ }
+ break;
+
case UNTIL_NONE:
DBUG_ASSERT(0);
break;
=== modified file 'sql/rpl_rli.h'
--- a/sql/rpl_rli.h 2012-03-28 15:24:17 +0000
+++ b/sql/rpl_rli.h 2012-03-29 15:10:31 +0000
@@ -309,7 +309,8 @@ public:
thread is running).
*/
enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS,
- UNTIL_SQL_BEFORE_GTIDS, UNTIL_SQL_AFTER_GTIDS} until_condition;
+ UNTIL_SQL_BEFORE_GTIDS, UNTIL_SQL_AFTER_GTIDS, UNTIL_SQL_AFTER_MTS_GAPS}
+ until_condition;
char until_log_name[FN_REFLEN];
ulonglong until_log_pos;
/* extension extracted from log_name and converted to int */
=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc 2012-03-28 15:24:17 +0000
+++ b/sql/rpl_slave.cc 2012-03-29 15:10:31 +0000
@@ -406,7 +406,7 @@ int init_recovery(Master_info* mi, const
"was stopped with an error or killed in MTS mode; "
"consider using RESET SLAVE or restart the server "
"with --relay-log-recovery = 0 followed by "
- "START SLAVE");
+ "START SLAVE UNTIL SQL_AFTER_MTS_GAPS");
}
}
@@ -2574,11 +2574,32 @@ bool show_slave_status(THD* thd, Master_
protocol->store((ulonglong) mi->rli->get_group_master_log_pos());
protocol->store((ulonglong) mi->rli->log_space_total);
- protocol->store(
- mi->rli->until_condition == Relay_log_info::UNTIL_NONE ? "None" :
- (mi->rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ? "Master" :
- (mi->rli->until_condition == Relay_log_info::UNTIL_RELAY_POS ? "Relay" :
- "SQL_BEFORE_GTIDS")), &my_charset_bin);
+ const char *until_type;
+
+ switch (mi->rli->until_condition)
+ {
+ case Relay_log_info::UNTIL_NONE:
+ until_type= "None";
+ break;
+ case Relay_log_info::UNTIL_MASTER_POS:
+ until_type= "Master";
+ break;
+ case Relay_log_info::UNTIL_RELAY_POS:
+ until_type= "Relay";
+ break;
+ case Relay_log_info::UNTIL_SQL_BEFORE_GTIDS:
+ until_type= "SQL_BEFORE_GTIDS";
+ break;
+ case Relay_log_info::UNTIL_SQL_AFTER_GTIDS:
+ until_type= "SQL_AFTER_GTIDS";
+ break;
+ case Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS:
+ until_type= "SQL_AFTER_MTS_GAPS";
+ break;
+ default:
+ DBUG_ASSERT(0);
+ }
+ protocol->store(until_type, &my_charset_bin);
protocol->store(mi->rli->until_log_name, &my_charset_bin);
protocol->store((ulonglong) mi->rli->until_log_pos);
@@ -3523,7 +3544,8 @@ static int exec_relay_log_event(THD* thd
hits the UNTIL barrier.
MTS: since the master and the relay-group coordinates change
asynchronously logics of rli->is_until_satisfied() can't apply.
- Hence, UNTIL forces the sequential applying.
+ A special UNTIL_SQL_AFTER_MTS_GAPS is still deployed here
+ temporarily (see is_until_satisfied todo).
*/
if (rli->until_condition != Relay_log_info::UNTIL_NONE &&
rli->is_until_satisfied(thd, ev))
@@ -3591,7 +3613,8 @@ static int exec_relay_log_event(THD* thd
if (slave_trans_retries)
{
int UNINIT_VAR(temp_err);
- if (exec_res && (temp_err= rli->has_temporary_error(thd)) &&
+ if (exec_res && !is_mts_worker(thd) &&
+ (temp_err= rli->has_temporary_error(thd)) &&
!thd->transaction.all.cannot_safely_rollback())
{
const char *errmsg;
@@ -7461,6 +7484,10 @@ int start_slave(THD* thd , Master_info*
}
global_sid_lock.unlock();
}
+ else if (thd->lex->mi.until_after_gaps)
+ {
+ mi->rli->until_condition= Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS;
+ }
else
mi->rli->clear_until_condition();
=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h 2012-03-29 13:23:06 +0000
+++ b/sql/sql_lex.h 2012-03-29 15:10:31 +0000
@@ -214,6 +214,7 @@ typedef struct st_lex_master_info
ulong server_id, retry_count;
char *gtid;
enum {UNTIL_SQL_BEFORE_GTIDS= 0, UNTIL_SQL_AFTER_GTIDS} gtid_until_condition;
+ bool until_after_gaps;
/*
Enum is used for making it possible to detect if the user
=== modified file 'sql/sql_yacc.yy'
--- a/sql/sql_yacc.yy 2012-03-27 08:43:25 +0000
+++ b/sql/sql_yacc.yy 2012-03-29 15:10:31 +0000
@@ -1511,6 +1511,7 @@ bool my_yyoverflow(short **a, YYSTYPE **
%token SQLSTATE_SYM /* SQL-2003-R */
%token SQLWARNING_SYM /* SQL-2003-R */
%token SQL_AFTER_GTIDS /* MYSQL */
+%token SQL_AFTER_MTS_GAPS /* MYSQL */
%token SQL_BEFORE_GTIDS /* MYSQL */
%token SQL_BIG_RESULT
%token SQL_BUFFER_RESULT
@@ -7685,7 +7686,13 @@ slave_until:
lex->mi.gtid) ||
!((lex->mi.log_file_name && lex->mi.pos) ||
(lex->mi.relay_log_name && lex->mi.relay_log_pos) ||
- lex->mi.gtid))
+ lex->mi.gtid ||
+ lex->mi.until_after_gaps) ||
+ /* SQL_AFTER_MTS_GAPS is meaningless in combination */
+ /* with any other coordinates related options */
+ ((lex->mi.log_file_name || lex->mi.pos || lex->mi.relay_log_name
+ || lex->mi.relay_log_pos || lex->mi.gtid)
+ && lex->mi.until_after_gaps))
{
my_message(ER_BAD_SLAVE_UNTIL_COND,
ER(ER_BAD_SLAVE_UNTIL_COND), MYF(0));
@@ -7707,6 +7714,10 @@ slave_until_opts:
Lex->mi.gtid= $3.str;
Lex->mi.gtid_until_condition= LEX_MASTER_INFO::UNTIL_SQL_AFTER_GTIDS;
}
+ | SQL_AFTER_MTS_GAPS
+ {
+ Lex->mi.until_after_gaps= true;
+ }
;
checksum:
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-trunk branch (andrei.elkin:3880 to 3881) | Andrei Elkin | 29 Mar |