From: Andrei Elkin Date: March 29 2012 3:11pm Subject: bzr push into mysql-trunk branch (andrei.elkin:3880 to 3881) List-Archive: http://lists.mysql.com/commits/143360 Message-Id: <201203291511.q2TFBEPr002393@mysql1000.dsl.inet.fi> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3881 Andrei Elkin 2012-03-29 Bug 13893363 - MTS IS MISSING THE ABILITY TO STOP A SLAVE AFTER PROCESSING ALL GAPS The new UNTIL condition is an important feature to have because of --relay-log-recovery=1 and Change-Master can run in the presence of gaps. The user would have to execute START SLAVE SQL_THREAD UNTIL SQL_AFTER_MTS_GAPS if he needed to switch from the parallel to the sequential execution mode after the slave SQL thread or Worker threads errored out in the parallel mode. Also, a separate issue of incorrect demotion of DEADLOCK/WAIT_FOR_LOCK errors into warnings is fixed, because at Worker execution the slave does not retry. A TODO is also added to relocate the checking of SQL_AFTER_MTS_GAPS and other post-execution/post-scheduling UNTIL options to the end of the read-execute loop (instead of having it right after the read phase, which can lead to unnecessary hanging when a condition has actually been met). @ mysql-test/suite/rpl/r/rpl_parallel_start_stop.result results are updated. @ mysql-test/suite/rpl/t/rpl_parallel_start_stop.test until SQL_AFTER_MTS_GAPS tests are added. @ mysql-test/suite/rpl/t/rpl_stm_until.test A memo to relocate SLAVE UNTIL SQL_AFTER_MTS_GAPS is added. @ sql/lex.h a new lex symbol for SQL_AFTER_MTS_GAPS is added. @ sql/rpl_rli.cc UNTIL_SQL_AFTER_MTS_GAPS case is added to Relay_log_info::is_until_satisfied(). @ sql/rpl_rli.h UNTIL_SQL_AFTER_MTS_GAPS is added to enum conditions. 
@ sql/rpl_slave.cc a cause of demotion of Worker DEADLOCK/WAIT_FOR_LOCK errors into warnings is fixed; change_master() improves a warning instructing how to proceed if MTS gaps are met; SQL_AFTER_MTS_GAPS (as well as UNTIL_SQL_AFTER_GTIDS, added by another fix) is added to the Show-Slave-Status display. @ sql/sql_lex.h struct st_lex_master_info is extended to hold the fact that SQL_AFTER_MTS_GAPS is requested, in order to propagate it into start_slave(). @ sql/sql_yacc.yy SQL_AFTER_MTS_GAPS handling in parser is added. modified: mysql-test/suite/rpl/r/rpl_parallel_start_stop.result mysql-test/suite/rpl/t/rpl_parallel_start_stop.test mysql-test/suite/rpl/t/rpl_stm_until.test sql/lex.h sql/rpl_rli.cc sql/rpl_rli.h sql/rpl_slave.cc sql/sql_lex.h sql/sql_yacc.yy 3880 Tor Didriksen 2012-03-29 [merge] merge 5.5 => trunk modified: mysql-test/r/ps.result mysql-test/r/sp.result mysql-test/t/ps.test mysql-test/t/sp.test === modified file 'mysql-test/suite/rpl/r/rpl_parallel_start_stop.result' --- a/mysql-test/suite/rpl/r/rpl_parallel_start_stop.result 2012-03-15 09:55:15 +0000 +++ b/mysql-test/suite/rpl/r/rpl_parallel_start_stop.result 2012-03-29 15:10:31 +0000 @@ -78,6 +78,50 @@ drop table t2m; drop table t2; drop view worker_proc_list; drop view coord_proc_list; -set @@global.slave_parallel_workers= @save.slave_parallel_workers; set @@global.slave_transaction_retries= @save.slave_transaction_retries; +include/stop_slave.inc +start slave until sql_after_mts_gaps relay_log_file='dummy'; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'relay_log_file='dummy'' at line 1 +start slave until sql_after_mts_gaps relay_log_pos=0; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'relay_log_pos=0' at line 1 +start slave until sql_after_mts_gaps master_log_file='dummy'; +ERROR 42000: You have an error 
in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'master_log_file='dummy'' at line 1 +start slave until sql_after_mts_gaps master_log_pos=0; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'master_log_pos=0' at line 1 +start slave until sql_after_mts_gaps SQL_BEFORE_GTIDS='dummy'; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'SQL_BEFORE_GTIDS='dummy'' at line 1 +start slave until sql_after_mts_gaps; +show warnings; +Level Code Message +Error 1064 You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'SQL_BEFORE_GTIDS='dummy'' at line 1 +call mtr.add_suppression('Slave SQL: Could not execute Update_rows event on table d1.t1; Deadlock found when trying to get lock'); +include/start_slave.inc +create database d1; +create database d2; +create table d1.t1 (a int primary key) engine=innodb; +create table d2.t1 (a int primary key) engine=innodb; +create table d1.t2m (a int) engine=myisam; +insert into d1.t1 values (1),(2),(3); +insert into d2.t1 values (1),(2),(3); +insert into d1.t2m values (1); +begin; +update d1.t1 set a=31 where a=3; +insert into d1.t1 values (5),(6),(7); +begin; +update d1.t1 set a=20 where a=2; +insert into d1.t2m values (2); +update d1.t1 set a=30 where a=3; +insert into d1.t1 values (4); +commit; +delete from d2.t1; +delete from d1.t1; +update d1.t1 set a=21 where a=2; +include/wait_for_slave_sql_error.inc [errno=1213, 1205] +rollback; +start slave until sql_after_mts_gaps; +include/wait_for_slave_sql_to_stop.inc +include/start_slave.inc +drop database d1; +drop database d2; +set @@global.slave_parallel_workers= @save.slave_parallel_workers; include/rpl_end.inc === modified file 
'mysql-test/suite/rpl/t/rpl_parallel_start_stop.test' --- a/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test 2012-03-15 09:55:15 +0000 +++ b/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test 2012-03-29 15:10:31 +0000 @@ -237,7 +237,8 @@ if (`select $a - max(a) from t1`) } # -# UNTIL condition is not supported by Parallel slave to reject +# UNTIL condition other than SQL_AFTER_MTS_GAPS +# is not supported by Parallel slave to reject # with a warning and no Worker thread is started # @@ -278,8 +279,143 @@ sync_slave_with_master; drop view worker_proc_list; drop view coord_proc_list; -set @@global.slave_parallel_workers= @save.slave_parallel_workers; +# reuse non-zero Workers value in the following tests +#set @@global.slave_parallel_workers= @save.slave_parallel_workers; set @@global.slave_transaction_retries= @save.slave_transaction_retries; ---source include/rpl_end.inc +# +# START SLAVE UNTIL SQL_AFTER_MTS_GAPS +# +# A new UNTIL condition is introduced as a tool to +# fill gaps in the sequence of executed transactions starting +# at Exec_Master_Log_Pos. +# The gaps could be caused by the previous slave session stop +# with an error, or it was killed, or the server crashed. 
+# + +connection slave; + +source include/stop_slave.inc; + +# Show SQL_AFTER_MTS_GAPS is meaningless in combination with coordinates +# related options +--error 1064 +start slave until sql_after_mts_gaps relay_log_file='dummy'; +--error 1064 +start slave until sql_after_mts_gaps relay_log_pos=0; +--error 1064 +start slave until sql_after_mts_gaps master_log_file='dummy'; +--error 1064 +start slave until sql_after_mts_gaps master_log_pos=0; +--error 1064 +start slave until sql_after_mts_gaps SQL_BEFORE_GTIDS='dummy'; + +# check out a "blank" run which leaves an info line into error-log +start slave until sql_after_mts_gaps; + +show warnings; +call mtr.add_suppression('Slave SQL: Could not execute Update_rows event on table d1.t1; Deadlock found when trying to get lock'); + +# regular start now +source include/start_slave.inc; + +# set up gaps when slave sql errors out +connection master; + +create database d1; +create database d2; +create table d1.t1 (a int primary key) engine=innodb; +create table d2.t1 (a int primary key) engine=innodb; +create table d1.t2m (a int) engine=myisam; # non-trans engine to detect deadlock +insert into d1.t1 values (1),(2),(3); +insert into d2.t1 values (1),(2),(3); +insert into d1.t2m values (1); + +sync_slave_with_master; +#connection slave; +begin; # the blocker +# set up a deadlock at the 1st job +update d1.t1 set a=31 where a=3; +insert into d1.t1 values (5),(6),(7); + +# create the 1st job to get blocked on the slave +connection master; +begin; +update d1.t1 set a=20 where a=2; +insert into d1.t2m values (2); +update d1.t1 set a=30 where a=3; +insert into d1.t1 values (4); +commit; + +# create the 2nd job for another worker +delete from d2.t1; + +# create the 3rd job to help UNTIL SQL_AFTER_MTS_GAPS be reached +# (todo: if the last gap event ends the relay-log SQL thread will +# hang - to be fixed with MTS support for regular UNTIL:s) +delete from d1.t1; + +# wait till the 2nd job will be done +connection slave1; +let $count= 0; 
+let $table= d2.t1; +source include/wait_until_rows_count.inc; + +# proceed with 1st job into its middle +connection slave; +let $count= 2; +let $table= d1.t2m; +source include/wait_until_rows_count.inc; +# must victimize the master trans because of deadlock or timeout +update d1.t1 set a=21 where a=2; + +# slave is stopped +# setting timeout to be as twice as greater than innodb's. +--let $slave_timeout=`select 2*@@global.innodb_lock_wait_timeout` + +let $slave_sql_errno= 1213, 1205; +source include/wait_for_slave_sql_error.inc; + +rollback; # the blocker + +# find out Exec_Master_Log_Pos to store it in Exec_0 +let $exec_pos_0= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1); + +start slave until sql_after_mts_gaps; + +# Efficiency of UNTIL proof: +source include/wait_for_slave_sql_to_stop.inc; + +# Consistency proof: + +if (`select count(*) <> 4 from d1.t1`) +{ + --echo *** Something is wrong in recovery *** + --die +} + +# Efficiency of gap filling proof: +# find out Exec_Master_Log_Pos and compare with Exec_0 +let $exec_pos_1= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1); + +if (`select $exec_pos_1 - $exec_pos_0 <= 0`) +{ + --echo *** No gap transaction is executed as expected *** + --connection slave + show slave status; + --connection master + show master status; + --die +} + +# UNTIL SQL_AFTER_MTS_GAPS cleanup +source include/start_slave.inc; +connection master; +drop database d1; +drop database d2; + +sync_slave_with_master; +set @@global.slave_parallel_workers= @save.slave_parallel_workers; + +--source include/rpl_end.inc === modified file 'mysql-test/suite/rpl/t/rpl_stm_until.test' --- a/mysql-test/suite/rpl/t/rpl_stm_until.test 2011-08-19 13:04:28 +0000 +++ b/mysql-test/suite/rpl/t/rpl_stm_until.test 2012-03-29 15:10:31 +0000 @@ -124,6 +124,9 @@ start slave sql_thread; --replace_result 776 MASTER_LOG_POS start slave until master_log_file='master-bin.000001', master_log_pos=776; +--echo *** TODO: consider to relocate SLAVE UNTIL 
SQL_AFTER_MTS_GAPS testing *** +--echo *** from rpl_parallel_start_stop when this test gets enabled *** + # # bug#47210 first execution of "start slave until" stops too early # === modified file 'sql/lex.h' --- a/sql/lex.h 2012-03-06 14:29:42 +0000 +++ b/sql/lex.h 2012-03-29 15:10:31 +0000 @@ -530,7 +530,7 @@ static SYMBOL symbols[] = { { "SQLSTATE", SYM(SQLSTATE_SYM)}, { "SQLWARNING", SYM(SQLWARNING_SYM)}, { "SQL_AFTER_GTIDS", SYM(SQL_AFTER_GTIDS)}, - { "SQL_BEFORE_GTIDS", SYM(SQL_BEFORE_GTIDS)}, + { "SQL_AFTER_MTS_GAPS", SYM(SQL_AFTER_MTS_GAPS)}, { "SQL_BIG_RESULT", SYM(SQL_BIG_RESULT)}, { "SQL_BUFFER_RESULT", SYM(SQL_BUFFER_RESULT)}, { "SQL_CACHE", SYM(SQL_CACHE_SYM)}, === modified file 'sql/rpl_rli.cc' --- a/sql/rpl_rli.cc 2012-03-28 18:01:14 +0000 +++ b/sql/rpl_rli.cc 2012-03-29 15:10:31 +0000 @@ -1251,6 +1251,29 @@ bool Relay_log_info::is_until_satisfied( DBUG_RETURN(false); break; + case UNTIL_SQL_AFTER_MTS_GAPS: + /* + TODO: this condition is actually post-execution or post-scheduling + so the proper place to check it before SQL thread goes + into next_event() where it can wait while the condition + has been satisfied already. + It's deployed here temporarily to be fixed along the regular UNTIL + support for MTS is provided. + */ + if (mts_recovery_group_cnt == 0) + { + sql_print_information("Slave SQL thread stopped according to " + "UNTIL SQL_AFTER_MTS_GAPS as it has " + "processed all gap transactions left from " + "the previous slave session."); + DBUG_RETURN(true); + } + else + { + DBUG_RETURN(false); + } + break; + case UNTIL_NONE: DBUG_ASSERT(0); break; === modified file 'sql/rpl_rli.h' --- a/sql/rpl_rli.h 2012-03-28 15:24:17 +0000 +++ b/sql/rpl_rli.h 2012-03-29 15:10:31 +0000 @@ -309,7 +309,8 @@ public: thread is running). 
*/ enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS, - UNTIL_SQL_BEFORE_GTIDS, UNTIL_SQL_AFTER_GTIDS} until_condition; + UNTIL_SQL_BEFORE_GTIDS, UNTIL_SQL_AFTER_GTIDS, UNTIL_SQL_AFTER_MTS_GAPS} + until_condition; char until_log_name[FN_REFLEN]; ulonglong until_log_pos; /* extension extracted from log_name and converted to int */ === modified file 'sql/rpl_slave.cc' --- a/sql/rpl_slave.cc 2012-03-28 15:24:17 +0000 +++ b/sql/rpl_slave.cc 2012-03-29 15:10:31 +0000 @@ -406,7 +406,7 @@ int init_recovery(Master_info* mi, const "was stopped with an error or killed in MTS mode; " "consider using RESET SLAVE or restart the server " "with --relay-log-recovery = 0 followed by " - "START SLAVE"); + "START SLAVE UNTIL SQL_AFTER_MTS_GAPS"); } } @@ -2574,11 +2574,32 @@ bool show_slave_status(THD* thd, Master_ protocol->store((ulonglong) mi->rli->get_group_master_log_pos()); protocol->store((ulonglong) mi->rli->log_space_total); - protocol->store( - mi->rli->until_condition == Relay_log_info::UNTIL_NONE ? "None" : - (mi->rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ? "Master" : - (mi->rli->until_condition == Relay_log_info::UNTIL_RELAY_POS ? 
"Relay" : - "SQL_BEFORE_GTIDS")), &my_charset_bin); + const char *until_type; + + switch (mi->rli->until_condition) + { + case Relay_log_info::UNTIL_NONE: + until_type= "None"; + break; + case Relay_log_info::UNTIL_MASTER_POS: + until_type= "Master"; + break; + case Relay_log_info::UNTIL_RELAY_POS: + until_type= "Relay"; + break; + case Relay_log_info::UNTIL_SQL_BEFORE_GTIDS: + until_type= "SQL_BEFORE_GTIDS"; + break; + case Relay_log_info::UNTIL_SQL_AFTER_GTIDS: + until_type= "SQL_AFTER_GTIDS"; + break; + case Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS: + until_type= "SQL_AFTER_MTS_GAPS"; + break; + default: + DBUG_ASSERT(0); + } + protocol->store(until_type, &my_charset_bin); protocol->store(mi->rli->until_log_name, &my_charset_bin); protocol->store((ulonglong) mi->rli->until_log_pos); @@ -3523,7 +3544,8 @@ static int exec_relay_log_event(THD* thd hits the UNTIL barrier. MTS: since the master and the relay-group coordinates change asynchronously logics of rli->is_until_satisfied() can't apply. - Hence, UNTIL forces the sequential applying. + A special UNTIL_SQL_AFTER_MTS_GAPS is still deployed here + temporarily (see is_until_satisfied todo). 
*/ if (rli->until_condition != Relay_log_info::UNTIL_NONE && rli->is_until_satisfied(thd, ev)) @@ -3591,7 +3613,8 @@ static int exec_relay_log_event(THD* thd if (slave_trans_retries) { int UNINIT_VAR(temp_err); - if (exec_res && (temp_err= rli->has_temporary_error(thd)) && + if (exec_res && !is_mts_worker(thd) && + (temp_err= rli->has_temporary_error(thd)) && !thd->transaction.all.cannot_safely_rollback()) { const char *errmsg; @@ -7461,6 +7484,10 @@ int start_slave(THD* thd , Master_info* } global_sid_lock.unlock(); } + else if (thd->lex->mi.until_after_gaps) + { + mi->rli->until_condition= Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS; + } else mi->rli->clear_until_condition(); === modified file 'sql/sql_lex.h' --- a/sql/sql_lex.h 2012-03-29 13:23:06 +0000 +++ b/sql/sql_lex.h 2012-03-29 15:10:31 +0000 @@ -214,6 +214,7 @@ typedef struct st_lex_master_info ulong server_id, retry_count; char *gtid; enum {UNTIL_SQL_BEFORE_GTIDS= 0, UNTIL_SQL_AFTER_GTIDS} gtid_until_condition; + bool until_after_gaps; /* Enum is used for making it possible to detect if the user === modified file 'sql/sql_yacc.yy' --- a/sql/sql_yacc.yy 2012-03-27 08:43:25 +0000 +++ b/sql/sql_yacc.yy 2012-03-29 15:10:31 +0000 @@ -1511,6 +1511,7 @@ bool my_yyoverflow(short **a, YYSTYPE ** %token SQLSTATE_SYM /* SQL-2003-R */ %token SQLWARNING_SYM /* SQL-2003-R */ %token SQL_AFTER_GTIDS /* MYSQL */ +%token SQL_AFTER_MTS_GAPS /* MYSQL */ %token SQL_BEFORE_GTIDS /* MYSQL */ %token SQL_BIG_RESULT %token SQL_BUFFER_RESULT @@ -7685,7 +7686,13 @@ slave_until: lex->mi.gtid) || !((lex->mi.log_file_name && lex->mi.pos) || (lex->mi.relay_log_name && lex->mi.relay_log_pos) || - lex->mi.gtid)) + lex->mi.gtid || + lex->mi.until_after_gaps) || + /* SQL_AFTER_MTS_GAPS is meaningless in combination */ + /* with any other coordinates related options */ + ((lex->mi.log_file_name || lex->mi.pos || lex->mi.relay_log_name + || lex->mi.relay_log_pos || lex->mi.gtid) + && lex->mi.until_after_gaps)) { 
my_message(ER_BAD_SLAVE_UNTIL_COND, ER(ER_BAD_SLAVE_UNTIL_COND), MYF(0)); @@ -7707,6 +7714,10 @@ slave_until_opts: Lex->mi.gtid= $3.str; Lex->mi.gtid_until_condition= LEX_MASTER_INFO::UNTIL_SQL_AFTER_GTIDS; } + | SQL_AFTER_MTS_GAPS + { + Lex->mi.until_after_gaps= true; + } ; checksum: No bundle (reason: useless for push emails).