List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:March 29 2012 3:11pm
Subject:bzr push into mysql-trunk branch (andrei.elkin:3880 to 3881)
View as plain text  
 3881 Andrei Elkin	2012-03-29
      Bug 13893363 - MTS IS MISSING THE ABILITY TO STOP A SLAVE AFTER PROCESSING ALL GAPS
      
      The new UNTIL condition is an important feature to have 
      because of --relay-log-recovery=1 and Change-Master can run in presence of gaps.
      The user would have to execute
        START SLAVE SQL_THREAD UNTIL SQL_AFTER_MTS_GAPS
      if he needed to switch from the parallel to the sequential execution mode
      after slave SQL thread or Worker threads errored out in the parallel mode.
      
      Also, a separate issue of incorrect demotion of DEADLOCK/WAIT_FOR_LOCK
      errors into warnings is fixed, because at Worker execution the slave does
      not retry. In addition, a todo is left to relocate the checking of
      SQL_AFTER_MTS_GAPS and other post-exec/schedule UNTIL options to the end
      of the read-execute loop (instead of having them right after the read
      phase, which can lead to unnecessary hanging when a condition is
      actually already met).
     @ mysql-test/suite/rpl/r/rpl_parallel_start_stop.result
        results are updated.
     @ mysql-test/suite/rpl/t/rpl_parallel_start_stop.test
        until SQL_AFTER_MTS_GAPS tests are added.
     @ mysql-test/suite/rpl/t/rpl_stm_until.test
        A memo to relocate SLAVE UNTIL SQL_AFTER_MTS_GAPS is added.
     @ sql/lex.h
        a new lex symbol for SQL_AFTER_MTS_GAPS is added.
     @ sql/rpl_rli.cc
        UNTIL_SQL_AFTER_MTS_GAPS case is added to Relay_log_info::is_until_satisfied().
     @ sql/rpl_rli.h
        UNTIL_SQL_AFTER_MTS_GAPS is added to enum conditions.
     @ sql/rpl_slave.cc
        a cause of demotion of Worker DEADLOCK/WAIT_FOR_LOCK errors into warning is fixed;
        change_master() improves a warning instructing how to proceed if MTS gaps are met;
        SQL_AFTER_MTS_GAPS (as well as UNTIL_SQL_AFTER_GTIDS, added by another fix)
        is now displayed by Show-Slave-Status.
     @ sql/sql_lex.h
        struct st_lex_master_info is extended with a flag recording that
        SQL_AFTER_MTS_GAPS is requested, in order to propagate it into start_slave().
     @ sql/sql_yacc.yy
        SQL_AFTER_MTS_GAPS handling in parser is added.

    modified:
      mysql-test/suite/rpl/r/rpl_parallel_start_stop.result
      mysql-test/suite/rpl/t/rpl_parallel_start_stop.test
      mysql-test/suite/rpl/t/rpl_stm_until.test
      sql/lex.h
      sql/rpl_rli.cc
      sql/rpl_rli.h
      sql/rpl_slave.cc
      sql/sql_lex.h
      sql/sql_yacc.yy
 3880 Tor Didriksen	2012-03-29 [merge]
      merge 5.5 => trunk

    modified:
      mysql-test/r/ps.result
      mysql-test/r/sp.result
      mysql-test/t/ps.test
      mysql-test/t/sp.test
=== modified file 'mysql-test/suite/rpl/r/rpl_parallel_start_stop.result'
--- a/mysql-test/suite/rpl/r/rpl_parallel_start_stop.result	2012-03-15 09:55:15 +0000
+++ b/mysql-test/suite/rpl/r/rpl_parallel_start_stop.result	2012-03-29 15:10:31 +0000
@@ -78,6 +78,50 @@ drop table t2m;
 drop table t2;
 drop view worker_proc_list;
 drop view coord_proc_list;
-set @@global.slave_parallel_workers= @save.slave_parallel_workers;
 set @@global.slave_transaction_retries= @save.slave_transaction_retries;
+include/stop_slave.inc
+start slave until sql_after_mts_gaps relay_log_file='dummy';
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'relay_log_file='dummy'' at line 1
+start slave until sql_after_mts_gaps relay_log_pos=0;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'relay_log_pos=0' at line 1
+start slave until sql_after_mts_gaps master_log_file='dummy';
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'master_log_file='dummy'' at line 1
+start slave until sql_after_mts_gaps master_log_pos=0;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'master_log_pos=0' at line 1
+start slave until sql_after_mts_gaps SQL_BEFORE_GTIDS='dummy';
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'SQL_BEFORE_GTIDS='dummy'' at line 1
+start slave until sql_after_mts_gaps;
+show warnings;
+Level	Code	Message
+Error	1064	You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'SQL_BEFORE_GTIDS='dummy'' at line 1
+call mtr.add_suppression('Slave SQL: Could not execute Update_rows event on table d1.t1; Deadlock found when trying to get lock');
+include/start_slave.inc
+create database d1;
+create database d2;
+create table d1.t1 (a int primary key) engine=innodb;
+create table d2.t1 (a int primary key) engine=innodb;
+create table d1.t2m (a int) engine=myisam;
+insert into d1.t1 values (1),(2),(3);
+insert into d2.t1 values (1),(2),(3);
+insert into d1.t2m values (1);
+begin;
+update d1.t1 set a=31 where a=3;
+insert into d1.t1 values (5),(6),(7);
+begin;
+update d1.t1 set a=20 where a=2;
+insert into d1.t2m values (2);
+update d1.t1 set a=30 where a=3;
+insert into d1.t1 values (4);
+commit;
+delete from d2.t1;
+delete from d1.t1;
+update d1.t1 set a=21 where a=2;
+include/wait_for_slave_sql_error.inc [errno=1213, 1205]
+rollback;
+start slave until sql_after_mts_gaps;
+include/wait_for_slave_sql_to_stop.inc
+include/start_slave.inc
+drop database d1;
+drop database d2;
+set @@global.slave_parallel_workers= @save.slave_parallel_workers;
 include/rpl_end.inc

=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_start_stop.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test	2012-03-15 09:55:15 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test	2012-03-29 15:10:31 +0000
@@ -237,7 +237,8 @@ if (`select $a - max(a) from t1`)
 }
 
 #
-# UNTIL condition is not supported by Parallel slave to reject
+# UNTIL condition other than SQL_AFTER_MTS_GAPS
+# is not supported by Parallel slave to reject
 # with a warning and no Worker thread is started
 #
 
@@ -278,8 +279,143 @@ sync_slave_with_master;
 
 drop view worker_proc_list;
 drop view coord_proc_list;
-set @@global.slave_parallel_workers= @save.slave_parallel_workers;
+# reuse non-zero Workers value in the following tests
+#set @@global.slave_parallel_workers= @save.slave_parallel_workers;
 set @@global.slave_transaction_retries= @save.slave_transaction_retries;
 
---source include/rpl_end.inc
+#
+# START SLAVE UNTIL SQL_AFTER_MTS_GAPS
+# 
+# A new UNTIL condition is introduced as a tool to
+# fill gaps in the sequence of executed transaction started
+# at Exec_Master_Log_Pos.
+# The gaps could be caused by the previous slave session stop
+# with an error, or it was killed, or the server crashed.
+#
+
+connection slave;
+
+source include/stop_slave.inc;
+
+# Show SQL_AFTER_MTS_GAPS is meaningless in combination with coordinates 
+# related options
+--error 1064
+start slave until sql_after_mts_gaps relay_log_file='dummy';
+--error 1064
+start slave until sql_after_mts_gaps relay_log_pos=0;
+--error 1064
+start slave until sql_after_mts_gaps master_log_file='dummy';
+--error 1064
+start slave until sql_after_mts_gaps master_log_pos=0;
+--error 1064
+start slave until sql_after_mts_gaps SQL_BEFORE_GTIDS='dummy';
+
+# check out a "blank" run which leaves an info line into error-log
+start slave until sql_after_mts_gaps;
+
+show warnings;
+call mtr.add_suppression('Slave SQL: Could not execute Update_rows event on table d1.t1; Deadlock found when trying to get lock');
+
+# regular start now
+source include/start_slave.inc;
+
+# set up gaps when slave sql errors out
+connection master;
+
+create database d1;
+create database d2;
+create table d1.t1 (a int primary key) engine=innodb;
+create table d2.t1 (a int primary key) engine=innodb;
+create table d1.t2m (a int) engine=myisam; # non-trans engine to detect deadlock
+insert into d1.t1 values (1),(2),(3);
+insert into d2.t1 values (1),(2),(3);
+insert into d1.t2m values (1);
+
+sync_slave_with_master;
+#connection slave;
+begin; # the blocker
+# set up a deadlock at the 1st job
+update d1.t1 set a=31 where a=3;
+insert into d1.t1 values (5),(6),(7);
+
+# create the 1st job to get blocked on the slave
+connection master;
+begin;
+update d1.t1 set a=20 where a=2;
+insert into d1.t2m values (2);
+update d1.t1 set a=30 where a=3;
+insert into d1.t1 values (4);
+commit;
+
+# create the 2nd job for another worker
+delete from d2.t1;
+
+# create the 3rd job to help UNTIL SQL_AFTER_MTS_GAPS be reached
+# (todo: if the last gap event ends the relay-log SQL thread will
+# hang - to be fixed with MTS support for regular UNTIL:s)
+delete from d1.t1;
+
+# wait till the 2nd job will be done
+connection slave1;
+let $count= 0;
+let $table= d2.t1;
+source include/wait_until_rows_count.inc;
+
+# proceed with 1st job into its middle
+connection slave;
+let $count= 2;
+let $table= d1.t2m;
+source include/wait_until_rows_count.inc;
 
+# must victimize the master trans because of deadlock or timeout
+update d1.t1 set a=21 where a=2;
+
+# slave is stopped
+# setting timeout to be as twice as greater than innodb's.
+--let $slave_timeout=`select 2*@@global.innodb_lock_wait_timeout`
+
+let $slave_sql_errno= 1213, 1205; 
+source include/wait_for_slave_sql_error.inc;
+
+rollback; # the blocker
+
+# find out Exec_Master_Log_Pos to store it in Exec_0
+let $exec_pos_0= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
+
+start slave until sql_after_mts_gaps;
+
+# Efficiency of UNTIL proof:
+source include/wait_for_slave_sql_to_stop.inc;
+
+# Consistency proof:
+
+if (`select count(*) <> 4 from d1.t1`)
+{
+    --echo *** Something is wrong in recovery ***
+    --die
+}
+
+# Efficiency of gap filling proof:
+# find out Exec_Master_Log_Pos and compare with Exec_0
+let $exec_pos_1= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
+
+if (`select $exec_pos_1 - $exec_pos_0 <= 0`)
+{
+    --echo *** No gap transaction is executed as expected ***
+    --connection slave
+    show slave status;
+    --connection master
+    show master status;
+    --die
+}
+
+# UNTIL SQL_AFTER_MTS_GAPS cleanup
+source include/start_slave.inc;
+connection master;
+drop database d1;
+drop database d2;
+
+sync_slave_with_master;
+set @@global.slave_parallel_workers= @save.slave_parallel_workers;
+
+--source include/rpl_end.inc

=== modified file 'mysql-test/suite/rpl/t/rpl_stm_until.test'
--- a/mysql-test/suite/rpl/t/rpl_stm_until.test	2011-08-19 13:04:28 +0000
+++ b/mysql-test/suite/rpl/t/rpl_stm_until.test	2012-03-29 15:10:31 +0000
@@ -124,6 +124,9 @@ start slave sql_thread;
 --replace_result 776 MASTER_LOG_POS
 start slave until master_log_file='master-bin.000001', master_log_pos=776;
 
+--echo *** TODO: consider to relocate SLAVE UNTIL SQL_AFTER_MTS_GAPS testing ***
+--echo *** from rpl_parallel_start_stop when this test gets enabled          ***
+
 #
 # bug#47210  first execution of "start slave until" stops too early  
 #

=== modified file 'sql/lex.h'
--- a/sql/lex.h	2012-03-06 14:29:42 +0000
+++ b/sql/lex.h	2012-03-29 15:10:31 +0000
@@ -530,7 +530,7 @@ static SYMBOL symbols[] = {
   { "SQLSTATE",         SYM(SQLSTATE_SYM)},
   { "SQLWARNING",       SYM(SQLWARNING_SYM)},
   { "SQL_AFTER_GTIDS",  SYM(SQL_AFTER_GTIDS)},
-  { "SQL_BEFORE_GTIDS", SYM(SQL_BEFORE_GTIDS)},
+  { "SQL_AFTER_MTS_GAPS", SYM(SQL_AFTER_MTS_GAPS)},
   { "SQL_BIG_RESULT",	SYM(SQL_BIG_RESULT)},
   { "SQL_BUFFER_RESULT", SYM(SQL_BUFFER_RESULT)},
   { "SQL_CACHE",        SYM(SQL_CACHE_SYM)},

=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc	2012-03-28 18:01:14 +0000
+++ b/sql/rpl_rli.cc	2012-03-29 15:10:31 +0000
@@ -1251,6 +1251,29 @@ bool Relay_log_info::is_until_satisfied(
     DBUG_RETURN(false);
     break;
 
+  case UNTIL_SQL_AFTER_MTS_GAPS:
+    /*
+      TODO: this condition is actually post-execution or post-scheduling
+            so the proper place to check it before SQL thread goes
+            into next_event() where it can wait while the condition
+            has been satisfied already.
+            It's deployed here temporarily to be fixed along the regular UNTIL
+            support for MTS is provided.
+    */
+    if (mts_recovery_group_cnt == 0)
+    {
+      sql_print_information("Slave SQL thread stopped according to "
+                            "UNTIL SQL_AFTER_MTS_GAPS as it has "
+                            "processed all gap transactions left from "
+                            "the previous slave session.");
+      DBUG_RETURN(true);
+    }
+    else
+    {
+      DBUG_RETURN(false);
+    }
+    break;
+
   case UNTIL_NONE:
     DBUG_ASSERT(0);
     break;

=== modified file 'sql/rpl_rli.h'
--- a/sql/rpl_rli.h	2012-03-28 15:24:17 +0000
+++ b/sql/rpl_rli.h	2012-03-29 15:10:31 +0000
@@ -309,7 +309,8 @@ public:
      thread is running).
    */
   enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS,
-        UNTIL_SQL_BEFORE_GTIDS, UNTIL_SQL_AFTER_GTIDS} until_condition;
+        UNTIL_SQL_BEFORE_GTIDS, UNTIL_SQL_AFTER_GTIDS, UNTIL_SQL_AFTER_MTS_GAPS}
+    until_condition;
   char until_log_name[FN_REFLEN];
   ulonglong until_log_pos;
   /* extension extracted from log_name and converted to int */

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2012-03-28 15:24:17 +0000
+++ b/sql/rpl_slave.cc	2012-03-29 15:10:31 +0000
@@ -406,7 +406,7 @@ int init_recovery(Master_info* mi, const
                         "was stopped with an error or killed in MTS mode; "
                         "consider using RESET SLAVE or restart the server "
                         "with --relay-log-recovery = 0 followed by "
-                        "START SLAVE");
+                        "START SLAVE UNTIL SQL_AFTER_MTS_GAPS");
     }
   }
 
@@ -2574,11 +2574,32 @@ bool show_slave_status(THD* thd, Master_
     protocol->store((ulonglong) mi->rli->get_group_master_log_pos());
     protocol->store((ulonglong) mi->rli->log_space_total);
 
-    protocol->store(
-      mi->rli->until_condition == Relay_log_info::UNTIL_NONE ? "None" :
-        (mi->rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ? "Master" :
-         (mi->rli->until_condition == Relay_log_info::UNTIL_RELAY_POS ? "Relay" :
-          "SQL_BEFORE_GTIDS")), &my_charset_bin);
+    const char *until_type;
+
+    switch (mi->rli->until_condition)
+    {
+    case Relay_log_info::UNTIL_NONE:
+      until_type= "None";
+      break;
+    case Relay_log_info::UNTIL_MASTER_POS:
+      until_type= "Master";
+      break;
+    case Relay_log_info::UNTIL_RELAY_POS:
+      until_type= "Relay";
+      break;
+    case Relay_log_info::UNTIL_SQL_BEFORE_GTIDS:
+      until_type= "SQL_BEFORE_GTIDS";
+      break;
+    case Relay_log_info::UNTIL_SQL_AFTER_GTIDS:
+      until_type= "SQL_AFTER_GTIDS";
+      break;
+    case Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS:
+      until_type= "SQL_AFTER_MTS_GAPS";
+      break;
+    default:
+      DBUG_ASSERT(0);
+    }
+    protocol->store(until_type, &my_charset_bin);
     protocol->store(mi->rli->until_log_name, &my_charset_bin);
     protocol->store((ulonglong) mi->rli->until_log_pos);
 
@@ -3523,7 +3544,8 @@ static int exec_relay_log_event(THD* thd
       hits the UNTIL barrier.
       MTS: since the master and the relay-group coordinates change 
       asynchronously logics of rli->is_until_satisfied() can't apply.
-      Hence, UNTIL forces the sequential applying.
+      A special UNTIL_SQL_AFTER_MTS_GAPS is still deployed here
+      temporarily (see is_until_satisfied todo).
     */
     if (rli->until_condition != Relay_log_info::UNTIL_NONE &&
         rli->is_until_satisfied(thd, ev))
@@ -3591,7 +3613,8 @@ static int exec_relay_log_event(THD* thd
     if (slave_trans_retries)
     {
       int UNINIT_VAR(temp_err);
-      if (exec_res && (temp_err= rli->has_temporary_error(thd)) &&
+      if (exec_res && !is_mts_worker(thd) &&
+          (temp_err= rli->has_temporary_error(thd)) &&
           !thd->transaction.all.cannot_safely_rollback())
       {
         const char *errmsg;
@@ -7461,6 +7484,10 @@ int start_slave(THD* thd , Master_info* 
           }
           global_sid_lock.unlock();
         }
+        else if (thd->lex->mi.until_after_gaps)
+        {
+            mi->rli->until_condition= Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS;
+        }
         else
           mi->rli->clear_until_condition();
 

=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h	2012-03-29 13:23:06 +0000
+++ b/sql/sql_lex.h	2012-03-29 15:10:31 +0000
@@ -214,6 +214,7 @@ typedef struct st_lex_master_info
   ulong server_id, retry_count;
   char *gtid;
   enum {UNTIL_SQL_BEFORE_GTIDS= 0, UNTIL_SQL_AFTER_GTIDS} gtid_until_condition;
+  bool until_after_gaps;
 
   /*
     Enum is used for making it possible to detect if the user

=== modified file 'sql/sql_yacc.yy'
--- a/sql/sql_yacc.yy	2012-03-27 08:43:25 +0000
+++ b/sql/sql_yacc.yy	2012-03-29 15:10:31 +0000
@@ -1511,6 +1511,7 @@ bool my_yyoverflow(short **a, YYSTYPE **
 %token  SQLSTATE_SYM                  /* SQL-2003-R */
 %token  SQLWARNING_SYM                /* SQL-2003-R */
 %token  SQL_AFTER_GTIDS               /* MYSQL */
+%token  SQL_AFTER_MTS_GAPS            /* MYSQL */
 %token  SQL_BEFORE_GTIDS              /* MYSQL */
 %token  SQL_BIG_RESULT
 %token  SQL_BUFFER_RESULT
@@ -7685,7 +7686,13 @@ slave_until:
                 lex->mi.gtid) ||
                 !((lex->mi.log_file_name && lex->mi.pos) ||
                   (lex->mi.relay_log_name && lex->mi.relay_log_pos) ||
-                  lex->mi.gtid))
+                  lex->mi.gtid ||
+                  lex->mi.until_after_gaps) ||
+                /* SQL_AFTER_MTS_GAPS is meaningless in combination */
+                /* with any other coordinates related options       */
+                ((lex->mi.log_file_name || lex->mi.pos || lex->mi.relay_log_name
+                  || lex->mi.relay_log_pos || lex->mi.gtid)
+                 && lex->mi.until_after_gaps))
             {
                my_message(ER_BAD_SLAVE_UNTIL_COND,
                           ER(ER_BAD_SLAVE_UNTIL_COND), MYF(0));
@@ -7707,6 +7714,10 @@ slave_until_opts:
             Lex->mi.gtid= $3.str;
             Lex->mi.gtid_until_condition= LEX_MASTER_INFO::UNTIL_SQL_AFTER_GTIDS;
           }
+        | SQL_AFTER_MTS_GAPS
+          {
+            Lex->mi.until_after_gaps= true;
+          }
         ;
 
 checksum:

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-trunk branch (andrei.elkin:3880 to 3881) Andrei Elkin29 Mar