List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:June 6 2011 10:51am
Subject:bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3282) WL#5569
View as plain text  
#At file:///home/andrei/MySQL/BZR/2a-23May/WL/mysql-next-mr-wl5569/ based on revid:andrei.elkin@stripped

 3282 Andrei Elkin	2011-06-06
      wl#5569 MTS
      
      STOP SLAVE now stops consistently, without gaps; KILL
      shall be used for an urgent stop, and an error case behaves like
      a kill. For instance, when a Worker errors out, it sends KILL to
      Coordinator through THD::awake(), and Coordinator kills the rest
      by setting a special Worker-running status to killed (which
      breaks the read-exec loop of a Worker).
     @ sql/log_event.cc
        Changing the way the mts-in-group bool argument passed to mts_async_exec_by_coordinator() is computed.
     @ sql/rpl_rli.cc
        Changing the way the mts-in-group condition of an if in stmt_done() is computed.
     @ sql/rpl_rli.h
        Adding more states to Coordinator's MTS-group view.
     @ sql/rpl_rli_pdb.cc
        Relocating the notification of a Worker's failure, sent by the Worker, into
        the error branch of the function that releases common resources (entries of APH hash).
        The failed Worker tries to awaken Coordinator, which is possibly waiting for the signal.
        The latter's behaviour in its turn is refined to not enter the waiting phase
        when it has already been killed.
     @ sql/rpl_slave.cc
        sql_slave_killed() now has two flavors of the error branch.
        A STOPped MTS Coordinator does not give up too early and waits till
        its MTS-group state allows that.
        Notification with kill to Coordinator from the errored-out or killed
        Worker is moved into the function that releases common resources (entries of APH hash).
        This case designates a hard stop.
        In case of the soft (SLAVE-STOPped) MTS, Coordinator is made to wait for
        full completion of the Workers' assignments before marking their running status for stopping.

    modified:
      sql/log_event.cc
      sql/rpl_rli.cc
      sql/rpl_rli.h
      sql/rpl_rli_pdb.cc
      sql/rpl_slave.cc
=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc	2011-06-05 17:01:51 +0000
+++ b/sql/log_event.cc	2011-06-06 10:51:19 +0000
@@ -2581,6 +2581,7 @@ Slave_worker *Log_event::get_slave_worke
     // special marking for T event of {p,g} B-less group
     if (num_dbs == OVER_MAX_DBS_IN_EVENT_MTS)
       mts_do_isolate_event();
+    rli->mts_group_status= Relay_log_info::MTS_END_GROUP;
 
     ptr_g= (Slave_job_group *)
       dynamic_array_ptr(&rli->gaq->Q, rli->gaq->assigned_group_index);
@@ -2850,7 +2851,7 @@ int Log_event::apply_event(Relay_log_inf
   if (!(parallel= rli->is_parallel_exec()) ||
       (async_event=
        mts_async_exec_by_coordinator(::server_id, 
-                                     rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)) ||
+                                     rli->mts_group_status != Relay_log_info::MTS_NOT_IN_GROUP)) ||
       (seq_event= mts_sequential_exec()))
   {
     if (parallel)

=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc	2011-06-05 17:01:51 +0000
+++ b/sql/rpl_rli.cc	2011-06-06 10:51:19 +0000
@@ -1020,7 +1020,7 @@ void Relay_log_info::stmt_done(my_off_t 
     while the MyISAM table has already been updated.
   */
   if ((!is_parallel_exec() && is_in_group()) ||
-      mts_group_status == MTS_IN_GROUP)
+      mts_group_status != MTS_NOT_IN_GROUP)
   {
     inc_event_relay_log_pos();
   }

=== modified file 'sql/rpl_rli.h'
--- a/sql/rpl_rli.h	2011-06-05 17:01:51 +0000
+++ b/sql/rpl_rli.h	2011-06-06 10:51:19 +0000
@@ -488,8 +488,14 @@ public:
   */
   enum
   {
-    MTS_NOT_IN_GROUP, /* not in group includes Single-Threaded-Slave */
-    MTS_IN_GROUP      /* an event was scheduled to a Worker */
+    /* 
+       no new events were scheduled after last synchronization,
+       includes Single-Threaded-Slave case.
+    */
+    MTS_NOT_IN_GROUP,
+    MTS_IN_GROUP,    /* at least one event was scheduled to a Worker */
+    MTS_END_GROUP,   /* the last scheduled event is a terminal event */
+    MTS_KILLED_GROUP /* Coordinator gave out to reach MTS_END_GROUP */
   } mts_group_status;
 
   /* most of allocation in the coordinator rli is there */

=== modified file 'sql/rpl_rli_pdb.cc'
--- a/sql/rpl_rli_pdb.cc	2011-06-05 17:01:51 +0000
+++ b/sql/rpl_rli_pdb.cc	2011-06-06 10:51:19 +0000
@@ -816,6 +816,17 @@ void Slave_worker::slave_worker_ends_gro
   ep->elements= 0;
 
   curr_group_seen_begin= FALSE;
+
+  if (error)
+  {
+    mysql_mutex_lock(&slave_worker_hash_lock);
+    mysql_mutex_lock(&c_rli->info_thd->LOCK_thd_data);
+
+    c_rli->info_thd->awake(THD::KILL_QUERY);          // notify Crdn
+
+    mysql_mutex_unlock(&c_rli->info_thd->LOCK_thd_data);
+    mysql_mutex_unlock(&slave_worker_hash_lock);
+  }
 }
 
 
@@ -1114,7 +1125,7 @@ int wait_for_workers_to_finish(Relay_log
       continue;
     }
 
-    if (entry->usage > 0)
+    if (entry->usage > 0 && !thd->killed)
     {
       sprintf(wait_info, info_format, entry->worker->id, entry->db);
       entry->worker= NULL; // mark Worker to signal when  usage drops to 0
@@ -1125,7 +1136,7 @@ int wait_for_workers_to_finish(Relay_log
       thd->exit_cond(proc_info);
       ret++;
 
-      DBUG_ASSERT(entry->usage == 0 || thd->killed || rli->abort_slave);
+      DBUG_ASSERT(entry->usage == 0 || thd->killed);
     }
     else
     {

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2011-06-05 17:01:51 +0000
+++ b/sql/rpl_slave.cc	2011-06-06 10:51:19 +0000
@@ -1090,9 +1090,10 @@ static bool sql_slave_killed(THD* thd, R
       as well.
       Example: OPTION_KEEP_LOG is set if a temporary table is created or dropped.
     */
-    if ((thd->transaction.all.modified_non_trans_table ||
-         (thd->variables.option_bits & OPTION_KEEP_LOG))
-        && rli->is_in_group())
+    if ((!rli->is_parallel_exec() &&
+         (thd->transaction.all.modified_non_trans_table ||
+          (thd->variables.option_bits & OPTION_KEEP_LOG)) && rli->is_in_group())
+        || (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP))
     {
       char msg_stopped[]=
         "... The slave SQL is stopped, leaving the current group "
@@ -1101,6 +1102,14 @@ static bool sql_slave_killed(THD* thd, R
         "restarting the slave with --slave-exec-mode=IDEMPOTENT, which "
         "ignores duplicate key, key not found, and similar errors (see "
         "documentation for details).";
+      char msg_stopped_mts[]=
+        "... The slave Coordinator and Worker threads are stopped, possibly "
+        "leaving data in inconsistent state. The following restart shall "
+        "restore consistency automatically. There might be exceptional situations "
+        "in the recovery caused by combination of non-transactional storage for "
+        "either of Coordinator or Workers info tables and updating non-transactional "
+        "data tables or DDL queries. In such cases you have to examine your data "
+        "(see documentation for details).";
 
       if (rli->abort_slave)
       {
@@ -1137,7 +1146,9 @@ static bool sql_slave_killed(THD* thd, R
         else
         {
           rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
-                      ER(ER_SLAVE_FATAL_ERROR), msg_stopped);
+                      ER(ER_SLAVE_FATAL_ERROR),
+                      rli->mts_group_status == Relay_log_info::MTS_NOT_IN_GROUP ?
+                      msg_stopped : msg_stopped_mts);
         }
       }
       else
@@ -1153,7 +1164,13 @@ static bool sql_slave_killed(THD* thd, R
     }
   }
   if (ret)
+  {
     rli->last_event_start_time= 0;
+    if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
+    {
+      rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
+    }
+  }
   
   DBUG_RETURN(ret);
 }
@@ -3798,14 +3815,7 @@ pthread_handler_t handle_slave_worker(vo
   {
       error= slave_worker_exec_job(w, rli);
   }
-
   w->cleanup_context(thd, error);
-  if (error)
-  {
-    mysql_mutex_lock(&rli->info_thd->LOCK_thd_data);
-    rli->info_thd->awake(THD::KILL_QUERY);          // notify Crdn
-    mysql_mutex_unlock(&rli->info_thd->LOCK_thd_data);
-  }
 
   mysql_mutex_lock(&w->jobs_lock);
 
@@ -4293,9 +4303,12 @@ err:
 /* 
    Ending Worker threads.
 
+   Not in case Coordinator is killed itself, it first waits for
+   Workers have finished their assignements, and then updates checkpoint. 
    Workers are notified with setting KILLED status
    and waited for their acknowledgment as specified by
    worker's running_status.
+   Coordinator finalizes with its MTS running status to reset few objects.
 */
 void slave_stop_workers(Relay_log_info *rli)
 {
@@ -4306,11 +4319,21 @@ void slave_stop_workers(Relay_log_info *
     return;
   
   /*
-    this is the soft stop. In order for waiting be successful Coordinator
-    needs (*TODO*) to guarantee Workers were assigned with full groups.
+    In case of the "soft" graceful stop Coordinator
+    guaranteed Workers were assigned with full groups so waiting
+    will be resultful.
+    "Hard" stop with KILLing Coordinator or erroring out by a Worker
+    can't wait for Workers' completion because those may not receive
+    commit-events of last assigned groups.
   */
-  // (void) wait_for_workers_to_finish(rli);
+  if (rli->mts_group_status != Relay_log_info::MTS_KILLED_GROUP &&
+      thd->killed == THD::NOT_KILLED)
+  {
+    DBUG_ASSERT(rli->mts_group_status != Relay_log_info::MTS_IN_GROUP);
 
+    (void) wait_for_workers_to_finish(rli);
+    (void) mts_checkpoint_routine(rli, 0, FALSE, FALSE);  // todo: error branch
+  }
   for (i= rli->workers.elements - 1; i >= 0; i--)
   {
     Slave_worker *w;


Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20110606105119-j8yk9b45uvirqvf4.bundle
Thread
bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3282) WL#5569Andrei Elkin6 Jun