List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:December 5 2010 8:04pm
Subject:bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3230) WL#5569
WL#5599
View as plain text  
#At file:///home/andrei/MySQL/BZR/2a-23May/WL/mysql-next-mr-wl5569/ based on revid:andrei.elkin@stripped

 3230 Andrei Elkin	2010-12-05
      wl#5569 WL#5599 MTS & recovery
      
      Refining and correcting the integration of the two WLs.
      The main achievement is that event execution status is now consistently
      recorded in the Worker and the central RL recovery tables.
      That was tested manually in a rather aggressive environment where the IO thread
      was made to reconnect randomly and the load from the Master contained Rotate events.
      
      TODO: 
      
        to fix: rpl.rpl_parallel_conf_limits may not pass
      
        to address: Multi-stmt Query-log-event transaction case (see todo in sources).
        to have Workers destroy the events they have executed (this was deferred
        until ev->update_pos started working).
      
      (Alfranio)
        to deploy the mts_checkpoint_routine() call inside the successful event-read branch of
           next_event(). Otherwise it is never called while the Coordinator is constantly
           busy with read/distribute.

    modified:
      sql/log_event.cc
      sql/rpl_slave.cc
=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc	2010-12-04 17:14:50 +0000
+++ b/sql/log_event.cc	2010-12-05 20:04:17 +0000
@@ -2234,7 +2234,7 @@ Slave_worker *Log_event::get_slave_worke
       DBUG_ASSERT(rli->curr_group_da.elements == 1);
 
       // mark the current grup as started with B-event
-      const_cast<Relay_log_info*>(rli)->curr_group_seen_begin= is_b_event;
+      const_cast<Relay_log_info*>(rli)->curr_group_seen_begin= TRUE;
       return NULL;
     } 
     else 
@@ -2333,7 +2333,6 @@ Slave_worker *Log_event::get_slave_worke
 
     // reset the B-group marker
     const_cast<Relay_log_info*>(rli)->curr_group_seen_begin= FALSE;
-
     const_cast<Relay_log_info*>(rli)->curr_group_is_parallel= TRUE;  // mark for Coord's T-event delete
   }
   
@@ -2540,6 +2539,9 @@ int Log_event::apply_event(Relay_log_inf
         DBUG_ASSERT(rli->curr_group_da.elements == 0);
         DBUG_ASSERT(rli->curr_group_seen_begin);
 
+        // TODO: rollback
+        // c_rli->gaq->assigned_group_index= rli->gaq->en_queue((void *) &g);
+
         res= ev_begin->do_apply_event(rli);
         delete ev_begin;
         /* B appears to be serial, reset parallel status of group 

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2010-12-04 17:14:50 +0000
+++ b/sql/rpl_slave.cc	2010-12-05 20:04:17 +0000
@@ -2669,10 +2669,25 @@ int apply_event_and_update_pos(Log_event
       See sql/rpl_rli.h for further details.
     */
     int error= 0;
-    if ((rli->curr_group_is_parallel == FALSE && 
-        !(ev->get_type_code() == XID_EVENT && rli->is_transactional())) ||
-        skip_event)
+    if (!rli->is_parallel_exec() ||
+        ev->only_sequential_exec(rli->run_query_in_parallel,
+                                 ev->ends_group() ?
+                                 rli->curr_group_is_parallel :
+                                 rli->curr_group_seen_begin))
+    {
       error= ev->update_pos(rli);
+    }
+    else
+    {
+      DBUG_ASSERT(rli->is_parallel_exec());
+      /* 
+         event_relay_log_pos is an anchor to possible reading restart.
+         It may become lt than group_* value.
+         However event_relay_log_pos does not affect group_relay_log_pos
+         othen that through the sequentially executed events or via checkpoint.
+      */
+      rli->inc_event_relay_log_pos();
+    }
 
 #ifndef DBUG_OFF
     DBUG_PRINT("info", ("update_pos error = %d", error));
@@ -3610,7 +3625,7 @@ bool mts_checkpoint_routine(Relay_log_in
   DBUG_ENTER("checkpoint_routine");
 
   if (!(cnt= rli->gaq->move_queue_head(&rli->workers)))
-    DBUG_RETURN(error);
+    goto end;
 
   /* TODO: 
      to turn the least occupied selection in terms of jobs pieces
@@ -3623,25 +3638,30 @@ bool mts_checkpoint_routine(Relay_log_in
   };
   sort_dynamic(&rli->least_occupied_workers, (qsort_cmp) ulong_cmp);
 
+  mysql_mutex_lock(&rli->data_lock);
+
   // Coordinator::commit_positions() {
 
-  // Alfranio, rli->gaq->lwm contains all but rli->group_master_log_name
+  // rli->gaq->lwm contains all but rli->group_master_log_name
 
   // group_master_log_name is updated only by Coordinator and it can't change
   // within checkpoint interval because Coordinator flushes the updated value
   // at once.
 
-  mysql_mutex_lock(&rli->data_lock); 
-
   rli->set_group_master_log_pos(rli->gaq->lwm.group_master_log_pos);
   rli->set_group_relay_log_pos(rli->gaq->lwm.group_relay_log_pos);
+
   if (rli->gaq->lwm.group_relay_log_name[0] != 0)
     rli->set_group_relay_log_name(rli->gaq->lwm.group_relay_log_name);
 
   error= rli->flush_info(TRUE);
 
+  // end of commit_positions
+
   mysql_mutex_unlock(&rli->data_lock);
 
+end:
+  
   // ANDREI NOTIFICATIONS?
   DBUG_RETURN(error);
 }
@@ -5356,6 +5376,10 @@ static Log_event* next_event(Relay_log_i
         }
         else
         {
+          thd->enter_cond(log_cond, log_lock,
+                          "Slave has read all relay log; "
+                          "waiting for the slave I/O "
+                          "thread to update it");
           rli->relay_log.wait_for_update_relay_log(thd, NULL);
         }
         


Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20101205200417-lezfgpc9q24mezc3.bundle
Thread
bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3230) WL#5569WL#5599Andrei Elkin5 Dec