List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:July 8 2013 1:15pm
Subject:bzr push into mysql-5.6 branch (andrei.elkin:5286 to 5287) Bug#16594095
View as plain text  
 5287 Andrei Elkin	2013-07-08
      BUG#16594095
      
      Post-push refinement to fix a valgrind issue.
      The basic pushed patch did not expect the block
       if (!ret_worker->checkpoint_notified) {}
      to be executed multiple times during scheduling of one group.
      In fact the block should have been left intact, so that it continues to be
      executed at T-event time, which guarantees that it runs only once.
      That is now restored, i.e. that part of the former patch is reverted.
      
      As a matter of fact, the physical coordinates were unknown to the Worker
      only while scheduling the first transaction after a master binlog
      rotation, or the very first transaction at MTS start.
      Information about the master binlog at such points is now passed to the
      Worker via an augmented notification mechanism.
      The new notification is light both in implementation and in terms of
      execution (it is supposed to be pretty rare) and happens never more than once
      during one transaction scheduling (as asserted in the DBUG version of the patch).
      
      Attention is paid to rpl_conflicts.test to sort out
      rpl_{row,stm}_conflicts.test failing on PB2.

    modified:
      mysql-test/extra/rpl_tests/rpl_conflicts.test
      mysql-test/suite/rpl/r/rpl_row_conflicts.result
      mysql-test/suite/rpl/r/rpl_stm_conflicts.result
      sql/log_event.cc
      sql/rpl_rli.cc
      sql/rpl_rli_pdb.cc
      sql/rpl_rli_pdb.h
=== modified file 'mysql-test/extra/rpl_tests/rpl_conflicts.test'
--- a/mysql-test/extra/rpl_tests/rpl_conflicts.test	revid:anitha.gopi@stripped
+++ b/mysql-test/extra/rpl_tests/rpl_conflicts.test	revid:andrei.elkin@stripped
@@ -89,8 +89,11 @@ if (`SELECT @@global.binlog_format != 'R
   --echo ---- Wait until slave stops with an error ----
   # Wait until the slave tries to run the query, fails with duplicate
   # key error, and stops the SQL thread.
-  let $slave_sql_errno= convert_error(ER_DUP_ENTRY)
+
+  let $slave_sql_errno= convert_error(ER_DUP_ENTRY);
   source include/wait_for_slave_sql_error.inc;
+  --let $errno= query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1)
+  --eval SELECT "$errno" as 'Last_SQL_Errno'
 
   call mtr.add_suppression("Slave SQL.*Duplicate entry .1. for key .PRIMARY.* Error_code: 1062");
   call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state");
@@ -139,8 +142,11 @@ connection slave;
 if (`SELECT @@global.binlog_format = 'ROW' AND @@global.slave_exec_mode = 'STRICT'`) {
   --echo ---- Wait until slave stops with an error ----
   call mtr.add_suppression("Slave SQL.*Can.t find record in .t1., Error_code: 1032");
-  let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND)
+
+  let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND);
   source include/wait_for_slave_sql_error.inc;
+  --let $errno= query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1)
+  --eval SELECT "$errno" as 'Last_SQL_Errno'
 
   SELECT * FROM t1;
 

=== modified file 'mysql-test/suite/rpl/r/rpl_row_conflicts.result'
--- a/mysql-test/suite/rpl/r/rpl_row_conflicts.result	revid:anitha.gopi@stripped
+++ b/mysql-test/suite/rpl/r/rpl_row_conflicts.result	revid:andrei.elkin@stripped
@@ -23,6 +23,10 @@ a
 1
 [on slave]
 ---- Wait until slave stops with an error ----
+include/wait_for_slave_sql_error.inc [errno=1062]
+SELECT "1062" as 'Last_SQL_Errno';
+Last_SQL_Errno
+1062
 call mtr.add_suppression("Slave SQL.*Duplicate entry .1. for key .PRIMARY.* Error_code: 1062");
 call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state");
 SELECT * FROM t1;
@@ -52,6 +56,10 @@ a
 [on slave]
 ---- Wait until slave stops with an error ----
 call mtr.add_suppression("Slave SQL.*Can.t find record in .t1., Error_code: 1032");
+include/wait_for_slave_sql_error.inc [errno=1032]
+SELECT "1032" as 'Last_SQL_Errno';
+Last_SQL_Errno
+1032
 SELECT * FROM t1;
 a
 ---- Resolve the conflict on the slave and restart SQL thread ----

=== modified file 'mysql-test/suite/rpl/r/rpl_stm_conflicts.result'
--- a/mysql-test/suite/rpl/r/rpl_stm_conflicts.result	revid:anitha.gopi@stripped
+++ b/mysql-test/suite/rpl/r/rpl_stm_conflicts.result	revid:andrei.elkin@stripped
@@ -18,6 +18,10 @@ a
 1
 [on slave]
 ---- Wait until slave stops with an error ----
+include/wait_for_slave_sql_error.inc [errno=1062]
+SELECT "1062" as 'Last_SQL_Errno';
+Last_SQL_Errno
+1062
 call mtr.add_suppression("Slave SQL.*Duplicate entry .1. for key .PRIMARY.* Error_code: 1062");
 call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state");
 SELECT * FROM t1;

=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc	revid:anitha.gopi@stripped
+++ b/sql/log_event.cc	revid:andrei.elkin@stripped
@@ -2863,23 +2863,22 @@ Slave_worker *Log_event::get_slave_worke
   DBUG_ASSERT(ret_worker);
 
   /*
-    Preparing event physical coordinates info for Worker before any event got
-    scheduled so when Worker error-stopped at the first event it would be aware of
-    where exactly in the event stream.
+    Preparing event physical coordinates info for Worker before any
+    event got scheduled so when Worker error-stopped at the first
+    event it would be aware of where exactly in the event stream.
   */
-  if (!ret_worker->checkpoint_notified)
+  if (!ret_worker->master_log_change_notified)
   {
     if (!ptr_group)
       ptr_group= gaq->get_job_group(rli->gaq->assigned_group_index);
-    ptr_group->checkpoint_log_name= 
+    ptr_group->group_master_log_name=
       my_strdup(rli->get_group_master_log_name(), MYF(MY_WME));
-    ptr_group->checkpoint_log_pos= rli->get_group_master_log_pos();
-    ptr_group->checkpoint_relay_log_name=
-      my_strdup(rli->get_group_relay_log_name(), MYF(MY_WME));
-    ptr_group->checkpoint_relay_log_pos= rli->get_group_relay_log_pos();
-    ptr_group->shifted= ret_worker->bitmap_shifted;
-    ret_worker->bitmap_shifted= 0;
-    ret_worker->checkpoint_notified= TRUE;
+    ret_worker->master_log_change_notified= true;
+
+    DBUG_ASSERT(!ptr_group->notified);
+#ifndef DBUG_OFF
+    ptr_group->notified= true;
+#endif
   }
 
   // T-event: Commit, Xid, a DDL query or dml query of B-less group.
@@ -2921,6 +2920,21 @@ Slave_worker *Log_event::get_slave_worke
 
       ret_worker->relay_log_change_notified= TRUE;
     }
+
+    if (!ret_worker->checkpoint_notified)
+    {
+      if (!ptr_group)
+        ptr_group= gaq->get_job_group(rli->gaq->assigned_group_index);
+      ptr_group->checkpoint_log_name=
+        my_strdup(rli->get_group_master_log_name(), MYF(MY_WME));
+      ptr_group->checkpoint_log_pos= rli->get_group_master_log_pos();
+      ptr_group->checkpoint_relay_log_name=
+        my_strdup(rli->get_group_relay_log_name(), MYF(MY_WME));
+      ptr_group->checkpoint_relay_log_pos= rli->get_group_relay_log_pos();
+      ptr_group->shifted= ret_worker->bitmap_shifted;
+      ret_worker->bitmap_shifted= 0;
+      ret_worker->checkpoint_notified= TRUE;
+    }
     ptr_group->checkpoint_seqno= rli->checkpoint_seqno;
     ptr_group->ts= when.tv_sec + (time_t) exec_time; // Seconds_behind_master related
     rli->checkpoint_seqno++;

=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc	revid:anitha.gopi@stripped
+++ b/sql/rpl_rli.cc	revid:andrei.elkin@stripped
@@ -241,6 +241,13 @@ void Relay_log_info::reset_notified_chec
     */
     w->checkpoint_notified= FALSE;
     w->bitmap_shifted= w->bitmap_shifted + shift;
+    /*
+      Zero shift indicates the caller rotates the master binlog.
+      The new name will be passed to W through the group descriptor
+      during the first post-rotation time scheduling.
+    */
+    if (shift == 0)
+      w->master_log_change_notified= false;
 
     DBUG_PRINT("mts", ("reset_notified_checkpoint shift --> %lu, "
                "worker->bitmap_shifted --> %lu, worker --> %u.",

=== modified file 'sql/rpl_rli_pdb.cc'
--- a/sql/rpl_rli_pdb.cc	revid:anitha.gopi@stripped
+++ b/sql/rpl_rli_pdb.cc	revid:andrei.elkin@stripped
@@ -144,7 +144,8 @@ int Slave_worker::init_worker(Relay_log_
   id= i;
   curr_group_exec_parts.elements= 0;
   relay_log_change_notified= FALSE; // the 1st group to contain relaylog name
-  checkpoint_notified= FALSE;
+  checkpoint_notified= FALSE;       // the same as above
+  master_log_change_notified= false;// W learns master log during 1st group exec
   bitmap_shifted= 0;
   workers= c_rli->workers; // shallow copying is sufficient
   wq_size_waits_cnt= groups_done= events_done= curr_jobs= 0;
@@ -411,6 +412,23 @@ bool Slave_worker::commit_positions(Log_
 {
   DBUG_ENTER("Slave_worker::checkpoint_positions");
 
+  /*
+    Initial value of checkpoint_master_log_name is learned from
+    group_master_log_name. The latter can be passed to Worker
+    at rare event of master binlog rotation.
+    This initialization is needed to provide to Worker info
+    on physical coordiates during execution of the very first group
+    after a rotation.
+  */
+  if (ptr_g->group_master_log_name != NULL)
+  {
+    strmake(group_master_log_name, ptr_g->group_master_log_name,
+            sizeof(group_master_log_name) - 1);
+    my_free(ptr_g->group_master_log_name);
+    ptr_g->group_master_log_name= NULL;
+    strmake(checkpoint_master_log_name, group_master_log_name,
+            sizeof(checkpoint_master_log_name) - 1);
+  }
   if (ptr_g->checkpoint_log_name != NULL)
   {
     strmake(checkpoint_relay_log_name, ptr_g->checkpoint_relay_log_name,
@@ -1444,6 +1462,10 @@ void Slave_committed_queue::free_dynamic
     {
       my_free(ptr_g->checkpoint_relay_log_name);
     }
+    if (ptr_g->group_master_log_name)
+    {
+      my_free(ptr_g->group_master_log_name);
+    }
   }
   DBUG_ASSERT((avail == size /* full */ || entry == size /* empty */) ||
               i == avail /* all occupied are processed */);

=== modified file 'sql/rpl_rli_pdb.h'
--- a/sql/rpl_rli_pdb.h	revid:anitha.gopi@stripped
+++ b/sql/rpl_rli_pdb.h	revid:andrei.elkin@stripped
@@ -164,7 +164,9 @@ typedef struct st_slave_job_group
   volatile uchar done;  // Flag raised by W,  read and reset by Coordinator
   ulong    shifted;     // shift the last CP bitmap at receiving a new CP
   time_t   ts;          // Group's timestampt to update Seconds_behind_master
-
+#ifndef DBUG_OFF
+  bool     notified;    // to debug group_master_log_name change notification
+#endif
   /*
     Coordinator fills the struct with defaults and options at starting of 
     a group distribution.
@@ -183,6 +185,9 @@ typedef struct st_slave_job_group
     checkpoint_relay_log_pos= 0;
     checkpoint_seqno= (uint) -1;
     done= 0;
+#ifndef DBUG_OFF
+    notified= false;
+#endif
   }
 } Slave_job_group;
 
@@ -320,6 +325,7 @@ public:
 
   volatile bool relay_log_change_notified; // Coord sets and resets, W can read
   volatile bool checkpoint_notified; // Coord sets and resets, W can read
+  volatile bool master_log_change_notified; // Coord sets and resets, W can read
   ulong bitmap_shifted;  // shift the last bitmap at receiving new CP
   // WQ current excess above the overrun level
   long wq_overrun_cnt;

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-5.6 branch (andrei.elkin:5286 to 5287) Bug#16594095Andrei Elkin20 Aug