List:Commits« Previous MessageNext Message »
From:Sergey Vojtovich Date:February 16 2008 2:02pm
Subject:bk commit into 5.0 tree (svoj:1.2575) BUG#13861
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of svoj.  When svoj does a push these changes
will be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2008-02-16 17:02:34+04:00, svoj@stripped +5 -0
  BUG#13861 - START SLAVE UNTIL may stop 1 evnt too late if
              log-slave-updates and circul repl
  
  Slave SQL thread may execute one extra event when there are events
  skipped by slave I/O thread (e.g. originated by the same server).
  Whereas it was requested not to do so by the UNTIL condition.
  
  This happens because we check for end position of previously executed
  event. This is fine when there are no skipped by slave I/O thread
  events, as end position of previous event equals to start position
  of to be executed event. Otherwise this position equals to start
  position of skipped event.
  
  This is fixed by:
  - reading to be executed event before checking if until is satisfied.
  - checking beginning position of to be executed event. As we do not
    store the beginning position anywhere, it is calculated by subtracting
    event length from the end position.
  - if there are no events on the event queue, that meet until condition,
    we stop immediately, as in this case we do not want to wait for next
    event.

  mysql-test/r/rpl_dual_pos_advance.result@stripped, 2008-02-16 17:02:33+04:00, svoj@stripped
+21 -1
    A test case for BUG#13861.

  mysql-test/t/rpl_dual_pos_advance.test@stripped, 2008-02-16 17:02:33+04:00, svoj@stripped
+16 -16
    A test case for BUG#13861.

  sql/log_event.cc@stripped, 2008-02-16 17:02:33+04:00, svoj@stripped +2 -1
    Store length of event. This is needed for further calculation of
    the beginning of event.

  sql/slave.cc@stripped, 2008-02-16 17:02:33+04:00, svoj@stripped +41 -24
    Slave SQL thread may execute one extra event when there are events
    skipped by slave I/O thread (e.g. originated by the same server).
    Whereas it was requested not to do so by the UNTIL condition.
    
    This happens because we check for end position of previously executed
    event. This is fine when there are no skipped by slave I/O thread
    events, as end position of previous event equals to start position
    of to be executed event. Otherwise this position equals to start
    position of skipped event.
    
    This is fixed by:
    - reading to be executed event before checking if until is satisfied.
    - checking beginning position of to be executed event. As we do not
      store the beginning position anywhere, it is calculated by subtracting
      event length from the end position.
    - if there are no events on the event queue, that meet until condition,
      we stop immediately, as in this case we do not want to wait for next
      event.

  sql/slave.h@stripped, 2008-02-16 17:02:33+04:00, svoj@stripped +1 -1
    Added master_log_pos parametr to is_until_satisfied().

diff -Nrup a/mysql-test/r/rpl_dual_pos_advance.result
b/mysql-test/r/rpl_dual_pos_advance.result
--- a/mysql-test/r/rpl_dual_pos_advance.result	2006-05-12 21:26:43 +05:00
+++ b/mysql-test/r/rpl_dual_pos_advance.result	2008-02-16 17:02:33 +04:00
@@ -8,15 +8,35 @@ reset master;
 change master to master_host="127.0.0.1",master_port=SLAVE_PORT,master_user="root";
 start slave;
 create table t1 (n int);
+stop slave;
+create table t2 (n int);
+show tables;
+Tables_in_test
+t1
+t2
+create table t3 (n int);
+start slave until master_log_file="slave-bin.000001",master_log_pos=195;
+Warnings:
+Note	1278	It is recommended to use --skip-slave-start when doing step-by-step replication
with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's
mysqld restart
+select master_pos_wait("slave-bin.000001",137);
+master_pos_wait("slave-bin.000001",137)
+0
+show tables;
+Tables_in_test
+t1
+t2
+start slave;
 create table t4 (n int);
 create table t5 (n int);
 create table t6 (n int);
 show tables;
 Tables_in_test
 t1
+t2
+t3
 t4
 t5
 t6
 stop slave;
 reset slave;
-drop table t1,t4,t5,t6;
+drop table t1,t2,t3,t4,t5,t6;
diff -Nrup a/mysql-test/t/rpl_dual_pos_advance.test
b/mysql-test/t/rpl_dual_pos_advance.test
--- a/mysql-test/t/rpl_dual_pos_advance.test	2007-11-05 19:20:08 +04:00
+++ b/mysql-test/t/rpl_dual_pos_advance.test	2008-02-16 17:02:33 +04:00
@@ -33,42 +33,42 @@ sync_with_master;
 # Now test BUG#13861. This will be enabled when Guilhem fixes this
 # bug.
 
-# stop slave
+stop slave;
 
-# create table t2 (n int); # create one ignored event
+create table t2 (n int); # create one ignored event
 
-# save_master_pos;
-# connection slave;
-# sync_with_master;
+save_master_pos;
+connection slave;
+sync_with_master;
 
-# connection slave;
+connection slave;
 
-# show tables;
+show tables;
 
-# save_master_pos;
+save_master_pos;
 
-# create table t3 (n int);
+create table t3 (n int);
 
-# connection master;
+connection master;
 
 # bug is that START SLAVE UNTIL may stop too late, we test that by
 # asking it to stop before creation of t3.
 
-# start slave until master_log_file="slave-bin.000001",master_log_pos=195;
+start slave until master_log_file="slave-bin.000001",master_log_pos=195;
 
 # wait until it's started (the position below is the start of "CREATE
 # TABLE t2") (otherwise wait_for_slave_to_stop may return at once)
 
-# select master_pos_wait("slave-bin.000001",137);
+select master_pos_wait("slave-bin.000001",137);
 
-# wait_for_slave_to_stop;
+wait_for_slave_to_stop;
 
 # then BUG#13861 causes t3 to show up below (because stopped too
 # late).
 
-# show tables;
+show tables;
 
-# start slave;
+start slave;
 
 # BUG#13023 is that Exec_master_log_pos may stay too low "forever":
 
@@ -99,7 +99,7 @@ show tables;
 
 stop slave;
 reset slave;
-drop table t1,t4,t5,t6; # add t2 and t3 later
+drop table t1,t2,t3,t4,t5,t6;
 
 save_master_pos;
 connection slave;
diff -Nrup a/sql/log_event.cc b/sql/log_event.cc
--- a/sql/log_event.cc	2007-12-21 23:30:22 +04:00
+++ b/sql/log_event.cc	2008-02-16 17:02:33 +04:00
@@ -373,6 +373,7 @@ Log_event::Log_event(const char* buf,
 #endif
   when = uint4korr(buf);
   server_id = uint4korr(buf + SERVER_ID_OFFSET);
+  data_written= uint4korr(buf + EVENT_LEN_OFFSET);
   if (description_event->binlog_version==1)
   {
     log_pos= 0;
@@ -405,7 +406,7 @@ Log_event::Log_event(const char* buf,
         binlog, so which will cause problems if the user uses this value
         in CHANGE MASTER).
       */
-    log_pos+= uint4korr(buf + EVENT_LEN_OFFSET);
+    log_pos+= data_written;
   }
   DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos));
 
diff -Nrup a/sql/slave.cc b/sql/slave.cc
--- a/sql/slave.cc	2008-02-05 19:36:17 +04:00
+++ b/sql/slave.cc	2008-02-16 17:02:33 +04:00
@@ -3115,6 +3115,8 @@ int check_expected_error(THD* thd, RELAY
      Check if condition stated in UNTIL clause of START SLAVE is reached.
    SYNOPSYS
      st_relay_log_info::is_until_satisfied()
+     master_log_pos    position of the begginning of to be executed event
+
    DESCRIPTION
      Checks if UNTIL condition is reached. Uses caching result of last 
      comparison of current log file name and target log file name. So cached 
@@ -3139,7 +3141,7 @@ int check_expected_error(THD* thd, RELAY
      false - condition not met
 */
 
-bool st_relay_log_info::is_until_satisfied()
+bool st_relay_log_info::is_until_satisfied(my_off_t master_log_pos)
 {
   const char *log_name;
   ulonglong log_pos;
@@ -3149,7 +3151,7 @@ bool st_relay_log_info::is_until_satisfi
   if (until_condition == UNTIL_MASTER_POS)
   {
     log_name= group_master_log_name;
-    log_pos= group_master_log_pos;
+    log_pos= master_log_pos;
   }
   else
   { /* until_condition == UNTIL_RELAY_POS */
@@ -3228,28 +3230,6 @@ static int exec_relay_log_event(THD* thd
      wait for something for example inside of next_event().
    */
   pthread_mutex_lock(&rli->data_lock);
-  /*
-    This tests if the position of the end of the last previous executed event
-    hits the UNTIL barrier.
-    We would prefer to test if the position of the start (or possibly) end of
-    the to-be-read event hits the UNTIL barrier, this is different if there
-    was an event ignored by the I/O thread just before (BUG#13861 to be
-    fixed).
-  */
-  if (rli->until_condition!=RELAY_LOG_INFO::UNTIL_NONE &&
-      rli->is_until_satisfied())
-  {
-    char buf[22];
-    sql_print_information("Slave SQL thread stopped because it reached its"
-                    " UNTIL position %s", llstr(rli->until_pos(), buf));
-    /*
-      Setting abort_slave flag because we do not want additional message about
-      error in query execution to be printed.
-    */
-    rli->abort_slave= 1;
-    pthread_mutex_unlock(&rli->data_lock);
-    return 1;
-  }
 
   Log_event * ev = next_event(rli);
 
@@ -3267,6 +3247,27 @@ static int exec_relay_log_event(THD* thd
     int exec_res;
 
     /*
+      This tests if the position of the beginning of the current event
+      hits the UNTIL barrier.
+    */
+    if (rli->until_condition != RELAY_LOG_INFO::UNTIL_NONE &&
+        rli->is_until_satisfied((thd->options & OPTION_BEGIN ||
!ev->log_pos) ?
+                                rli->group_master_log_pos :
+                                ev->log_pos - ev->data_written))
+    {
+      char buf[22];
+      sql_print_information("Slave SQL thread stopped because it reached its"
+                            " UNTIL position %s", llstr(rli->until_pos(), buf));
+      /*
+        Setting abort_slave flag because we do not want additional message about
+        error in query execution to be printed.
+      */
+      rli->abort_slave= 1;
+      pthread_mutex_unlock(&rli->data_lock);
+      delete ev;
+      return 1;
+    }
+    /*
       Queries originating from this server must be skipped.
       Low-level events (Format_desc, Rotate, Stop) from this server
       must also be skipped. But for those we don't want to modify
@@ -3976,6 +3977,22 @@ Slave SQL thread aborted. Can't execute 
       goto err;
     }
   }
+
+  /*
+    First check until condition - probably there is nothing to execute. We
+    do not want to wait for next event in this case.
+  */
+  pthread_mutex_lock(&rli->data_lock);
+  if (rli->until_condition != RELAY_LOG_INFO::UNTIL_NONE &&
+      rli->is_until_satisfied(rli->group_master_log_pos))
+  {
+    char buf[22];
+    sql_print_information("Slave SQL thread stopped because it reached its"
+                          " UNTIL position %s", llstr(rli->until_pos(), buf));
+    pthread_mutex_unlock(&rli->data_lock);
+    goto err;
+  }
+  pthread_mutex_unlock(&rli->data_lock);
 
   /* Read queries from the IO/THREAD until this thread is killed */
 
diff -Nrup a/sql/slave.h b/sql/slave.h
--- a/sql/slave.h	2008-02-05 19:36:17 +04:00
+++ b/sql/slave.h	2008-02-16 17:02:33 +04:00
@@ -348,7 +348,7 @@ typedef struct st_relay_log_info
   void close_temporary_tables();
 
   /* Check if UNTIL condition is satisfied. See slave.cc for more. */
-  bool is_until_satisfied();
+  bool is_until_satisfied(my_off_t master_log_pos);
   inline ulonglong until_pos()
   {
     return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos :
Thread
bk commit into 5.0 tree (svoj:1.2575) BUG#13861Sergey Vojtovich16 Feb
  • Re: bk commit into 5.0 tree (svoj:1.2575) BUG#13861Andrei Elkin20 Feb
  • Re: bk commit into 5.0 tree (svoj:1.2575) BUG#13861Mats Kindahl20 Feb