List:Commits« Previous MessageNext Message »
From:Alfranio Correia Date:March 12 2009 11:17am
Subject:bzr commit into mysql-6.0-rpl branch (alfranio.correia:2821) Bug#43075
View as plain text  
#At file:///home/acorreia/workspace.sun/repository.mysql/bzrwork/bug-43075/mysql-6.0-rpl/ based on revid:aelkin@stripped

 2821 Alfranio Correia	2009-03-12
      BUG#43075 rpl.rpl_sync fails sporadically on pushbuild
      
      The slave was crashing while failing to execute the init_slave() function.
      
      The issue stems from two different reasons:
      
      1 - A failure while allocating the master info structure generated a
          segfault due to a NULL pointer.
      
      2 - A failure while recovering generated a segfault due to a non-initialized
          relay log file. In other words, the mi->init and rli->init were both set to true
          before executing the recovery process thus creating an inconsistent state as the
          relay log file was not initialized.
      
      To circumvent such problems, we refactored the recovery process which is now executed while
      initializing the relay log. It is ensured that the master info structure is created
      before accessing it and any error is propagated thus avoiding to set mi->init and
      rli->init to true when for instance the relay log is not initialized or the relay info
      is not flushed.
      
      The changes related to the refactory are described below:
      
      1 - Removed call to init_recovery from init_slave.
      
      2 - Changed the signature of the function init_recovery.
      
      3 - Removed flushes. They are called while initializing the relay log and master info.
      
      4 - Made sure that if the relay info is not flushed the mi-init and rli-init are not
      set to true.
      
      In this patch, we also replaced the exit(1) in the fault injection by DBUG_ABORT() to
      make it compliant with the code guidelines.
modified:
  mysql-test/suite/rpl/r/rpl_sync.result
  mysql-test/suite/rpl/t/rpl_sync.test
  sql/rpl_mi.cc
  sql/rpl_rli.cc
  sql/slave.cc
  sql/slave.h

per-file messages:
  mysql-test/suite/rpl/r/rpl_sync.result
    Added abort while simulating crashes.
  mysql-test/suite/rpl/t/rpl_sync.test
    Added abort while simulating crashes.
  sql/rpl_mi.cc
    If evertything went well the recovery is over.
  sql/rpl_rli.cc
    The mi->init and rli->init were both set to true before executing the recovery
    process which might fail thus generating an iconsistent state as the relay 
    log file was not intialized.
        
    To avoid problems, the recovery process is now executed while initializing 
    the relay log and any error is propagated thus avoiding to set mi->init 
    and rli->init to true.
    
    It is also ensured that the relay log is flushed.
  sql/slave.cc
    Changed due to the refactory.
  sql/slave.h
    Changes due to the refactory.
=== modified file 'mysql-test/suite/rpl/r/rpl_sync.result'
--- a/mysql-test/suite/rpl/r/rpl_sync.result	2009-02-17 23:06:00 +0000
+++ b/mysql-test/suite/rpl/r/rpl_sync.result	2009-03-12 11:16:58 +0000
@@ -5,6 +5,7 @@ reset master;
 reset slave;
 drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
 start slave;
+call mtr.add_suppression('Attempting backtrace');
 CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
 insert into t1(a) values(1);
 insert into t1(a) values(2);

=== modified file 'mysql-test/suite/rpl/t/rpl_sync.test'
--- a/mysql-test/suite/rpl/t/rpl_sync.test	2009-02-17 23:06:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_sync.test	2009-03-12 11:16:58 +0000
@@ -29,11 +29,11 @@
 --source include/master-slave.inc
 --source include/not_embedded.inc
 --source include/not_valgrind.inc
-
 --source include/have_debug.inc
-
 --source include/have_innodb.inc
 
+call mtr.add_suppression('Attempting backtrace');
+
 CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
 
 insert into t1(a) values(1);

=== modified file 'sql/rpl_mi.cc'
--- a/sql/rpl_mi.cc	2009-02-19 08:54:07 +0000
+++ b/sql/rpl_mi.cc	2009-03-12 11:16:58 +0000
@@ -380,6 +380,7 @@ file '%s')", fname);
     goto err;
 
   mi->inited = 1;
+  mi->rli.is_relay_log_recovery= FALSE;
   // now change cache READ -> WRITE - must do this before flush_master_info
   reinit_io_cache(&mi->file, WRITE_CACHE, 0L, 0, 1);
   if ((error=test(flush_master_info(mi, 1))))

=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc	2009-03-09 12:55:05 +0000
+++ b/sql/rpl_rli.cc	2009-03-12 11:16:58 +0000
@@ -250,8 +250,10 @@ Failed to open the existing relay log in
     rli->group_relay_log_pos= rli->event_relay_log_pos= relay_log_pos;
     rli->group_master_log_pos= master_log_pos;
 
-    if (!rli->is_relay_log_recovery &&
-        init_relay_log_pos(rli,
+    if (rli->is_relay_log_recovery && init_recovery(rli->mi, &msg)) 
+      goto err;
+
+    if (init_relay_log_pos(rli,
                            rli->group_relay_log_name,
                            rli->group_relay_log_pos,
                            0 /* no data lock*/,
@@ -266,7 +268,6 @@ Failed to open the existing relay log in
   }
 
 #ifndef DBUG_OFF
-  if (!(rli->is_relay_log_recovery))
   {
     char llbuf1[22], llbuf2[22];
     DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
@@ -283,7 +284,10 @@ Failed to open the existing relay log in
   */
   reinit_io_cache(&rli->info_file, WRITE_CACHE,0L,0,1);
   if ((error= flush_relay_log_info(rli)))
-    sql_print_error("Failed to flush relay log info file");
+  {
+    msg="Failed to flush relay log info file";
+    goto err;
+  }
   if (count_relay_log_space(rli))
   {
     msg="Error counting relay log space";

=== modified file 'sql/slave.cc'
--- a/sql/slave.cc	2009-03-09 12:55:05 +0000
+++ b/sql/slave.cc	2009-03-12 11:16:58 +0000
@@ -132,7 +132,6 @@ static bool wait_for_relay_log_space(Rel
 static inline bool io_slave_killed(THD* thd,Master_info* mi);
 static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
 static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
-static int init_recovery(Master_info* mi);
 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
                           bool suppress_warnings);
@@ -261,12 +260,6 @@ int init_slave()
     goto err;
   }
 
-  if (active_mi->rli.is_relay_log_recovery && init_recovery(active_mi))
-  {
-    error= 1;
-    goto err;
-  }
-
   /* If server id is not set, start_slave_thread() will say it */
 
   if (active_mi->host[0] && !opt_skip_slave_start)
@@ -285,7 +278,6 @@ int init_slave()
   }
 
 err:
-  active_mi->rli.is_relay_log_recovery= FALSE;
   pthread_mutex_unlock(&LOCK_active_mi);
   DBUG_RETURN(error);
 }
@@ -317,9 +309,8 @@ err:
   
    If there is an error, it returns (1), otherwise returns (0).
  */
-static int init_recovery(Master_info* mi)
+int init_recovery(Master_info* mi, const char** errmsg)
 {
-  const char *errmsg= 0;
   DBUG_ENTER("init_recovery");
  
   Relay_log_info *rli= &mi->rli;
@@ -339,26 +330,8 @@ static int init_recovery(Master_info* mi
             sizeof(mi->rli.event_relay_log_name)-1);
  
     rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
- 
-    if (init_relay_log_pos(rli,
-                           rli->group_relay_log_name,
-                           rli->group_relay_log_pos,
-                           0 /*no data lock*/,
-                            &errmsg, 0))
-      DBUG_RETURN(1);
- 
-    if (flush_master_info(mi, 0))
-    {
-      sql_print_error("Failed to flush master info file");
-      DBUG_RETURN(1);
-    }
-    if (flush_relay_log_info(rli))
-    {
-       sql_print_error("Failed to flush relay info file");
-       DBUG_RETURN(1);
-    }
   }
- 
+
   DBUG_RETURN(0);
 }
  
@@ -4331,7 +4304,7 @@ void rotate_relay_log(Master_info* mi)
   DBUG_ENTER("rotate_relay_log");
   Relay_log_info* rli= &mi->rli;
 
-  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", exit(1););
+  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_ABORT(););
 
   /* We don't lock rli->run_lock. This would lead to deadlocks. */
   pthread_mutex_lock(&mi->run_lock);

=== modified file 'sql/slave.h'
--- a/sql/slave.h	2008-12-24 10:48:24 +0000
+++ b/sql/slave.h	2009-03-12 11:16:58 +0000
@@ -144,6 +144,7 @@ extern ulonglong relay_log_space_limit;
 #define SLAVE_FORCE_ALL 4
 
 int init_slave();
+int init_recovery(Master_info* mi, const char** errmsg);
 void init_slave_skip_errors(const char* arg);
 bool flush_relay_log_info(Relay_log_info* rli);
 int register_slave_on_master(MYSQL* mysql);

Thread
bzr commit into mysql-6.0-rpl branch (alfranio.correia:2821) Bug#43075Alfranio Correia12 Mar
  • Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2821)Bug#43075He Zhenxing13 Mar