List:Commits« Previous MessageNext Message »
From:He Zhenxing Date:March 13 2009 3:17am
Subject:Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2821)
Bug#43075
View as plain text  
Hi Alfranio,

Patch approved, thank you!

Alfranio Correia wrote:
> #At
> file:///home/acorreia/workspace.sun/repository.mysql/bzrwork/bug-43075/mysql-6.0-rpl/
> based on revid:aelkin@stripped
> 
>  2821 Alfranio Correia	2009-03-12
>       BUG#43075 rpl.rpl_sync fails sporadically on pushbuild
>       
>       The slave was crashing while failing to execute the init_slave() function.
>       
>       The issue stems from two different reasons:
>       
>       1 - A failure while allocating the master info structure generated a
>           segfault due to a NULL pointer.
>       
>       2 - A failure while recovering generated a segfault due to a non-initialized
>           relay log file. In other words, the mi->init and rli->init were both
> set to true
>           before executing the recovery process thus creating an inconsistent state
> as the
>           relay log file was not initialized.
>       
>       To circumvent such problems, we refactored the recovery process which is now
> executed while
>       initializing the relay log. It is ensured that the master info structure is
> created
>       before accessing it and any error is propagated thus avoiding to set
> mi->init and
>       rli->init to true when for instance the relay log is not initialized or the
> relay info
>       is not flushed.
>       
>       The changes related to the refactory are described below:
>       
>       1 - Removed call to init_recovery from init_slave.
>       
>       2 - Changed the signature of the function init_recovery.
>       
>       3 - Removed flushes. They are called while initializing the relay log and
> master info.
>       
>       4 - Made sure that if the relay info is not flushed the mi-init and rli-init
> are not
>       set to true.
>       
>       In this patch, we also replaced the exit(1) in the fault injection by
> DBUG_ABORT() to
>       make it compliant with the code guidelines.
> modified:
>   mysql-test/suite/rpl/r/rpl_sync.result
>   mysql-test/suite/rpl/t/rpl_sync.test
>   sql/rpl_mi.cc
>   sql/rpl_rli.cc
>   sql/slave.cc
>   sql/slave.h
> 
> per-file messages:
>   mysql-test/suite/rpl/r/rpl_sync.result
>     Added abort while simulating crashes.
>   mysql-test/suite/rpl/t/rpl_sync.test
>     Added abort while simulating crashes.
>   sql/rpl_mi.cc
>     If evertything went well the recovery is over.
>   sql/rpl_rli.cc
>     The mi->init and rli->init were both set to true before executing the
> recovery
>     process which might fail thus generating an iconsistent state as the relay 
>     log file was not intialized.
>         
>     To avoid problems, the recovery process is now executed while initializing 
>     the relay log and any error is propagated thus avoiding to set mi->init 
>     and rli->init to true.
>     
>     It is also ensured that the relay log is flushed.
>   sql/slave.cc
>     Changed due to the refactory.
>   sql/slave.h
>     Changes due to the refactory.
> === modified file 'mysql-test/suite/rpl/r/rpl_sync.result'
> --- a/mysql-test/suite/rpl/r/rpl_sync.result	2009-02-17 23:06:00 +0000
> +++ b/mysql-test/suite/rpl/r/rpl_sync.result	2009-03-12 11:16:58 +0000
> @@ -5,6 +5,7 @@ reset master;
>  reset slave;
>  drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
>  start slave;
> +call mtr.add_suppression('Attempting backtrace');
>  CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
>  insert into t1(a) values(1);
>  insert into t1(a) values(2);
> 
> === modified file 'mysql-test/suite/rpl/t/rpl_sync.test'
> --- a/mysql-test/suite/rpl/t/rpl_sync.test	2009-02-17 23:06:00 +0000
> +++ b/mysql-test/suite/rpl/t/rpl_sync.test	2009-03-12 11:16:58 +0000
> @@ -29,11 +29,11 @@
>  --source include/master-slave.inc
>  --source include/not_embedded.inc
>  --source include/not_valgrind.inc
> -
>  --source include/have_debug.inc
> -
>  --source include/have_innodb.inc
>  
> +call mtr.add_suppression('Attempting backtrace');
> +
>  CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
>  
>  insert into t1(a) values(1);
> 
> === modified file 'sql/rpl_mi.cc'
> --- a/sql/rpl_mi.cc	2009-02-19 08:54:07 +0000
> +++ b/sql/rpl_mi.cc	2009-03-12 11:16:58 +0000
> @@ -380,6 +380,7 @@ file '%s')", fname);
>      goto err;
>  
>    mi->inited = 1;
> +  mi->rli.is_relay_log_recovery= FALSE;
>    // now change cache READ -> WRITE - must do this before flush_master_info
>    reinit_io_cache(&mi->file, WRITE_CACHE, 0L, 0, 1);
>    if ((error=test(flush_master_info(mi, 1))))
> 
> === modified file 'sql/rpl_rli.cc'
> --- a/sql/rpl_rli.cc	2009-03-09 12:55:05 +0000
> +++ b/sql/rpl_rli.cc	2009-03-12 11:16:58 +0000
> @@ -250,8 +250,10 @@ Failed to open the existing relay log in
>      rli->group_relay_log_pos= rli->event_relay_log_pos= relay_log_pos;
>      rli->group_master_log_pos= master_log_pos;
>  
> -    if (!rli->is_relay_log_recovery &&
> -        init_relay_log_pos(rli,
> +    if (rli->is_relay_log_recovery && init_recovery(rli->mi,
> &msg)) 
> +      goto err;
> +
> +    if (init_relay_log_pos(rli,
>                             rli->group_relay_log_name,
>                             rli->group_relay_log_pos,
>                             0 /* no data lock*/,
> @@ -266,7 +268,6 @@ Failed to open the existing relay log in
>    }
>  
>  #ifndef DBUG_OFF
> -  if (!(rli->is_relay_log_recovery))
>    {
>      char llbuf1[22], llbuf2[22];
>      DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s
> rli->event_relay_log_pos=%s",
> @@ -283,7 +284,10 @@ Failed to open the existing relay log in
>    */
>    reinit_io_cache(&rli->info_file, WRITE_CACHE,0L,0,1);
>    if ((error= flush_relay_log_info(rli)))
> -    sql_print_error("Failed to flush relay log info file");
> +  {
> +    msg="Failed to flush relay log info file";
> +    goto err;
> +  }
>    if (count_relay_log_space(rli))
>    {
>      msg="Error counting relay log space";
> 
> === modified file 'sql/slave.cc'
> --- a/sql/slave.cc	2009-03-09 12:55:05 +0000
> +++ b/sql/slave.cc	2009-03-12 11:16:58 +0000
> @@ -132,7 +132,6 @@ static bool wait_for_relay_log_space(Rel
>  static inline bool io_slave_killed(THD* thd,Master_info* mi);
>  static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
>  static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
> -static int init_recovery(Master_info* mi);
>  static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
>  static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
>                            bool suppress_warnings);
> @@ -261,12 +260,6 @@ int init_slave()
>      goto err;
>    }
>  
> -  if (active_mi->rli.is_relay_log_recovery && init_recovery(active_mi))
> -  {
> -    error= 1;
> -    goto err;
> -  }
> -
>    /* If server id is not set, start_slave_thread() will say it */
>  
>    if (active_mi->host[0] && !opt_skip_slave_start)
> @@ -285,7 +278,6 @@ int init_slave()
>    }
>  
>  err:
> -  active_mi->rli.is_relay_log_recovery= FALSE;
>    pthread_mutex_unlock(&LOCK_active_mi);
>    DBUG_RETURN(error);
>  }
> @@ -317,9 +309,8 @@ err:
>    
>     If there is an error, it returns (1), otherwise returns (0).
>   */
> -static int init_recovery(Master_info* mi)
> +int init_recovery(Master_info* mi, const char** errmsg)
>  {
> -  const char *errmsg= 0;
>    DBUG_ENTER("init_recovery");
>   
>    Relay_log_info *rli= &mi->rli;
> @@ -339,26 +330,8 @@ static int init_recovery(Master_info* mi
>              sizeof(mi->rli.event_relay_log_name)-1);
>   
>      rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
> - 
> -    if (init_relay_log_pos(rli,
> -                           rli->group_relay_log_name,
> -                           rli->group_relay_log_pos,
> -                           0 /*no data lock*/,
> -                            &errmsg, 0))
> -      DBUG_RETURN(1);
> - 
> -    if (flush_master_info(mi, 0))
> -    {
> -      sql_print_error("Failed to flush master info file");
> -      DBUG_RETURN(1);
> -    }
> -    if (flush_relay_log_info(rli))
> -    {
> -       sql_print_error("Failed to flush relay info file");
> -       DBUG_RETURN(1);
> -    }
>    }
> - 
> +
>    DBUG_RETURN(0);
>  }
>   
> @@ -4331,7 +4304,7 @@ void rotate_relay_log(Master_info* mi)
>    DBUG_ENTER("rotate_relay_log");
>    Relay_log_info* rli= &mi->rli;
>  
> -  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", exit(1););
> +  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_ABORT(););
>  
>    /* We don't lock rli->run_lock. This would lead to deadlocks. */
>    pthread_mutex_lock(&mi->run_lock);
> 
> === modified file 'sql/slave.h'
> --- a/sql/slave.h	2008-12-24 10:48:24 +0000
> +++ b/sql/slave.h	2009-03-12 11:16:58 +0000
> @@ -144,6 +144,7 @@ extern ulonglong relay_log_space_limit;
>  #define SLAVE_FORCE_ALL 4
>  
>  int init_slave();
> +int init_recovery(Master_info* mi, const char** errmsg);
>  void init_slave_skip_errors(const char* arg);
>  bool flush_relay_log_info(Relay_log_info* rli);
>  int register_slave_on_master(MYSQL* mysql);
> 
> 

Thread
bzr commit into mysql-6.0-rpl branch (alfranio.correia:2821) Bug#43075Alfranio Correia12 Mar
  • Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2821)Bug#43075He Zhenxing13 Mar