#At file:///home/acorreia/workspace.sun/repository.mysql/bzrwork/bug-43075/mysql-6.0-rpl/ based on revid:zhenxing.he@stripped
2820 Alfranio Correia 2009-03-11
BUG#43075 rpl.rpl_sync fails sporadically on pushbuild
The slave was crashing while failing to execute the init_slave() function.
The issue stems from two different reasons:
1 - A failure while allocating the master info structure generated a
segfault due to a NULL pointer.
2 - A failure while recovering generated a segfault due to a non-initialized
relay log file. In other words, the mi->init and rli->init were both set to true
before executing the recovery process thus creating an inconsistent state as the
relay log file was not initialized.
To circumvent such problems, we verified if active_mi is not null before
accessing it, refactored the recovery process which is now executed while
initializing the relay log and any error is propagated thus avoiding
to set mi->init and rli->init to true when the relay log is not initialized.
The changes related to the refactory are described below:
1 - Removed call to init_recovery from init_slave.
2 - Changed the signature of the function init_recovery.
3 - Removed flushes. They are called while initializing the relay log and master info.
modified:
sql/rpl_rli.cc
sql/slave.cc
sql/slave.h
per-file messages:
sql/rpl_rli.cc
The mi->init and rli->init were both set to true before executing the recovery process which
might fail thus generating an iconsistent state as the relay log file was not intialized.
To avoid problems, the recovery process is now executed while initializing the relay log and
any error is propagated thus avoiding to set mi->init and rli->init to true.
sql/slave.cc
Verified if active_mi is not null before acessing it. And refactored the recovery process
which is now executed while initializing the relay log. The changes related to the refactory
are described below:
1 - Removed call to init_recovery from init_slave.
2 - Changed the signature of the function init_recovery.
3 - Removed flushes. They are called while initializing the relay log and master info.
See rpl_rli.cc - init_relay_log_info and rlp_mi.cc - init_master_info.
sql/slave.h
Changed init_recovery's signature.
=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc 2009-02-27 13:20:11 +0000
+++ b/sql/rpl_rli.cc 2009-03-11 22:34:46 +0000
@@ -250,8 +250,10 @@ Failed to open the existing relay log in
rli->group_relay_log_pos= rli->event_relay_log_pos= relay_log_pos;
rli->group_master_log_pos= master_log_pos;
- if (!rli->is_relay_log_recovery &&
- init_relay_log_pos(rli,
+ if (rli->is_relay_log_recovery && init_recovery(rli->mi, &msg))
+ goto err;
+
+ if (init_relay_log_pos(rli,
rli->group_relay_log_name,
rli->group_relay_log_pos,
0 /* no data lock*/,
@@ -266,7 +268,6 @@ Failed to open the existing relay log in
}
#ifndef DBUG_OFF
- if (!(rli->is_relay_log_recovery))
{
char llbuf1[22], llbuf2[22];
DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
=== modified file 'sql/slave.cc'
--- a/sql/slave.cc 2009-02-27 13:20:11 +0000
+++ b/sql/slave.cc 2009-03-11 22:34:46 +0000
@@ -132,7 +132,6 @@ static bool wait_for_relay_log_space(Rel
static inline bool io_slave_killed(THD* thd,Master_info* mi);
static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
-static int init_recovery(Master_info* mi);
static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
bool suppress_warnings);
@@ -261,12 +260,6 @@ int init_slave()
goto err;
}
- if (active_mi->rli.is_relay_log_recovery && init_recovery(active_mi))
- {
- error= 1;
- goto err;
- }
-
/* If server id is not set, start_slave_thread() will say it */
if (active_mi->host[0] && !opt_skip_slave_start)
@@ -285,7 +278,8 @@ int init_slave()
}
err:
- active_mi->rli.is_relay_log_recovery= FALSE;
+ if (active_mi)
+ active_mi->rli.is_relay_log_recovery= FALSE;
pthread_mutex_unlock(&LOCK_active_mi);
DBUG_RETURN(error);
}
@@ -317,9 +311,8 @@ err:
If there is an error, it returns (1), otherwise returns (0).
*/
-static int init_recovery(Master_info* mi)
+int init_recovery(Master_info* mi, const char** errmsg)
{
- const char *errmsg= 0;
DBUG_ENTER("init_recovery");
Relay_log_info *rli= &mi->rli;
@@ -339,26 +332,8 @@ static int init_recovery(Master_info* mi
sizeof(mi->rli.event_relay_log_name)-1);
rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
-
- if (init_relay_log_pos(rli,
- rli->group_relay_log_name,
- rli->group_relay_log_pos,
- 0 /*no data lock*/,
- &errmsg, 0))
- DBUG_RETURN(1);
-
- if (flush_master_info(mi, 0))
- {
- sql_print_error("Failed to flush master info file");
- DBUG_RETURN(1);
- }
- if (flush_relay_log_info(rli))
- {
- sql_print_error("Failed to flush relay info file");
- DBUG_RETURN(1);
- }
}
-
+
DBUG_RETURN(0);
}
=== modified file 'sql/slave.h'
--- a/sql/slave.h 2008-12-24 10:48:24 +0000
+++ b/sql/slave.h 2009-03-11 22:34:46 +0000
@@ -144,6 +144,7 @@ extern ulonglong relay_log_space_limit;
#define SLAVE_FORCE_ALL 4
int init_slave();
+int init_recovery(Master_info* mi, const char** errmsg);
void init_slave_skip_errors(const char* arg);
bool flush_relay_log_info(Relay_log_info* rli);
int register_slave_on_master(MYSQL* mysql);