#At file:///home/daogangq/mysql/bzrwork/bug45214/5.1-bt/ based on revid:davi.arnaut@stripped
2941 Dao-Gang.Qu@stripped 2009-06-23
Bug #45214 get_master_version_and_clock does not report error when queries fail
The "get_master_version_and_clock(...)" function in sql/slave.cc ignores errors and silently continues
when a query gets a failed response from the master because of a transient network disconnection between master and slave,
or when a query gets a successful response but the retrieved result is NULL.
The "get_master_version_and_clock(...)" function should retry connecting to the master if a query gets a failed response
from the master because of a transient network disconnection between master and slave. It should report a NULL value error
and stop the slave I/O thread if a query gets a successful response but the result retrieved from the master is NULL.
@ sql/slave.cc
Update slave.cc to fix bug #45214
modified:
sql/slave.cc
=== modified file 'sql/slave.cc'
--- a/sql/slave.cc 2009-06-09 16:44:26 +0000
+++ b/sql/slave.cc 2009-06-23 08:37:21 +0000
@@ -864,7 +864,7 @@ static int get_master_version_and_clock(
MYSQL_RES *master_res= 0;
MYSQL_ROW master_row;
DBUG_ENTER("get_master_version_and_clock");
-
+ int query_re= 0;
/*
Free old description_event_for_queue (that is needed if we are in
a reconnection).
@@ -938,8 +938,9 @@ static int get_master_version_and_clock(
Compare the master and slave's clock. Do not die if master's clock is
unavailable (very old master not supporting UNIX_TIMESTAMP()?).
*/
-
- if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
+ DBUG_SYNC_POINT("debug_lock.before_get_UNIX_TIMESTAMP", 1000);
+ query_re= mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()"));
+ if (!query_re &&
(master_res= mysql_store_result(mysql)) &&
(master_row= mysql_fetch_row(master_res)))
{
@@ -954,6 +955,10 @@ static int get_master_version_and_clock(
"SLAVE STATUS. Error: %s (%d)",
mysql_error(mysql), mysql_errno(mysql));
}
+ else if (query_re)
+ {
+ DBUG_RETURN(2);
+ }
if (master_res)
mysql_free_result(master_res);
@@ -967,26 +972,54 @@ static int get_master_version_and_clock(
Note: we could have put a @@SERVER_ID in the previous SELECT
UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
*/
- if (!mysql_real_query(mysql,
- STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) &&
- (master_res= mysql_store_result(mysql)))
- {
- if ((master_row= mysql_fetch_row(master_res)) &&
- (::server_id == strtoul(master_row[1], 0, 10)) &&
- !mi->rli.replicate_same_server_id)
+ query_re= mysql_real_query(mysql, STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'"));
+ if (query_re)
+ {
+ DBUG_RETURN(2);
+ }
+ else
+ {
+ master_res= mysql_store_result(mysql);
+ if (master_res)
{
- errmsg= "The slave I/O thread stops because master and slave have equal \
+ master_row= mysql_fetch_row(master_res);
+ if (master_row && (::server_id == strtoul(master_row[1], 0, 10)) &&
+ !mi->rli.replicate_same_server_id)
+ {
+ errmsg= "The slave I/O thread stops because master and slave have equal \
MySQL server ids; these ids must be different for replication to work (or \
the --replicate-same-server-id option must be used on slave but this does \
not always make sense; please check the manual before using it).";
+ err_code= ER_SLAVE_FATAL_ERROR;
+ sprintf(err_buff, ER(err_code), errmsg);
+ }
+ else if (!master_row)
+ {
+ errmsg= "The slave I/O thread stops because the value of \
+the MySQL server id is NULL on master.";
+ err_code= ER_SLAVE_FATAL_ERROR;
+ sprintf(err_buff, ER(err_code), errmsg);
+ }
+ else
+ {
+ //Do nothing.
+ }
+ mysql_free_result(master_res);
+
+ if (errmsg)
+ goto err;
+ }
+ else
+ {
+ errmsg= "The slave I/O thread stops because the value of \
+the MySQL server id is NULL on master.";
err_code= ER_SLAVE_FATAL_ERROR;
sprintf(err_buff, ER(err_code), errmsg);
- }
- mysql_free_result(master_res);
- if (errmsg)
goto err;
+ }
}
+
/*
Check that the master's global character_set_server and ours are the same.
Not fatal if query fails (old master?).
@@ -1008,23 +1041,53 @@ not always make sense; please check the
if (*mysql->server_version == '3')
goto err;
- if ((*mysql->server_version == '4') &&
- !mysql_real_query(mysql,
- STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) &&
- (master_res= mysql_store_result(mysql)))
+ if (*mysql->server_version == '4')
{
- if ((master_row= mysql_fetch_row(master_res)) &&
- strcmp(master_row[0], global_system_variables.collation_server->name))
+ query_re= mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER"));
+ if (query_re)
+ {
+ DBUG_RETURN(2);
+ }
+ else
{
- errmsg= "The slave I/O thread stops because master and slave have \
+ master_res= mysql_store_result(mysql);
+ if (master_res)
+ {
+ master_row= mysql_fetch_row(master_res);
+ if (master_row && strcmp(master_row[0],
+ global_system_variables.collation_server->name))
+ {
+ errmsg= "The slave I/O thread stops because master and slave have \
different values for the COLLATION_SERVER global variable. The values must \
be equal for replication to work";
- err_code= ER_SLAVE_FATAL_ERROR;
- sprintf(err_buff, ER(err_code), errmsg);
+ err_code= ER_SLAVE_FATAL_ERROR;
+ sprintf(err_buff, ER(err_code), errmsg);
+ }
+ else if (!master_row)
+ {
+ errmsg= "The slave I/O thread stops because the value of \
+the COLLATION_SERVER global variable is NULL on master.";
+ err_code= ER_SLAVE_FATAL_ERROR;
+ sprintf(err_buff, ER(err_code), errmsg);
+ }
+ else
+ {
+ //Do nothing.
+ }
+ mysql_free_result(master_res);
+
+ if (errmsg)
+ goto err;
+ }
+ else
+ {
+ errmsg= "The slave I/O thread stops because the value of \
+the COLLATION_SERVER global variable is NULL on master.";
+ err_code= ER_SLAVE_FATAL_ERROR;
+ sprintf(err_buff, ER(err_code), errmsg);
+ goto err;
+ }
}
- mysql_free_result(master_res);
- if (errmsg)
- goto err;
}
/*
@@ -1042,24 +1105,53 @@ be equal for replication to work";
This check is only necessary for 4.x masters (and < 5.0.4 masters but
those were alpha).
*/
- if ((*mysql->server_version == '4') &&
- !mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) &&
- (master_res= mysql_store_result(mysql)))
- {
- if ((master_row= mysql_fetch_row(master_res)) &&
- strcmp(master_row[0],
- global_system_variables.time_zone->get_name()->ptr()))
+ if (*mysql->server_version == '4')
+ {
+ query_re= mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE"));
+ if (query_re)
{
- errmsg= "The slave I/O thread stops because master and slave have \
+ DBUG_RETURN(2);
+ }
+ else
+ {
+ master_res= mysql_store_result(mysql);
+ if (master_res)
+ {
+ master_row= mysql_fetch_row(master_res);
+ if (master_row && strcmp(master_row[0],
+ global_system_variables.time_zone->get_name()->ptr()))
+ {
+ errmsg= "The slave I/O thread stops because master and slave have \
different values for the TIME_ZONE global variable. The values must \
be equal for replication to work";
- err_code= ER_SLAVE_FATAL_ERROR;
- sprintf(err_buff, ER(err_code), errmsg);
- }
- mysql_free_result(master_res);
+ err_code= ER_SLAVE_FATAL_ERROR;
+ sprintf(err_buff, ER(err_code), errmsg);
+ }
+ else if (!master_row)
+ {
+ errmsg= "The slave I/O thread stops because the value of \
+the TIME_ZONE global variable is NULL on master.";
+ err_code= ER_SLAVE_FATAL_ERROR;
+ sprintf(err_buff, ER(err_code), errmsg);
+ }
+ else
+ {
+ //Do nothing.
+ }
+ mysql_free_result(master_res);
- if (errmsg)
- goto err;
+ if (errmsg)
+ goto err;
+ }
+ else
+ {
+ errmsg= "The slave I/O thread stops because the value of \
+the TIME_ZONE global variable is NULL on master.";
+ err_code= ER_SLAVE_FATAL_ERROR;
+ sprintf(err_buff, ER(err_code), errmsg);
+ goto err;
+ }
+ }
}
err:
@@ -2372,6 +2464,7 @@ pthread_handler_t handle_slave_io(void *
char llbuff[22];
uint retry_count;
bool suppress_warnings;
+ int get_master_version_and_clock_re;
#ifndef DBUG_OFF
uint retry_count_reg= 0, retry_count_dump= 0, retry_count_event= 0;
#endif
@@ -2382,7 +2475,7 @@ pthread_handler_t handle_slave_io(void *
DBUG_ASSERT(mi->inited);
mysql= NULL ;
retry_count= 0;
-
+
pthread_mutex_lock(&mi->run_lock);
/* Inform waiting threads that slave has started */
mi->slave_run_id++;
@@ -2451,8 +2544,28 @@ connected:
mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
thd->slave_net = &mysql->net;
thd_proc_info(thd, "Checking master version");
- if (get_master_version_and_clock(mysql, mi))
+ get_master_version_and_clock_re= get_master_version_and_clock(mysql, mi);
+ if (get_master_version_and_clock_re == 1)
+ {
goto err;
+ }
+ else if (get_master_version_and_clock_re == 2)
+ {
+ //Retry to connect because the error was caused by a transient network problem
+ if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
+ reconnect_messages[SLAVE_RECON_ACT_REG]))
+ {
+ goto err;
+ }
+ else
+ {
+ goto connected;
+ }
+ }
+ else
+ {
+ //Do nothing.
+ }
if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1)
{
Attachment: [text/bzr-bundle] bzr/dao-gang.qu@sun.com-20090623083721-1lf1nq7wo1bvpjqv.bundle
| Thread |
|---|
| • bzr commit into mysql-5.1-bugteam branch (Dao-Gang.Qu:2941) Bug#45214 | Dao-Gang.Qu | 23 Jun |