List:Commits« Previous MessageNext Message »
From:Dao-Gang.Qu Date:June 23 2009 8:37am
Subject:bzr commit into mysql-5.1-bugteam branch (Dao-Gang.Qu:2941) Bug#45214
View as plain text  
#At file:///home/daogangq/mysql/bzrwork/bug45214/5.1-bt/ based on revid:davi.arnaut@stripped

 2941 Dao-Gang.Qu@stripped	2009-06-23
      Bug #45214  get_master_version_and_clock does not report error when queries fail
      
      The "get_master_version_and_clock(...)" function in sql/slave.cc ignores the error and carries on
      when a query fails because of a transient network disconnection between master and slave,
      or when a query succeeds but the result retrieved is NULL.
      
      The "get_master_version_and_clock(...)" function should retry connecting to the master if a query fails
      because of a transient network disconnection between master and slave. It should report a NULL-value error
      and stop the slave I/O thread if a query succeeds but the result retrieved from the master is NULL.
     @ sql/slave.cc
        Update slave.cc to fix bug #45214

    modified:
      sql/slave.cc
=== modified file 'sql/slave.cc'
--- a/sql/slave.cc	2009-06-09 16:44:26 +0000
+++ b/sql/slave.cc	2009-06-23 08:37:21 +0000
@@ -864,7 +864,7 @@ static int get_master_version_and_clock(
   MYSQL_RES *master_res= 0;
   MYSQL_ROW master_row;
   DBUG_ENTER("get_master_version_and_clock");
-
+  int query_re= 0;
   /*
     Free old description_event_for_queue (that is needed if we are in
     a reconnection).
@@ -938,8 +938,9 @@ static int get_master_version_and_clock(
     Compare the master and slave's clock. Do not die if master's clock is
     unavailable (very old master not supporting UNIX_TIMESTAMP()?).
   */
-
-  if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
+  DBUG_SYNC_POINT("debug_lock.before_get_UNIX_TIMESTAMP", 1000);
+  query_re= mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()"));
+  if (!query_re &&
       (master_res= mysql_store_result(mysql)) &&
       (master_row= mysql_fetch_row(master_res)))
   {
@@ -954,6 +955,10 @@ static int get_master_version_and_clock(
                       "SLAVE STATUS. Error: %s (%d)",
                       mysql_error(mysql), mysql_errno(mysql));
   }
+  else if (query_re)
+  {
+    DBUG_RETURN(2);
+  }
   if (master_res)
     mysql_free_result(master_res);
 
@@ -967,26 +972,54 @@ static int get_master_version_and_clock(
     Note: we could have put a @@SERVER_ID in the previous SELECT
     UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
   */
-  if (!mysql_real_query(mysql,
-                        STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) &&
-      (master_res= mysql_store_result(mysql)))
-  {
-    if ((master_row= mysql_fetch_row(master_res)) &&
-        (::server_id == strtoul(master_row[1], 0, 10)) &&
-        !mi->rli.replicate_same_server_id)
+  query_re= mysql_real_query(mysql, STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'"));
+  if (query_re)
+  {
+    DBUG_RETURN(2);
+  }
+  else
+  {
+    master_res= mysql_store_result(mysql);
+    if (master_res)
     {
-      errmsg= "The slave I/O thread stops because master and slave have equal \
+      master_row= mysql_fetch_row(master_res);
+      if (master_row && (::server_id == strtoul(master_row[1], 0, 10)) &&
+          !mi->rli.replicate_same_server_id)
+      {
+        errmsg= "The slave I/O thread stops because master and slave have equal \
 MySQL server ids; these ids must be different for replication to work (or \
 the --replicate-same-server-id option must be used on slave but this does \
 not always make sense; please check the manual before using it).";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, ER(err_code), errmsg);
+      }
+      else if (!master_row)
+      {
+        errmsg= "The slave I/O thread stops because the value of \
+the MySQL server id is NULL on master.";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, ER(err_code), errmsg);
+      }
+      else
+      {
+        //Do nothing.
+      }
+      mysql_free_result(master_res);
+
+      if (errmsg)
+        goto err;
+    }
+    else
+    {
+      errmsg= "The slave I/O thread stops because the value of \
+the MySQL server id is NULL on master.";
       err_code= ER_SLAVE_FATAL_ERROR;
       sprintf(err_buff, ER(err_code), errmsg);
-    }
-    mysql_free_result(master_res);
-    if (errmsg)
       goto err;
+    }
   }
 
+ 
   /*
     Check that the master's global character_set_server and ours are the same.
     Not fatal if query fails (old master?).
@@ -1008,23 +1041,53 @@ not always make sense; please check the 
   if (*mysql->server_version == '3')
     goto err;
 
-  if ((*mysql->server_version == '4') &&
-      !mysql_real_query(mysql,
-                        STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) &&
-      (master_res= mysql_store_result(mysql)))
+  if (*mysql->server_version == '4')
   {
-    if ((master_row= mysql_fetch_row(master_res)) &&
-        strcmp(master_row[0], global_system_variables.collation_server->name))
+    query_re= mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER"));
+    if (query_re)
+    {
+      DBUG_RETURN(2);
+    }
+    else
     {
-      errmsg= "The slave I/O thread stops because master and slave have \
+      master_res= mysql_store_result(mysql);
+      if (master_res)
+      {
+        master_row= mysql_fetch_row(master_res);
+        if (master_row && strcmp(master_row[0],
+                                 global_system_variables.collation_server->name))
+        {
+          errmsg= "The slave I/O thread stops because master and slave have \
 different values for the COLLATION_SERVER global variable. The values must \
 be equal for replication to work";
-      err_code= ER_SLAVE_FATAL_ERROR;
-      sprintf(err_buff, ER(err_code), errmsg);
+          err_code= ER_SLAVE_FATAL_ERROR;
+          sprintf(err_buff, ER(err_code), errmsg);
+        }
+        else if (!master_row)
+        {
+          errmsg= "The slave I/O thread stops because the value of \
+the COLLATION_SERVER global variable is NULL on master.";
+          err_code= ER_SLAVE_FATAL_ERROR;
+          sprintf(err_buff, ER(err_code), errmsg);
+        }
+        else
+        {
+          //Do nothing.
+        }
+        mysql_free_result(master_res);
+
+        if (errmsg)
+          goto err;
+      }
+      else
+      {
+        errmsg= "The slave I/O thread stops because the value of \
+the COLLATION_SERVER global variable is NULL on master.";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, ER(err_code), errmsg);
+        goto err;
+      }
     }
-    mysql_free_result(master_res);
-    if (errmsg)
-      goto err;
   }
 
   /*
@@ -1042,24 +1105,53 @@ be equal for replication to work";
     This check is only necessary for 4.x masters (and < 5.0.4 masters but
     those were alpha).
   */
-  if ((*mysql->server_version == '4') &&
-      !mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) &&
-      (master_res= mysql_store_result(mysql)))
-  {
-    if ((master_row= mysql_fetch_row(master_res)) &&
-        strcmp(master_row[0],
-               global_system_variables.time_zone->get_name()->ptr()))
+  if (*mysql->server_version == '4')
+  {
+    query_re= mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE"));
+    if (query_re)
     {
-      errmsg= "The slave I/O thread stops because master and slave have \
+      DBUG_RETURN(2);
+    }
+    else
+    {
+      master_res= mysql_store_result(mysql);
+      if (master_res)
+      {
+        master_row= mysql_fetch_row(master_res);
+        if (master_row && strcmp(master_row[0],
+                                 global_system_variables.time_zone->get_name()->ptr()))
+        {
+          errmsg= "The slave I/O thread stops because master and slave have \
 different values for the TIME_ZONE global variable. The values must \
 be equal for replication to work";
-      err_code= ER_SLAVE_FATAL_ERROR;
-      sprintf(err_buff, ER(err_code), errmsg);
-    }
-    mysql_free_result(master_res);
+          err_code= ER_SLAVE_FATAL_ERROR;
+          sprintf(err_buff, ER(err_code), errmsg);
+        }
+        else if (!master_row)
+        {
+          errmsg= "The slave I/O thread stops because the value of \
+the TIME_ZONE global variable is NULL on master.";
+          err_code= ER_SLAVE_FATAL_ERROR;
+          sprintf(err_buff, ER(err_code), errmsg);
+        }
+        else
+        {
+          //Do nothing.
+        }
+        mysql_free_result(master_res);
 
-    if (errmsg)
-      goto err;
+        if (errmsg)
+          goto err;
+      }
+      else
+      {
+        errmsg= "The slave I/O thread stops because the value of \
+the TIME_ZONE global variable is NULL on master.";
+        err_code= ER_SLAVE_FATAL_ERROR;
+        sprintf(err_buff, ER(err_code), errmsg);
+        goto err;
+      }
+    }
   }
 
 err:
@@ -2372,6 +2464,7 @@ pthread_handler_t handle_slave_io(void *
   char llbuff[22];
   uint retry_count;
   bool suppress_warnings;
+  int get_master_version_and_clock_re;
 #ifndef DBUG_OFF
   uint retry_count_reg= 0, retry_count_dump= 0, retry_count_event= 0;
 #endif
@@ -2382,7 +2475,7 @@ pthread_handler_t handle_slave_io(void *
   DBUG_ASSERT(mi->inited);
   mysql= NULL ;
   retry_count= 0;
-
+  
   pthread_mutex_lock(&mi->run_lock);
   /* Inform waiting threads that slave has started */
   mi->slave_run_id++;
@@ -2451,8 +2544,28 @@ connected:
   mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
   thd->slave_net = &mysql->net;
   thd_proc_info(thd, "Checking master version");
-  if (get_master_version_and_clock(mysql, mi))
+  get_master_version_and_clock_re= get_master_version_and_clock(mysql, mi);
+  if (get_master_version_and_clock_re == 1)
+  {
     goto err;
+  } 
+  else if (get_master_version_and_clock_re == 2) 
+  {
+    //Retry to connect because the error was caused by a transient network problem
+    if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
+                             reconnect_messages[SLAVE_RECON_ACT_REG]))
+    {
+      goto err;
+    }
+    else 
+    {
+      goto connected;
+    }
+  } 
+  else
+  {
+    //Do nothing.
+  }
 
   if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1)
   {


Attachment: [text/bzr-bundle] bzr/dao-gang.qu@sun.com-20090623083721-1lf1nq7wo1bvpjqv.bundle
Thread
bzr commit into mysql-5.1-bugteam branch (Dao-Gang.Qu:2941) Bug#45214Dao-Gang.Qu23 Jun