List:Commits« Previous MessageNext Message »
From:ramil Date:June 26 2007 8:32am
Subject:bk commit into 5.1 tree (ramil:1.2509) BUG#19328
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of ram. When ram does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-06-26 13:32:30+05:00, ramil@stripped +1 -0
  Fix for bug #19328: Slave timeout with COM_REGISTER_SLAVE error causing stop
  
  Problem: "Under high load, the slave registering to the master can timeout 
  during the COM_REGISTER_SLAVE execution. This causes an error, which 
  prevents the slave from connecting at all."
  
  Fix: Do not abort immediately, but retry registering on master.

  sql/slave.cc@stripped, 2007-06-26 13:32:28+05:00, ramil@stripped +116 -99
    Fix for bug #19328: Slave timeout with COM_REGISTER_SLAVE error causing stop
      - retry connecting after a failed slave registration on master.
        Reconnecting has been factored out (see try_to_reconnect() and 
        check_io_slave_killed()) to get rid of similar code used during 
        registering on master/dump requesting/event reading.
        All messages have been moved to reconnect_messages[] array 
        for easy usage and maintenance.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	ramil
# Host:	ramil.myoffice.izhnet.ru
# Root:	/home/ram/work/b19328/b19328.5.1

--- 1.312/sql/slave.cc	2007-06-26 13:32:34 +05:00
+++ 1.313/sql/slave.cc	2007-06-26 13:32:34 +05:00
@@ -59,6 +59,37 @@ ulonglong relay_log_space_limit = 0;
 int disconnect_slave_event_count = 0, abort_slave_event_count = 0;
 int events_till_abort = -1;
 
+static const char *reconnect_messages[3][6]=
+{
+  {
+    "Waiting to reconnect after a failed registration on master",
+    "Slave I/O thread killed while waitnig to reconnect after a failed \
+registration on master",
+    "Reconnecting after a failed registration on master",
+    "failed registering on master, reconnecting to try again",
+    "COM_REGISTER_SLAVE",
+    "Slave I/O thread killed during or after reconnect"
+  },
+  {
+    "Waiting to reconnect after a failed binlog dump request",
+    "Slave I/O thread killed while retrying master dump",
+    "Reconnecting after a failed binlog dump request",
+    "failed dump request, reconnecting to try again",
+    "COM_BINLOG_DUMP",
+    "Slave I/O thread killed during or after reconnect"
+  },
+  {
+    "Waiting to reconnect after a failed master event read",
+    "Slave I/O thread killed while waiting to reconnect after a failed read",
+    "Reconnecting after a failed master event read",
+    "failed reading log event, reconnecting to try again",
+    "",
+    "Slave I/O thread killed during or after a reconnect done to recover from \
+failed read"
+  }
+};
+ 
+
 typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE;
 
 static int process_io_rotate(MASTER_INFO* mi, Rotate_log_event* rev);
@@ -1098,12 +1129,14 @@ static void write_ignored_events_info_to
 }
 
 
-int register_slave_on_master(MYSQL* mysql, MASTER_INFO *mi)
+int register_slave_on_master(MYSQL* mysql, MASTER_INFO *mi,
+                             bool *suppress_warnings)
 {
   uchar buf[1024], *pos= buf;
   uint report_host_len, report_user_len=0, report_password_len=0;
   DBUG_ENTER("register_slave_on_master");
 
+  *suppress_warnings= FALSE;
   if (!report_host)
     DBUG_RETURN(0);
   report_host_len= strlen(report_host);
@@ -1127,11 +1160,18 @@ int register_slave_on_master(MYSQL* mysq
 
   if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (size_t) (pos- buf), 0))
   {
-    char buf[256];
-    my_snprintf(buf, sizeof(buf),
-                "%s (Errno: %d)", mysql_error(mysql), mysql_errno(mysql));
-    mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
-               ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf);
+    if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
+    {
+      *suppress_warnings= TRUE;                 // Suppress reconnect warning
+    }
+    else
+    {
+      char buf[256];
+      my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql), 
+                  mysql_errno(mysql));
+      mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
+                 ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf);
+    }
     DBUG_RETURN(1);
   }
   DBUG_RETURN(0);
@@ -1460,6 +1500,8 @@ static int request_dump(MYSQL* mysql, MA
   int binlog_flags = 0; // for now
   char* logname = mi->master_log_name;
   DBUG_ENTER("request_dump");
+  
+  *suppress_warnings= FALSE;
 
   // TODO if big log files: Change next to int8store()
   int4store(buf, (ulong) mi->master_log_pos);
@@ -1475,7 +1517,7 @@ static int request_dump(MYSQL* mysql, MA
       now we just fill up the error log :-)
     */
     if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
-      *suppress_warnings= 1;                    // Suppress reconnect warning
+      *suppress_warnings= TRUE;                 // Suppress reconnect warning
     else
       sql_print_error("Error on COM_BINLOG_DUMP: %d  %s, will retry in %d secs",
                       mysql_errno(mysql), mysql_error(mysql),
@@ -1539,7 +1581,7 @@ static ulong read_event(MYSQL* mysql, MA
   ulong len;
   DBUG_ENTER("read_event");
 
-  *suppress_warnings= 0;
+  *suppress_warnings= FALSE;
   /*
     my_real_read() will time us out
     We check if we were told to die, and if not, try reading again
@@ -1879,6 +1921,56 @@ on this slave.\
 }
 
 
+static bool check_io_slave_killed(THD *thd, MASTER_INFO *mi, const char *info)
+{
+  if (io_slave_killed(thd, mi))
+  {
+    if (global_system_variables.log_warnings)
+      sql_print_information(info);
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+static int try_to_reconnect(THD *thd, MYSQL *mysql, MASTER_INFO *mi,
+                            uint *retry_count, bool suppress_warnings,
+                            const char *messages[6])
+{
+  mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
+  thd->proc_info= messages[0];
+#ifdef SIGNAL_WITH_VIO_CLOSE  
+  thd->clear_active_vio();
+#endif
+  end_server(mysql);
+  if (*retry_count++)
+  {
+    if (*retry_count > master_retry_count)
+      return 1;                             // Don't retry forever
+    safe_sleep(thd, mi->connect_retry, (CHECK_KILLED_FUNC) io_slave_killed,
+               (void *) mi);
+  }
+  if (check_io_slave_killed(thd, mi, messages[1]))
+    return 1;
+  thd->proc_info = messages[2];
+  if (!suppress_warnings) 
+  {
+    char buf[256], llbuff[22];
+    my_snprintf(buf, sizeof(buf), messages[3], " log '%s' at postion %s",
+                 IO_RPL_LOG_NAME, llstr(mi->master_log_pos, llbuff));
+    mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
+               ER(ER_SLAVE_MASTER_COM_FAILURE), messages[4], buf);
+  }
+  if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(thd, mi))
+  {
+    if (global_system_variables.log_warnings)
+      sql_print_information(messages[5]);
+    return 1;
+  }
+  return 0;
+}
+
+
 /* Slave I/O Thread entry point */
 
 pthread_handler_t handle_slave_io(void *arg)
@@ -1889,6 +1981,7 @@ pthread_handler_t handle_slave_io(void *
   RELAY_LOG_INFO *rli= &mi->rli;
   char llbuff[22];
   uint retry_count;
+  bool suppress_warnings;
 
   // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
   my_thread_init();
@@ -1975,79 +2068,36 @@ connected:
       Register ourselves with the master.
     */
     thd->proc_info = "Registering slave on master";
-    if (register_slave_on_master(mysql, mi))
+    if (register_slave_on_master(mysql, mi, &suppress_warnings))
     {
       sql_print_error("Slave I/O thread couldn't register on master");
-      goto err;
+      if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
+registering slave on master") ||
+          try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
+                           reconnect_messages[0]))
+        goto err;
+      goto connected;
     }
   }
 
   DBUG_PRINT("info",("Starting reading binary log from master"));
   while (!io_slave_killed(thd,mi))
   {
-    bool suppress_warnings= 0;
     thd->proc_info = "Requesting binlog dump";
     if (request_dump(mysql, mi, &suppress_warnings))
     {
       sql_print_error("Failed on request_dump()");
-      if (io_slave_killed(thd,mi))
-      {
-        sql_print_information("Slave I/O thread killed while requesting master \
-dump");
+      if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
+requesting master dump") ||
+          try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
+                           reconnect_messages[1]))
         goto err;
-      }
-
-      mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
-      thd->proc_info= "Waiting to reconnect after a failed binlog dump request";
-#ifdef SIGNAL_WITH_VIO_CLOSE
-      thd->clear_active_vio();
-#endif
-      end_server(mysql);
-      /*
-        First time retry immediately, assuming that we can recover
-        right away - if first time fails, sleep between re-tries
-        hopefuly the admin can fix the problem sometime
-      */
-      if (retry_count++)
-      {
-        if (retry_count > master_retry_count)
-          goto err;                             // Don't retry forever
-        safe_sleep(thd,mi->connect_retry,(CHECK_KILLED_FUNC)io_slave_killed,
-                   (void*)mi);
-      }
-      if (io_slave_killed(thd,mi))
-      {
-        sql_print_information("Slave I/O thread killed while retrying master \
-dump");
-        goto err;
-      }
-
-      thd->proc_info = "Reconnecting after a failed binlog dump request";
-      if (!suppress_warnings) {
-        char buf[256];
-        my_snprintf(buf, sizeof(buf),
-                    "failed dump request, reconnecting to try again,"
-                    " log '%s' at postion %s",
-                    IO_RPL_LOG_NAME,
-                    llstr(mi->master_log_pos,llbuff));
-        mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
-                   ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_BINLOG_DUMP", buf);
-      }
-      if (safe_reconnect(thd, mysql, mi, suppress_warnings) ||
-          io_slave_killed(thd,mi))
-      {
-        sql_print_information("Slave I/O thread killed during or \
-after reconnect");
-        goto err;
-      }
-
       goto connected;
     }
 
     while (!io_slave_killed(thd,mi))
     {
       ulong event_len;
-      suppress_warnings= 0;
       /*
          We say "waiting" because read_event() will wait if there's nothing to
          read. But if there's something to read, it will not wait. The
@@ -2056,12 +2106,9 @@ after reconnect");
       */
       thd->proc_info= "Waiting for master to send event";
       event_len= read_event(mysql, mi, &suppress_warnings);
-      if (io_slave_killed(thd,mi))
-      {
-        if (global_system_variables.log_warnings)
-          sql_print_information("Slave I/O thread killed while reading event");
+      if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
+reading event"))
         goto err;
-      }
 
       if (event_len == packet_error)
       {
@@ -2081,39 +2128,9 @@ max_allowed_packet",
                           mysql_error(mysql));
           goto err;
         }
-        mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
-        thd->proc_info = "Waiting to reconnect after a failed master event read";
-#ifdef SIGNAL_WITH_VIO_CLOSE
-        thd->clear_active_vio();
-#endif
-        end_server(mysql);
-        if (retry_count++)
-        {
-          if (retry_count > master_retry_count)
-            goto err;                           // Don't retry forever
-          safe_sleep(thd,mi->connect_retry,(CHECK_KILLED_FUNC)io_slave_killed,
-                     (void*) mi);
-        }
-        if (io_slave_killed(thd,mi))
-        {
-          if (global_system_variables.log_warnings)
-            sql_print_information("Slave I/O thread killed while waiting to \
-reconnect after a failed read");
-          goto err;
-        }
-        thd->proc_info = "Reconnecting after a failed master event read";
-        if (!suppress_warnings)
-          sql_print_information("Slave I/O thread: Failed reading log event, \
-reconnecting to retry, log '%s' position %s", IO_RPL_LOG_NAME,
-                          llstr(mi->master_log_pos, llbuff));
-        if (safe_reconnect(thd, mysql, mi, suppress_warnings) ||
-            io_slave_killed(thd,mi))
-        {
-          if (global_system_variables.log_warnings)
-            sql_print_information("Slave I/O thread killed during or after a \
-reconnect done to recover from failed read");
+        if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
+                             reconnect_messages[2]))
           goto err;
-        }
         goto connected;
       } // if (event_len == packet_error)
 
Thread
bk commit into 5.1 tree (ramil:1.2509) BUG#19328ramil26 Jun