List:Commits« Previous MessageNext Message »
From:rsomla Date:January 23 2007 7:20am
Subject:bk commit into 5.1 tree (rafal:1.2373)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of rafal. When rafal does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-01-23 08:19:53+01:00, rafal@quant.(none) +2 -0
  Internal commit, please ignore.

  sql/slave.cc@stripped, 2007-01-23 08:19:51+01:00, rafal@quant.(none) +99 -82
    - Added info about log coordinates to error info.
    
    - Moved code printing THD warnings inside exec_relay_log_event() after 
      event execution.
    
    - Always print exit mesage of SQL thread.
    
    - Added error messages to helper functions of the I/O thread.

  sql/slave.h@stripped, 2007-01-23 08:19:51+01:00, rafal@quant.(none) +7 -0
    Added macros for printing various log coordinates.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	rafal
# Host:	quant.(none)
# Root:	/ext/mysql/bk/mysql-5.1-bug21842

--- 1.302/sql/slave.cc	2007-01-23 08:20:00 +01:00
+++ 1.303/sql/slave.cc	2007-01-23 08:20:00 +01:00
@@ -1178,9 +1178,8 @@
   if (simple_command(mysql, COM_REGISTER_SLAVE, (char*) buf,
                         (uint) (pos- buf), 0))
   {
-    sql_print_error("Error on COM_REGISTER_SLAVE: %d '%s'",
-                    mysql_errno(mysql),
-                    mysql_error(mysql));
+    sql_print_error("Slave I/O thread: Error %d (%s) while executing Register Slave command",
+                    mysql_errno(mysql), mysql_error(mysql));
     DBUG_RETURN(1);
   }
   DBUG_RETURN(0);
@@ -1510,9 +1509,8 @@
     if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
       *suppress_warnings= 1;                    // Suppress reconnect warning
     else
-      sql_print_error("Error on COM_BINLOG_DUMP: %d  %s, will retry in %d secs",
-                      mysql_errno(mysql), mysql_error(mysql),
-                      master_connect_retry);
+      sql_print_error("Slave I/O thread: Error %d (%s) when executing Binlog Dump command",
+                      mysql_errno(mysql), mysql_error(mysql));
     DBUG_RETURN(1);
   }
 
@@ -1746,6 +1744,8 @@
 {
   DBUG_ENTER("exec_relay_log_event");
 
+  char llbuf[22], llbuf1[22];
+  
   /*
      We acquire this mutex since we need it for all operations except
      event execution. But we will release it in places where we will
@@ -1764,9 +1764,8 @@
   if (rli->until_condition!=RELAY_LOG_INFO::UNTIL_NONE &&
       rli->is_until_satisfied())
   {
-    char buf[22];
     sql_print_information("Slave SQL thread stopped because it reached its"
-                    " UNTIL position %s", llstr(rli->until_pos(), buf));
+                    " UNTIL position %s", llstr(rli->until_pos(), llbuf));
     /*
       Setting abort_slave flag because we do not want additional message about
       error in query execution to be printed.
@@ -1776,17 +1775,17 @@
     DBUG_RETURN(1);
   }
 
-  Log_event * ev = next_event(rli);
+  Log_event *ev= next_event(rli);
 
   DBUG_ASSERT(rli->sql_thd==thd);
-
+  
   if (sql_slave_killed(thd,rli))
   {
     sql_print_information("Slave SQL thread killed while waiting for next event.");
     pthread_mutex_unlock(&rli->data_lock);
     delete ev;
     DBUG_RETURN(1);
-  }
+  } 
 
   if (ev)
   {
@@ -1895,7 +1894,48 @@
     exec_res= ev->exec_event(rli);
     
     DBUG_ASSERT(rli->sql_thd==thd);
-     
+    
+    /*
+      Retrieve as much info as possible from the thd and, error codes and warnings
+      and print this to the error log as to allow the user to locate the error.
+      However, do not scare the user if SQL thread was simply killed or stopped. 
+    */
+    if (exec_res && !sql_slave_killed(thd,rli))
+    {      
+      int last_errno= thd->net.last_errno;
+      
+      if (last_errno != 0)
+      {
+        if (rli->last_slave_errno == 0)
+        {
+          slave_print_msg(ERROR_LEVEL, rli, last_errno,
+            "Slave SQL thread: error after executing event: %s",
+            thd->net.last_error ?
+            thd->net.last_error : "<no message>");
+        }
+        else if (last_errno != ER_UNKNOWN_ERROR && rli->last_slave_errno != last_errno)
+        {
+          // TODO: reconsider printing unknown errors when their desctiptions are
+          //       meaningful. In that case how to avoid double printing messages
+          //       reported via slave_print_msg?
+           
+          sql_print_error("Slave SQL thread: Additional error info: %s Error_code: %d",
+                          thd->net.last_error ?
+                          thd->net.last_error : "<no message>",
+                          thd->net.last_errno);
+        }
+      }
+  
+      /* Print any warnings issued */
+      List_iterator_fast<MYSQL_ERROR> it(thd->warn_list);
+      MYSQL_ERROR *err;
+      
+      while ((err= it++))
+        if( (int)err->code != rli->last_slave_errno && (int)err->code != last_errno )
+          sql_print_warning("Slave SQL thread: %s Error_code: %d",err->msg, err->code);
+      
+    } // if (!sql_slave_killed(thd,rli))
+
     /*
        Format_description_log_event should not be deleted because it will be
        used to read info about the relay log's format; it will be deleted when
@@ -1927,9 +1967,14 @@
           init_master_info()).
           b) init_relay_log_pos(), because the BEGIN may be an older relay log.
         */
+        sql_print_information("Slave SQL thread: Transient error when executing event "
+                              "at %s in %s.", RPL_EVENT_POS(llbuf), RPL_EVENT_LOG_NAME);
+
         if (rli->trans_retries < slave_trans_retries)
         {
-          sql_print_information("Slave SQL thread: Transient error, re-executing event group.");
+          sql_print_information("Slave SQL thread: Re-executing event group "
+                                "from pos %s in %s", RPL_GROUP_POS(llbuf), 
+                                RPL_GROUP_LOG_NAME);
           
           if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL))
             sql_print_error("Slave SQL thread: Failed to initialize "
@@ -1956,9 +2001,9 @@
           }
         }
         else
-          sql_print_error("Slave SQL thread retried transaction %lu time(s) "
-                          "in vain, giving up. Consider raising the value of "
-                          "the slave_transaction_retries variable.",
+          sql_print_error("Slave SQL thread: Retried execution of an event group "
+                          "for %lu time(s) in vain, giving up. Consider raising the "
+                          "value of the slave_transaction_retries variable.",
                           slave_trans_retries);
       }
       else if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
@@ -1983,19 +2028,30 @@
 
     }  // if (slave_trans_retries)
 
+    if (exec_res)
+      sql_print_error("Slave SQL thread: Error detected when executing event "
+                      "at pos %s in %s.", RPL_EVENT_POS(llbuf), RPL_EVENT_LOG_NAME);
+      
     DBUG_RETURN(exec_res);
   }
   
   pthread_mutex_unlock(&rli->data_lock);
   
+  /* 
+    FIXME: Move reporting of error inside next_event() function where more info
+    is avaliable.
+  */
+  
   slave_print_msg(ERROR_LEVEL, rli, 0, 
-    "Could not parse relay log event entry. The possible reasons are: the master's "
-    "binary log is corrupted (you can check this by running 'mysqlbinlog' on the "
-    "binary log), the slave's relay log is corrupted (you can check this by running "
-    "'mysqlbinlog' on the relay log), a network problem, or a bug in the master's "
-    "or slave's MySQL code. If you want to check the master's binary log or slave's "
-    "relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' "
-    "on this slave."
+    "Slave SQL thread: Failed to read event at position %s in relay log %s. "
+    "The possible reasons are: the master's binary log is corrupted "
+    "(you can check this by running 'mysqlbinlog' on the binary log), "
+    "the slave's relay log is corrupted "
+    "(you can check this by running 'mysqlbinlog' on the relay log), "
+    "a network problem, or a bug in the master's or slave's MySQL code. "
+    "If you want to check the master's binary log or slave's relay log, "
+    "you will be able to know their names by issuing 'SHOW SLAVE STATUS' "
+    "on this slave.", RPL_EVENT_POS(llbuf), RPL_EVENT_LOG_NAME
   );
   
   DBUG_RETURN(1);
@@ -2120,7 +2176,8 @@
   // we can get killed during connection
   if (io_slave_killed(thd,mi))
   {
-    sql_print_information("Slave I/O thread killed while connecting to master");
+    sql_print_information("Slave I/O thread: Killed while (re)connecting to master, "
+                          "retry_count= %d", retry_count);
     goto err;
   }
   
@@ -2459,7 +2516,7 @@
     }
   }
 
-  /* Read queries from the IO/THREAD until this thread is killed */
+  /* Read events from the relay log until this thread is killed */
 
   while (!sql_slave_killed(thd,rli))
   {
@@ -2469,69 +2526,17 @@
     
     if (exec_relay_log_event(thd,rli))
     {
-      // FIXME: move whole error reporting inside exec_relay_log_event where we have
-      // more information to report. Alternatively, move all event execution logic 
-      // into this loop and leave to exec_relay_log_event the simple task of executing
-      // a single event.
-      
-      // do not scare the user if SQL thread was simply killed or stopped
-      if (!sql_slave_killed(thd,rli))
-      {
-        /*
-          retrieve as much info as possible from the thd and, error codes and warnings
-          and print this to the error log as to allow the user to locate the error
-        */
-        
-        int last_errno= thd->net.last_errno;
-        
-        if (last_errno != 0)
-        {
-          if (rli->last_slave_errno == 0)
-          {
-            slave_print_msg(ERROR_LEVEL, rli, last_errno,
-              "Slave SQL thread: error after executing event: %s",
-              thd->net.last_error ?
-              thd->net.last_error : "<no message>");
-          }
-          else if (last_errno != ER_UNKNOWN_ERROR && rli->last_slave_errno != last_errno)
-          {
-            // TODO: reconsider printing unknown errors when their desctiptions are
-            //       meaningful. In that case how to avoid double printing messages
-            //       reported via slave_print_msg?
-             
-            sql_print_error("Slave SQL thread: Additional error info: %s Error_code: %d",
-                            thd->net.last_error ?
-                            thd->net.last_error : "<no message>",
-                            thd->net.last_errno);
-          }
-        }
-
-        /* Print any warnings issued */
-        List_iterator_fast<MYSQL_ERROR> it(thd->warn_list);
-        MYSQL_ERROR *err;
-        
-        while ((err= it++))
-          if( (int)err->code != rli->last_slave_errno && (int)err->code != last_errno )
-            sql_print_warning("Slave SQL thread: %s Error_code: %d",err->msg, err->code);
-
-        sql_print_error(
-          "Slave SQL thread: Error executing event; thread aborted. "
-          "Fix the problem, and restart the slave SQL thread with \"SLAVE START\". "
-          "We stopped at log '%s' position %s", 
-          RPL_LOG_NAME, llstr(rli->group_master_log_pos, llbuff)
-        );
-      }
       goto err;
     }
   }
 
+ err:
+
   /* Thread stopped. Print the current replication position to the log */
   sql_print_information("Slave SQL thread exiting, replication stopped in log "
                         "'%s' at position %s",
                         RPL_LOG_NAME, llstr(rli->group_master_log_pos,llbuff));
 
- err:
-
   /*
     Some events set some playgrounds, which won't be cleared because thread
     stops. Stopping of this thread may not be known to these events ("stop"
@@ -2723,7 +2728,10 @@
   safe_mutex_assert_owner(&mi->data_lock);
 
   if (unlikely(!rev->is_valid()))
+  {
+    sql_print_error("Slave I/O thread: Encountered invalid Rotate event.");
     DBUG_RETURN(1);
+  }
 
   /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */
   memcpy(mi->master_log_name, rev->new_log_ident, rev->ident_len+1);
@@ -2949,16 +2957,18 @@
                            ulong event_len)
 {
   DBUG_ENTER("queue_old_event");
+  
+  int ver;
 
-  switch (mi->rli.relay_log.description_event_for_queue->binlog_version)
+  switch (ver= mi->rli.relay_log.description_event_for_queue->binlog_version)
   {
   case 1:
       DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len));
   case 3:
       DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len));
   default: /* unsupported format; eg version 2 */
-    DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
-                       mi->rli.relay_log.description_event_for_queue->binlog_version));
+    sql_print_information("Slave I/O event: Unsupported binlog version %d.", ver);
+    DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()", ver));
     DBUG_RETURN(1);
   }
 }
@@ -3002,6 +3012,7 @@
       event from the next binlog (unless the master is presently running
       without --log-bin).
     */
+    sql_print_information("Slave I/O thread: Got Stop event from master.");
     goto err;
   case ROTATE_EVENT:
   {
@@ -3035,6 +3046,7 @@
           Log_event::read_log_event(buf, event_len, &errmsg,
                                     mi->rli.relay_log.description_event_for_queue)))
     {
+      sql_print_error("Slave I/O thread: Error '%s' when reading Format description event.",errmsg);
       error= 2;
       goto err;
     }
@@ -3100,6 +3112,8 @@
       DBUG_ASSERT(rli->ign_master_log_name_end[0]);
       rli->ign_master_log_pos_end= mi->master_log_pos;
     }
+    
+    sql_print_information("Slave I/O thread: Ignoring event originating with the same server.");
     rli->relay_log.signal_update(); // the slave SQL thread needs to re-check
     DBUG_PRINT("info", ("master_log_pos: %lu, event originating from the same server, ignored",
                         (ulong) mi->master_log_pos));
@@ -3114,7 +3128,10 @@
       rli->relay_log.harvest_bytes_written(&rli->log_space_total);
     }
     else
+    {
+      sql_print_error("Slave I/O thread: Error writing event to relay log.");
       error= 3;
+    }
     rli->ign_master_log_name_end[0]= 0; // last event is not ignored
   }
   pthread_mutex_unlock(log_lock);

--- 1.102/sql/slave.h	2007-01-23 08:20:00 +01:00
+++ 1.103/sql/slave.h	2007-01-23 08:20:00 +01:00
@@ -118,6 +118,13 @@
  "FIRST")
 #define IO_RPL_LOG_NAME (mi->master_log_name[0] ? mi->master_log_name :\
  "FIRST")
+#define RPL_SAFE_LOG_NAME(N) ((N)[0] ? (N) : "FIRST")
+#define RPL_EVENT_LOG_NAME  RPL_SAFE_LOG_NAME(rli->event_relay_log_name)
+#define RPL_GROUP_LOG_NAME  RPL_SAFE_LOG_NAME(rli->group_relay_log_name)
+#define RPL_MASTER_LOG_NAME  RPL_SAFE_LOG_NAME(rli->group_master_log_name)
+#define RPL_EVENT_POS(BUF)  llstr(rli->event_relay_log_pos,(BUF))
+#define RPL_GROUP_POS(BUF)  llstr(rli->group_relay_log_pos,(BUF))
+#define RPL_MASTER_GROUP_POS(BUF)  llstr(rli->group_master_log_pos,(BUF))
 
 /*
   If the following is set, if first gives an error, second will be
Thread
bk commit into 5.1 tree (rafal:1.2373)rsomla23 Jan