List:Commits« Previous MessageNext Message »
From:Luis Soares Date:December 4 2008 3:08pm
Subject:bzr commit into mysql-5.1 branch (luis.soares:2720) Bug#38826 Bug#39325
View as plain text  
#At file:///stuff/workspace/mysql-server/bugfix/5.1-bugteam-38826-39325/

 2720 Luis Soares	2008-12-04 [merge]
      BUG#38826 Race in MYSQL_LOG::purge_logs is impossible to debug in production
      BUG#39325 Server crash inside MYSQL_LOG::purge_first_log halts replicaiton
      
      The patch reverses the order of the purging and updating events for log and
      relay-log.info/index files respectively.
      
      This solves the problem of having holes caused by crashes happening between updating
      info/index files and purging logs.
      
      This patch also contains an aditional test case for testing the crashing before purge logs.
            
      NOTE1: This is a combined patch for BUG#38826 and BUG#39325. This patch is based on
      bugteam tree and takes into account reviewers suggestions.
      
      NOTE2: Merge from 5.0-bugteam
added:
  mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result
  mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test
modified:
  mysql-test/include/wait_until_connected_again.inc
  mysql-test/r/binlog_index.result
  sql/log.cc
  sql/log.h

=== modified file 'mysql-test/include/wait_until_connected_again.inc'
--- a/mysql-test/include/wait_until_connected_again.inc	2006-09-28 09:53:42 +0000
+++ b/mysql-test/include/wait_until_connected_again.inc	2008-12-04 14:07:56 +0000
@@ -6,7 +6,7 @@
 let $counter= 500;
 while ($mysql_errno)
 {
-  --error 0,2002,2006
+  --error 0,2002,2006,2003
   show status;
 
   dec $counter;

=== modified file 'mysql-test/r/binlog_index.result'
--- a/mysql-test/r/binlog_index.result	2008-04-01 10:36:20 +0000
+++ b/mysql-test/r/binlog_index.result	2008-12-04 14:07:56 +0000
@@ -21,6 +21,7 @@ flush logs;
 *** must be a warning master-bin.000001 was not found ***
 Warnings:
 Warning	1612	Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
+Warning	1612	Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
 *** must show one record, of the active binlog, left in the index file after PURGE ***
 show binary logs;
 Log_name	File_size

=== added file 'mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result'
--- a/mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result	2008-12-04 14:07:56 +0000
@@ -0,0 +1,14 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+stop slave;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (id INT);
+start slave;
+SET GLOBAL debug= "+d,crash_before_purge_logs";
+FLUSH LOGS;
+ERROR HY000: Lost connection to MySQL server during query
+start slave;

=== added file 'mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test'
--- a/mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test	2008-12-04 14:07:56 +0000
@@ -0,0 +1,57 @@
+--source include/master-slave.inc
+--source include/have_debug.inc
+--disable_reconnect
+
+# We have to sync with master, to ensure slave had time to start properly
+# # before we stop it. If not, we get errors about UNIX_TIMESTAMP() in the
+# log.
+sync_slave_with_master;
+stop slave;
+--source include/wait_for_slave_to_stop.inc
+
+# ON MASTER 
+
+connection master;
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (id INT);
+
+let $1=100;
+disable_query_log;
+begin;
+while ($1)
+{
+  eval INSERT INTO t1 VALUES( $1 );
+  dec $1;
+}
+DROP TABLE t1;
+save_master_pos;
+
+enable_query_log;
+
+## ON SLAVE
+
+connection slave;
+start slave;
+--source include/wait_for_slave_to_start.inc
+sync_with_master 0;
+connection master;
+save_master_pos;
+connection slave;
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/slave0.expect
+SET GLOBAL debug= "+d,crash_before_purge_logs";
+
+--error 2013
+# try to rotate logs
+FLUSH LOGS;
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+start slave;
+--source include/wait_for_slave_to_start.inc
+
+sync_with_master 0;

=== modified file 'sql/log.cc'
--- a/sql/log.cc	2008-11-06 14:18:25 +0000
+++ b/sql/log.cc	2008-12-04 14:07:56 +0000
@@ -2359,6 +2359,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG()
   */
   index_file_name[0] = 0;
   bzero((char*) &index_file, sizeof(index_file));
+  bzero((char*) &purge_temp, sizeof(purge_temp));
 }
 
 /* this is called only once */
@@ -2953,6 +2954,7 @@ err:
 int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
 {
   int error;
+  char *to_purge_if_included= NULL;
   DBUG_ENTER("purge_first_log");
 
   DBUG_ASSERT(is_open());
@@ -2960,36 +2962,20 @@ int MYSQL_BIN_LOG::purge_first_log(Relay
   DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
 
   pthread_mutex_lock(&LOCK_index);
-  pthread_mutex_lock(&rli->log_space_lock);
-  rli->relay_log.purge_logs(rli->group_relay_log_name, included,
-                            0, 0, &rli->log_space_total);
-  // Tell the I/O thread to take the relay_log_space_limit into account
-  rli->ignore_log_space_limit= 0;
-  pthread_mutex_unlock(&rli->log_space_lock);
+  to_purge_if_included= my_strdup(rli->group_relay_log_name, MYF(0));
 
   /*
-    Ok to broadcast after the critical region as there is no risk of
-    the mutex being destroyed by this thread later - this helps save
-    context switches
-  */
-  pthread_cond_broadcast(&rli->log_space_cond);
-  
-  /*
     Read the next log file name from the index file and pass it back to
-    the caller
-    If included is true, we want the first relay log;
-    otherwise we want the one after event_relay_log_name.
-  */
-  if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) ||
-      (!included &&
-       ((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
-        (error=find_next_log(&rli->linfo, 0)))))
+    the caller.
+  */
+  if((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) || 
+     (error=find_next_log(&rli->linfo, 0)))
   {
     char buff[22];
     sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
                     error,
                     llstr(rli->linfo.index_file_offset,buff),
-                    rli->group_relay_log_name,
+                    rli->event_relay_log_name,
                     included);
     goto err;
   }
@@ -3017,7 +3003,42 @@ int MYSQL_BIN_LOG::purge_first_log(Relay
   /* Store where we are in the new file for the execution thread */
   flush_relay_log_info(rli);
 
+  DBUG_EXECUTE_IF("crash_before_purge_logs", abort(););
+
+  pthread_mutex_lock(&rli->log_space_lock);
+  rli->relay_log.purge_logs(to_purge_if_included, included,
+                            0, 0, &rli->log_space_total);
+  // Tell the I/O thread to take the relay_log_space_limit into account
+  rli->ignore_log_space_limit= 0;
+  pthread_mutex_unlock(&rli->log_space_lock);
+
+  /*
+    Ok to broadcast after the critical region as there is no risk of
+    the mutex being destroyed by this thread later - this helps save
+    context switches
+  */
+  pthread_cond_broadcast(&rli->log_space_cond);
+
+  /*
+   * Need to update the log pos because purge logs has been called 
+   * after fetching initially the log pos at the begining of the method.
+   */
+  if(error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0))
+  {
+    char buff[22];
+    sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
+                    error,
+                    llstr(rli->linfo.index_file_offset,buff),
+                    rli->group_relay_log_name,
+                    included);
+    goto err;
+  }
+
+  /* If included was passed, rli->linfo should be the first entry. */
+  DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
+
 err:
+  my_free(to_purge_if_included, MYF(0));
   pthread_mutex_unlock(&LOCK_index);
   DBUG_RETURN(error);
 }
@@ -3077,8 +3098,36 @@ int MYSQL_BIN_LOG::purge_logs(const char
 
   if (need_mutex)
     pthread_mutex_lock(&LOCK_index);
-  if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
+  if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) 
+  {
+    sql_print_error("MYSQL_LOG::purge_logs was called with file %s not "
+                    "listed in the index.", to_log);
     goto err;
+  }
+
+  /*
+    For crash recovery reasons the index needs to be updated before
+    any files are deleted. Move files to be deleted into a temp file
+    to be processed after the index is updated.
+  */
+  if (!my_b_inited(&purge_temp))
+  {
+    if (error=open_cached_file(&purge_temp, mysql_tmpdir, TEMP_PREFIX,
+                               DISK_BUFFER_SIZE, MYF(MY_WME)))
+    {
+      sql_print_error("MYSQL_LOG::purge_logs failed to open purge_temp");
+      goto err;
+    }
+  }
+  else
+  {
+    if (error=reinit_io_cache(&purge_temp, WRITE_CACHE, 0, 0, 1))
+    {
+      sql_print_error("MYSQL_LOG::purge_logs failed to reinit purge_temp "
+                      "for write");
+      goto err;
+    }
+  }
 
   /*
     File name exists in index file; delete until we find this file
@@ -3089,6 +3138,61 @@ int MYSQL_BIN_LOG::purge_logs(const char
   while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
          !log_in_use(log_info.log_file_name))
   {
+    if ((error=my_b_write(&purge_temp, (const uchar*)log_info.log_file_name,
+                          strlen(log_info.log_file_name))) ||
+        (error=my_b_write(&purge_temp, (const uchar*)"\n", 1)))
+    {
+      sql_print_error("MYSQL_LOG::purge_logs failed to copy %s to purge_temp",
+                      log_info.log_file_name);
+      goto err;
+    }
+
+    if (find_next_log(&log_info, 0) || exit_loop)
+      break;
+ }
+
+  /* We know how many files to delete. Update index file. */
+  if (error=update_log_index(&log_info, need_update_threads))
+  {
+    sql_print_error("MSYQL_LOG::purge_logs failed to update the index file");
+    goto err;
+  }
+
+  DBUG_EXECUTE_IF("crash_after_update_index", abort(););
+
+  /* Switch purge_temp for read. */
+  if (error=reinit_io_cache(&purge_temp, READ_CACHE, 0, 0, 0))
+  {
+    sql_print_error("MSYQL_LOG::purge_logs failed to reinit purge_temp "
+                    "for read");
+    goto err;
+  }
+
+  /* Read each entry from purge_temp and delete the file. */
+  for (;;)
+  {
+    uint length;
+
+    if ((length=my_b_gets(&purge_temp, log_info.log_file_name,
+                          FN_REFLEN)) <= 1)
+    {
+      if (purge_temp.error)
+      {
+        error= purge_temp.error;
+        sql_print_error("MSYQL_LOG::purge_logs error %d reading from "
+                        "purge_temp", error);
+        goto err;
+      }
+
+      /* Reached EOF */
+      break;
+    }
+
+    /* Get rid of the trailing '\n' */
+    log_info.log_file_name[length-1]= 0;
+
+    ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
+
     MY_STAT s;
     if (!my_stat(log_info.log_file_name, &s, MYF(0)))
     {
@@ -3189,20 +3293,6 @@ int MYSQL_BIN_LOG::purge_logs(const char
         }
       }
     }
-
-    ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
-
-    if (find_next_log(&log_info, 0) || exit_loop)
-      break;
-  }
-  
-  /*
-    If we get killed -9 here, the sysadmin would have to edit
-    the log index file after restart - otherwise, this should be safe
-  */
-  error= update_log_index(&log_info, need_update_threads);
-  if (error == 0) {
-    error = ret;
   }
 
 err:
@@ -3216,7 +3306,7 @@ err:
   index file.
 
   @param thd		Thread pointer
-  @param before_date	Delete all log files before given date.
+  @param purge_time	Delete all log files before given date.
 
   @note
     If any of the logs before the deleted one is in use,
@@ -3233,6 +3323,7 @@ err:
 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
 {
   int error;
+  char to_log[FN_REFLEN];
   LOG_INFO log_info;
   MY_STAT stat_area;
   THD *thd= current_thd;
@@ -3240,12 +3331,8 @@ int MYSQL_BIN_LOG::purge_logs_before_dat
   DBUG_ENTER("purge_logs_before_date");
 
   pthread_mutex_lock(&LOCK_index);
+  to_log[0]= 0;
 
-  /*
-    Delete until we find curren file
-    or a file that is used or a file
-    that is older than purge_time.
-  */
   if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
     goto err;
 
@@ -3295,55 +3382,18 @@ int MYSQL_BIN_LOG::purge_logs_before_dat
     }
     else
     {
-      if (stat_area.st_mtime >= purge_time)
+      if (stat_area.st_mtime < purge_time) 
+        strmake(to_log, 
+                log_info.log_file_name, 
+                sizeof(log_info.log_file_name));
+      else
         break;
-      if (my_delete(log_info.log_file_name, MYF(0)))
-      {
-        if (my_errno == ENOENT) 
-        {
-          /* It's not fatal even if we can't delete a log file */
-          if (thd)
-          {
-            push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
-                                ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
-                                log_info.log_file_name);
-          }
-          sql_print_information("Failed to delete file '%s'",
-                                log_info.log_file_name);
-          my_errno= 0;
-        }
-        else
-        {
-          if (thd)
-          {
-            push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
-                                ER_BINLOG_PURGE_FATAL_ERR,
-                                "a problem with deleting %s; "
-                                "consider examining correspondence "
-                                "of your binlog index file "
-                                "to the actual binlog files",
-                                log_info.log_file_name);
-          }
-          else
-          {
-            sql_print_information("Failed to delete log file '%s'",
-                                  log_info.log_file_name); 
-          }
-          error= LOG_INFO_FATAL;
-          goto err;
-        }
-      }
-      ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
     }
     if (find_next_log(&log_info, 0))
       break;
   }
 
-  /*
-    If we get killed -9 here, the sysadmin would have to edit
-    the log index file after restart - otherwise, this should be safe
-  */
-  error= update_log_index(&log_info, 1);
+  error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
 
 err:
   pthread_mutex_unlock(&LOCK_index);

=== modified file 'sql/log.h'
--- a/sql/log.h	2008-09-03 20:04:07 +0000
+++ b/sql/log.h	2008-12-04 14:07:56 +0000
@@ -233,6 +233,13 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
   pthread_cond_t update_cond;
   ulonglong bytes_written;
   IO_CACHE index_file;
+  /*
+    purge_temp is a temp file used in purge_logs so that the index file
+    can be updated before deleting files from disk, yielding better crash
+    recovery. It is created on demand the first time purge_logs is called
+    and then reused for subsequent calls. It is cleaned up in cleanup().
+  */
+  IO_CACHE purge_temp;
   char index_file_name[FN_REFLEN];
   /*
      The max size before rotation (usable only if log_type == LOG_BIN: binary

Thread
bzr commit into mysql-5.1 branch (luis.soares:2720) Bug#38826 Bug#39325Luis Soares4 Dec
  • Re: bzr commit into mysql-5.1 branch (luis.soares:2720) Bug#38826Bug#39325alfranio correia5 Dec