MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:alfranio correia Date:December 5 2008 1:04pm
Subject:Re: bzr commit into mysql-5.1 branch (luis.soares:2720) Bug#38826
Bug#39325
View as plain text  
Luis Soares wrote:

Hi Luis,

The patch is approved.

Find in-line comments.

> #At file:///stuff/workspace/mysql-server/bugfix/5.1-bugteam-38826-39325/
>
>  2720 Luis Soares	2008-12-04 [merge]
>       BUG#38826 Race in MYSQL_LOG::purge_logs is impossible to debug in production
>       BUG#39325 Server crash inside MYSQL_LOG::purge_first_log halts replicaiton
>       
>       The patch reverses the order of the purging and updating events for log and
>       relay-log.info/index files respectively.
>       
>       This solves the problem of having holes caused by crashes happening between
> updating
>       info/index files and purging logs.
>       
>       This patch also contains an aditional test case for testing the crashing before
> purge logs.
>             
>       NOTE1: This is a combined patch for BUG#38826 and BUG#39325. This patch is
> based on
>       bugteam tree and takes into account reviewers suggestions.
>       
>       NOTE2: Merge from 5.0-bugteam
> added:
>   mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result
>   mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test
> modified:
>   mysql-test/include/wait_until_connected_again.inc
>   mysql-test/r/binlog_index.result
>   sql/log.cc
>   sql/log.h
>
> === modified file 'mysql-test/include/wait_until_connected_again.inc'
> --- a/mysql-test/include/wait_until_connected_again.inc	2006-09-28 09:53:42 +0000
> +++ b/mysql-test/include/wait_until_connected_again.inc	2008-12-04 14:07:56 +0000
> @@ -6,7 +6,7 @@
>  let $counter= 500;
>  while ($mysql_errno)
>  {
> -  --error 0,2002,2006
> +  --error 0,2002,2006,2003
>    show status;
>  
>    dec $counter;
>
> === modified file 'mysql-test/r/binlog_index.result'
> --- a/mysql-test/r/binlog_index.result	2008-04-01 10:36:20 +0000
> +++ b/mysql-test/r/binlog_index.result	2008-12-04 14:07:56 +0000
> @@ -21,6 +21,7 @@ flush logs;
>  *** must be a warning master-bin.000001 was not found ***
>  Warnings:
>  Warning	1612	Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
> +Warning	1612	Being purged log MYSQLTEST_VARDIR/log/master-bin.000001 was not found
>  *** must show one record, of the active binlog, left in the index file after PURGE
> ***
>  show binary logs;
>  Log_name	File_size
>
> === added file 'mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result'
> --- a/mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result	1970-01-01 00:00:00
> +0000
> +++ b/mysql-test/suite/rpl/r/rpl_crash_before_purge_logs.result	2008-12-04 14:07:56
> +0000
> @@ -0,0 +1,14 @@
> +stop slave;
> +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
> +reset master;
> +reset slave;
> +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
> +start slave;
> +stop slave;
> +DROP TABLE IF EXISTS t1;
> +CREATE TABLE t1 (id INT);
> +start slave;
> +SET GLOBAL debug= "+d,crash_before_purge_logs";
> +FLUSH LOGS;
> +ERROR HY000: Lost connection to MySQL server during query
> +start slave;
>
> === added file 'mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test'
> --- a/mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test	1970-01-01 00:00:00
> +0000
> +++ b/mysql-test/suite/rpl/t/rpl_crash_before_purge_logs.test	2008-12-04 14:07:56
> +0000
>   
You need to describe what is the purpose of the test.
Put some echo commands to show its progress too.
This easies debugging in case of failures. ;)

That's all.

> @@ -0,0 +1,57 @@
> +--source include/master-slave.inc
> +--source include/have_debug.inc
> +--disable_reconnect
> +
> +# We have to sync with master, to ensure slave had time to start properly
> +# # before we stop it. If not, we get errors about UNIX_TIMESTAMP() in the
> +# log.
> +sync_slave_with_master;
> +stop slave;
> +--source include/wait_for_slave_to_stop.inc
> +
> +# ON MASTER 
> +
> +connection master;
> +
> +--disable_warnings
> +DROP TABLE IF EXISTS t1;
> +--enable_warnings
> +
> +CREATE TABLE t1 (id INT);
> +
> +let $1=100;
> +disable_query_log;
> +begin;
> +while ($1)
> +{
> +  eval INSERT INTO t1 VALUES( $1 );
> +  dec $1;
> +}
> +DROP TABLE t1;
> +save_master_pos;
> +
> +enable_query_log;
> +
> +## ON SLAVE
> +
> +connection slave;
> +start slave;
> +--source include/wait_for_slave_to_start.inc
> +sync_with_master 0;
> +connection master;
> +save_master_pos;
> +connection slave;
> +
> +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/slave0.expect
> +SET GLOBAL debug= "+d,crash_before_purge_logs";
> +
> +--error 2013
> +# try to rotate logs
> +FLUSH LOGS;
> +
> +--enable_reconnect
> +--source include/wait_until_connected_again.inc
> +start slave;
> +--source include/wait_for_slave_to_start.inc
> +
> +sync_with_master 0;
>
> === modified file 'sql/log.cc'
> --- a/sql/log.cc	2008-11-06 14:18:25 +0000
> +++ b/sql/log.cc	2008-12-04 14:07:56 +0000
> @@ -2359,6 +2359,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG()
>    */
>    index_file_name[0] = 0;
>    bzero((char*) &index_file, sizeof(index_file));
> +  bzero((char*) &purge_temp, sizeof(purge_temp));
>  }
>  
>  /* this is called only once */
> @@ -2953,6 +2954,7 @@ err:
>  int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
>  {
>    int error;
> +  char *to_purge_if_included= NULL;
>    DBUG_ENTER("purge_first_log");
>  
>    DBUG_ASSERT(is_open());
> @@ -2960,36 +2962,20 @@ int MYSQL_BIN_LOG::purge_first_log(Relay
>    DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
>  
>    pthread_mutex_lock(&LOCK_index);
> -  pthread_mutex_lock(&rli->log_space_lock);
> -  rli->relay_log.purge_logs(rli->group_relay_log_name, included,
> -                            0, 0, &rli->log_space_total);
> -  // Tell the I/O thread to take the relay_log_space_limit into account
> -  rli->ignore_log_space_limit= 0;
> -  pthread_mutex_unlock(&rli->log_space_lock);
> +  to_purge_if_included= my_strdup(rli->group_relay_log_name, MYF(0));
>  
>    /*
> -    Ok to broadcast after the critical region as there is no risk of
> -    the mutex being destroyed by this thread later - this helps save
> -    context switches
> -  */
> -  pthread_cond_broadcast(&rli->log_space_cond);
> -  
> -  /*
>      Read the next log file name from the index file and pass it back to
> -    the caller
> -    If included is true, we want the first relay log;
> -    otherwise we want the one after event_relay_log_name.
> -  */
> -  if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) ||
> -      (!included &&
> -       ((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0))
> ||
> -        (error=find_next_log(&rli->linfo, 0)))))
> +    the caller.
> +  */
> +  if((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) || 
> +     (error=find_next_log(&rli->linfo, 0)))
>    {
>      char buff[22];
>      sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
>                      error,
>                      llstr(rli->linfo.index_file_offset,buff),
> -                    rli->group_relay_log_name,
> +                    rli->event_relay_log_name,
>                      included);
>      goto err;
>    }
> @@ -3017,7 +3003,42 @@ int MYSQL_BIN_LOG::purge_first_log(Relay
>    /* Store where we are in the new file for the execution thread */
>    flush_relay_log_info(rli);
>  
> +  DBUG_EXECUTE_IF("crash_before_purge_logs", abort(););
> +
> +  pthread_mutex_lock(&rli->log_space_lock);
> +  rli->relay_log.purge_logs(to_purge_if_included, included,
> +                            0, 0, &rli->log_space_total);
> +  // Tell the I/O thread to take the relay_log_space_limit into account
> +  rli->ignore_log_space_limit= 0;
> +  pthread_mutex_unlock(&rli->log_space_lock);
> +
> +  /*
> +    Ok to broadcast after the critical region as there is no risk of
> +    the mutex being destroyed by this thread later - this helps save
> +    context switches
> +  */
> +  pthread_cond_broadcast(&rli->log_space_cond);
> +
> +  /*
> +   * Need to update the log pos because purge logs has been called 
> +   * after fetching initially the log pos at the begining of the method.
> +   */
> +  if(error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0))
> +  {
> +    char buff[22];
> +    sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
> +                    error,
> +                    llstr(rli->linfo.index_file_offset,buff),
> +                    rli->group_relay_log_name,
> +                    included);
> +    goto err;
> +  }
> +
> +  /* If included was passed, rli->linfo should be the first entry. */
> +  DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
> +
>  err:
> +  my_free(to_purge_if_included, MYF(0));
>    pthread_mutex_unlock(&LOCK_index);
>    DBUG_RETURN(error);
>  }
> @@ -3077,8 +3098,36 @@ int MYSQL_BIN_LOG::purge_logs(const char
>  
>    if (need_mutex)
>      pthread_mutex_lock(&LOCK_index);
> -  if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
> +  if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) 
> +  {
> +    sql_print_error("MYSQL_LOG::purge_logs was called with file %s not "
> +                    "listed in the index.", to_log);
>      goto err;
> +  }
> +
> +  /*
> +    For crash recovery reasons the index needs to be updated before
> +    any files are deleted. Move files to be deleted into a temp file
> +    to be processed after the index is updated.
> +  */
> +  if (!my_b_inited(&purge_temp))
> +  {
> +    if (error=open_cached_file(&purge_temp, mysql_tmpdir, TEMP_PREFIX,
> +                               DISK_BUFFER_SIZE, MYF(MY_WME)))
> +    {
> +      sql_print_error("MYSQL_LOG::purge_logs failed to open purge_temp");
> +      goto err;
> +    }
> +  }
> +  else
> +  {
> +    if (error=reinit_io_cache(&purge_temp, WRITE_CACHE, 0, 0, 1))
> +    {
> +      sql_print_error("MYSQL_LOG::purge_logs failed to reinit purge_temp "
> +                      "for write");
> +      goto err;
> +    }
> +  }
>  
>    /*
>      File name exists in index file; delete until we find this file
> @@ -3089,6 +3138,61 @@ int MYSQL_BIN_LOG::purge_logs(const char
>    while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
>           !log_in_use(log_info.log_file_name))
>    {
> +    if ((error=my_b_write(&purge_temp, (const uchar*)log_info.log_file_name,
> +                          strlen(log_info.log_file_name))) ||
> +        (error=my_b_write(&purge_temp, (const uchar*)"\n", 1)))
> +    {
> +      sql_print_error("MYSQL_LOG::purge_logs failed to copy %s to purge_temp",
> +                      log_info.log_file_name);
> +      goto err;
> +    }
> +
> +    if (find_next_log(&log_info, 0) || exit_loop)
> +      break;
> + }
> +
> +  /* We know how many files to delete. Update index file. */
> +  if (error=update_log_index(&log_info, need_update_threads))
> +  {
> +    sql_print_error("MSYQL_LOG::purge_logs failed to update the index file");
> +    goto err;
> +  }
> +
> +  DBUG_EXECUTE_IF("crash_after_update_index", abort(););
> +
> +  /* Switch purge_temp for read. */
> +  if (error=reinit_io_cache(&purge_temp, READ_CACHE, 0, 0, 0))
> +  {
> +    sql_print_error("MSYQL_LOG::purge_logs failed to reinit purge_temp "
> +                    "for read");
> +    goto err;
> +  }
> +
> +  /* Read each entry from purge_temp and delete the file. */
> +  for (;;)
> +  {
> +    uint length;
> +
> +    if ((length=my_b_gets(&purge_temp, log_info.log_file_name,
> +                          FN_REFLEN)) <= 1)
> +    {
> +      if (purge_temp.error)
> +      {
> +        error= purge_temp.error;
> +        sql_print_error("MSYQL_LOG::purge_logs error %d reading from "
> +                        "purge_temp", error);
> +        goto err;
> +      }
> +
> +      /* Reached EOF */
> +      break;
> +    }
> +
> +    /* Get rid of the trailing '\n' */
> +    log_info.log_file_name[length-1]= 0;
> +
> +    ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
> +
>      MY_STAT s;
>      if (!my_stat(log_info.log_file_name, &s, MYF(0)))
>      {
> @@ -3189,20 +3293,6 @@ int MYSQL_BIN_LOG::purge_logs(const char
>          }
>        }
>      }
> -
> -    ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
> -
> -    if (find_next_log(&log_info, 0) || exit_loop)
> -      break;
> -  }
> -  
> -  /*
> -    If we get killed -9 here, the sysadmin would have to edit
> -    the log index file after restart - otherwise, this should be safe
> -  */
> -  error= update_log_index(&log_info, need_update_threads);
> -  if (error == 0) {
> -    error = ret;
>    }
>  
>  err:
> @@ -3216,7 +3306,7 @@ err:
>    index file.
>  
>    @param thd		Thread pointer
> -  @param before_date	Delete all log files before given date.
> +  @param purge_time	Delete all log files before given date.
>  
>    @note
>      If any of the logs before the deleted one is in use,
> @@ -3233,6 +3323,7 @@ err:
>  int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
>  {
>    int error;
> +  char to_log[FN_REFLEN];
>    LOG_INFO log_info;
>    MY_STAT stat_area;
>    THD *thd= current_thd;
> @@ -3240,12 +3331,8 @@ int MYSQL_BIN_LOG::purge_logs_before_dat
>    DBUG_ENTER("purge_logs_before_date");
>  
>    pthread_mutex_lock(&LOCK_index);
> +  to_log[0]= 0;
>  
> -  /*
> -    Delete until we find curren file
> -    or a file that is used or a file
> -    that is older than purge_time.
> -  */
>    if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
>      goto err;
>  
> @@ -3295,55 +3382,18 @@ int MYSQL_BIN_LOG::purge_logs_before_dat
>      }
>      else
>      {
> -      if (stat_area.st_mtime >= purge_time)
> +      if (stat_area.st_mtime < purge_time) 
> +        strmake(to_log, 
> +                log_info.log_file_name, 
> +                sizeof(log_info.log_file_name));
> +      else
>          break;
> -      if (my_delete(log_info.log_file_name, MYF(0)))
> -      {
> -        if (my_errno == ENOENT) 
> -        {
> -          /* It's not fatal even if we can't delete a log file */
> -          if (thd)
> -          {
> -            push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
> -                                ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
> -                                log_info.log_file_name);
> -          }
> -          sql_print_information("Failed to delete file '%s'",
> -                                log_info.log_file_name);
> -          my_errno= 0;
> -        }
> -        else
> -        {
> -          if (thd)
> -          {
> -            push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
> -                                ER_BINLOG_PURGE_FATAL_ERR,
> -                                "a problem with deleting %s; "
> -                                "consider examining correspondence "
> -                                "of your binlog index file "
> -                                "to the actual binlog files",
> -                                log_info.log_file_name);
> -          }
> -          else
> -          {
> -            sql_print_information("Failed to delete log file '%s'",
> -                                  log_info.log_file_name); 
> -          }
> -          error= LOG_INFO_FATAL;
> -          goto err;
> -        }
> -      }
> -      ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
>      }
>      if (find_next_log(&log_info, 0))
>        break;
>    }
>  
> -  /*
> -    If we get killed -9 here, the sysadmin would have to edit
> -    the log index file after restart - otherwise, this should be safe
> -  */
> -  error= update_log_index(&log_info, 1);
> +  error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
>  
>  err:
>    pthread_mutex_unlock(&LOCK_index);
>
> === modified file 'sql/log.h'
> --- a/sql/log.h	2008-09-03 20:04:07 +0000
> +++ b/sql/log.h	2008-12-04 14:07:56 +0000
> @@ -233,6 +233,13 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
>    pthread_cond_t update_cond;
>    ulonglong bytes_written;
>    IO_CACHE index_file;
> +  /*
> +    purge_temp is a temp file used in purge_logs so that the index file
> +    can be updated before deleting files from disk, yielding better crash
> +    recovery. It is created on demand the first time purge_logs is called
> +    and then reused for subsequent calls. It is cleaned up in cleanup().
> +  */
> +  IO_CACHE purge_temp;
>    char index_file_name[FN_REFLEN];
>    /*
>       The max size before rotation (usable only if log_type == LOG_BIN: binary
>
>
>   

Thread
bzr commit into mysql-5.1 branch (luis.soares:2720) Bug#38826 Bug#39325Luis Soares4 Dec
  • Re: bzr commit into mysql-5.1 branch (luis.soares:2720) Bug#38826Bug#39325alfranio correia5 Dec