List:Commits« Previous MessageNext Message »
From:Alfranio Correia Date:December 8 2008 1:40am
Subject:bzr commit into mysql-6.0-rpl branch (alfranio.correia:2747) Bug#40337
View as plain text  
#At file:///home/acorreia/workspace.sun/repository.mysql/bzrwork/bug-40337/mysql-6.0-rpl/

 2747 Alfranio Correia	2008-12-08
      BUG#40337. Introduced four options to improve reliability in the slave:
      
      . (--sync-master-info, integer) which syncs the master.info after every #th event;
      . (--sync-relay-log, integer) which syncs the relay-log.bin* after every #th event;
      . (--sync-relay-log-info, integer) which syncs the relay.info after every #th transaction;
      . (--relay-log-recovery, boolean) which enables a recovery mechanism that
      throws away relay-log.bin* after a crash.
      
      The recovery process is carried out because such files may be corrupted by
      a crash. This forces the slave to re-fetch events from the master. It is
      advisable to use this feature when the parameter --sync-relay-log-info = 1.
      
      This supersedes the patch proposed to fix bugs: BUG#35542 and BUG#31665.
modified:
  sql/log.cc
  sql/log.h
  sql/mysql_priv.h
  sql/mysqld.cc
  sql/rpl_mi.cc
  sql/rpl_rli.cc
  sql/set_var.cc
  sql/set_var.h
  sql/slave.cc
  sql/sql_repl.cc
  sql/sql_repl.h

=== modified file 'sql/log.cc'
--- a/sql/log.cc	2008-11-19 17:29:41 +0000
+++ b/sql/log.cc	2008-12-08 01:40:22 +0000
@@ -4173,7 +4173,7 @@ const char *MYSQL_LOG::generate_name(con
 
 
 
-MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
+MYSQL_BIN_LOG::MYSQL_BIN_LOG(ulong *sync_period)
   :bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
    need_start_event(TRUE), m_table_map_version(0),
    sync_period_ptr(sync_period),
@@ -5409,7 +5409,7 @@ bool MYSQL_BIN_LOG::flush_and_sync(bool 
   safe_mutex_assert_owner(&LOCK_log);
   if (flush_io_cache(&log_file))
     return 1;
-  uint sync_period= get_sync_period();
+  ulong sync_period= get_sync_period();
   if (sync_period && ++sync_counter >= sync_period)
   {
     sync_counter= 0;

=== modified file 'sql/log.h'
--- a/sql/log.h	2008-11-19 17:29:41 +0000
+++ b/sql/log.h	2008-12-08 01:40:22 +0000
@@ -371,10 +371,10 @@ class MYSQL_BIN_LOG: public TC_LOG, priv
      sync_binlog_period, for relay log this will be
      sync_relay_log_period
   */
-  uint *sync_period_ptr;
-  uint sync_counter;
+  ulong *sync_period_ptr;
+  ulong sync_counter;
 
-  inline uint get_sync_period()
+  inline ulong get_sync_period()
   {
     return *sync_period_ptr;
   }
@@ -406,7 +406,7 @@ public:
   Format_description_log_event *description_event_for_exec,
     *description_event_for_queue;
 
-  MYSQL_BIN_LOG(uint *sync_period);
+  MYSQL_BIN_LOG(ulong *sync_period);
   /*
     note that there's no destructor ~MYSQL_BIN_LOG() !
     The reason is that we don't want it to be automatically called

=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h	2008-11-12 15:23:22 +0000
+++ b/sql/mysql_priv.h	2008-12-08 01:40:22 +0000
@@ -2005,10 +2005,12 @@ extern ulong specialflag;
 #ifdef MYSQL_SERVER
 extern ulong current_pid;
 extern ulong expire_logs_days;
-extern uint sync_binlog_period, sync_relaylog_period;
+extern ulong sync_binlog_period, sync_relaylog_period, 
+             sync_relayloginfo_period, sync_masterinfo_period;
 extern ulong opt_tc_log_size, tc_log_max_pages_used, tc_log_page_size;
 extern ulong tc_log_page_waits;
 extern my_bool relay_log_purge, opt_innodb_safe_binlog, opt_innodb;
+extern my_bool relay_log_recovery;
 extern uint test_flags,select_errors,ha_open_options;
 extern uint protocol_version, mysqld_port, dropping_tables;
 extern uint delay_key_write_options;

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2008-12-06 00:24:23 +0000
+++ b/sql/mysqld.cc	2008-12-08 01:40:22 +0000
@@ -492,6 +492,7 @@ extern const char *opt_ndb_distribution;
 extern enum ndb_distribution opt_ndb_distribution_id;
 #endif
 my_bool opt_readonly, use_temp_pool, relay_log_purge;
+my_bool relay_log_recovery;
 my_bool opt_sync_frm, opt_allow_suspicious_udfs;
 my_bool opt_secure_auth= 0;
 char* opt_secure_file_priv= 0;
@@ -573,7 +574,8 @@ ulong max_prepared_stmt_count;
 ulong prepared_stmt_count=0;
 ulong thread_id=1L,current_pid;
 ulong slow_launch_threads = 0;
-uint sync_binlog_period= 0, sync_relaylog_period= 0;
+ulong sync_binlog_period= 0, sync_relaylog_period= 0,
+      sync_relayloginfo_period= 0, sync_masterinfo_period= 0;
 ulong expire_logs_days = 0;
 ulong rpl_recovery_rank=0;
 const char *log_output_str= "FILE";
@@ -5778,6 +5780,7 @@ enum options_mysqld
   OPT_QUERY_CACHE_TYPE, OPT_QUERY_CACHE_WLOCK_INVALIDATE, OPT_RECORD_BUFFER,
   OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT,
   OPT_RELAY_LOG_PURGE,
+  OPT_RELAY_LOG_RECOVERY,
   OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
   OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING,
   OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE,
@@ -5851,6 +5854,8 @@ enum options_mysqld
   OPT_GENERAL_LOG_FILE,
   OPT_SLOW_QUERY_LOG_FILE,
   OPT_SYNC_RELAY_LOG,
+  OPT_SYNC_RELAY_LOG_INFO,
+  OPT_SYNC_MASTER_INFO,
   OPT_DEADLOCK_SEARCH_DEPTH_SHORT,
   OPT_DEADLOCK_SEARCH_DEPTH_LONG,
   OPT_DEADLOCK_TIMEOUT_SHORT,
@@ -7096,6 +7101,13 @@ The minimum value for this variable is 4
    (uchar**) &relay_log_purge,
    (uchar**) &relay_log_purge, 0, GET_BOOL, NO_ARG,
    1, 0, 1, 0, 1, 0},
+  {"relay_log_recovery", OPT_RELAY_LOG_RECOVERY,
+   "Enables automatic relay log recovery right after the database startup, "
+   "which means that the IO Thread starts re-fetching from the master " 
+   "right after the last transaction processed.",
+   (uchar**) &relay_log_recovery,
+   (uchar**) &relay_log_recovery, 0, GET_BOOL, NO_ARG,
+   0, 0, 1, 0, 1, 0},
   {"relay_log_space_limit", OPT_RELAY_LOG_SPACE_LIMIT,
    "Maximum space to use for all relay logs.",
    (uchar**) &relay_log_space_limit,
@@ -7134,13 +7146,23 @@ The minimum value for this variable is 4
   {"sync-binlog", OPT_SYNC_BINLOG,
    "Synchronously flush binary log to disk after every #th event. "
    "Use 0 (default) to disable synchronous flushing.",
-   (uchar**) &sync_binlog_period, (uchar**) &sync_binlog_period, 0, GET_UINT,
-   REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0},
+   (uchar**) &sync_binlog_period, (uchar**) &sync_binlog_period, 0, GET_ULONG,
+   REQUIRED_ARG, 0, 0, ULONG_MAX, 0, 1, 0},
   {"sync-relay-log", OPT_SYNC_RELAY_LOG,
    "Synchronously flush relay log to disk after every #th event. "
    "Use 0 (default) to disable synchronous flushing.",
-   (uchar**) &sync_relaylog_period, (uchar**) &sync_relaylog_period, 0, GET_UINT,
-   REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0},
+   (uchar**) &sync_relaylog_period, (uchar**) &sync_relaylog_period, 0, GET_ULONG,
+   REQUIRED_ARG, 0, 0, ULONG_MAX, 0, 1, 0},
+  {"sync-relay-log-info", OPT_SYNC_RELAY_LOG_INFO,
+   "Synchronously flush relay log info to disk after #th transaction. "
+   "Use 0 (default) to disable synchronous flushing.",
+   (uchar**) &sync_relayloginfo_period, (uchar**) &sync_relayloginfo_period, 0, GET_ULONG,
+   REQUIRED_ARG, 0, 0, ULONG_MAX, 0, 1, 0},
+  {"sync-master-info", OPT_SYNC_MASTER_INFO,
+   "Synchronously flush master info to disk after every #th event. "
+   "Use 0 (default) to disable synchronous flushing.",
+   (uchar**) &sync_masterinfo_period, (uchar**) &sync_masterinfo_period, 0, GET_ULONG,
+   REQUIRED_ARG, 0, 0, ULONG_MAX, 0, 1, 0},
   {"sync-frm", OPT_SYNC_FRM, "Sync .frm to disk on create. Enabled by default.",
    (uchar**) &opt_sync_frm, (uchar**) &opt_sync_frm, 0, GET_BOOL, NO_ARG, 1, 0,
    0, 0, 0, 0},

=== modified file 'sql/rpl_mi.cc'
--- a/sql/rpl_mi.cc	2008-07-22 06:29:49 +0000
+++ b/sql/rpl_mi.cc	2008-12-08 01:40:22 +0000
@@ -412,17 +412,9 @@ int flush_master_info(Master_info* mi, b
     When we come to this place in code, relay log may or not be initialized;
     the caller is responsible for setting 'flush_relay_log_cache' accordingly.
   */
-  if (flush_relay_log_cache)
-  {
-    IO_CACHE *log_file= mi->rli.relay_log.get_log_file();
-    if (flush_io_cache(log_file))
-      DBUG_RETURN(2);
-
-    /* Sync to disk if --sync-relay-log is set */
-    if (sync_relaylog_period &&
-        my_sync(log_file->file, MY_WME))
-      DBUG_RETURN(2);
-  }
+  if (flush_relay_log_cache &&
+      flush_io_cache(mi->rli.relay_log.get_log_file()))
+    DBUG_RETURN(2);
 
   /*
     We flushed the relay log BEFORE the master.info file, because if we crash
@@ -477,8 +469,13 @@ int flush_master_info(Master_info* mi, b
               ignore_server_ids_buf);
   my_free(ignore_server_ids_buf, MYF(0));
   err= flush_io_cache(file);
-  if (sync_relaylog_period && !err)
+  static ulong sync_counter= 0;
+  if (sync_masterinfo_period && !err && 
+      ++sync_counter >= sync_masterinfo_period)
+  {
     err= my_sync(mi->fd, MYF(MY_WME));
+    sync_counter= 0;
+  }
   DBUG_RETURN(-err);
 }
 

=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc	2008-10-07 10:26:19 +0000
+++ b/sql/rpl_rli.cc	2008-12-08 01:40:22 +0000
@@ -248,7 +248,8 @@ Failed to open the existing relay log in
     rli->group_relay_log_pos= rli->event_relay_log_pos= relay_log_pos;
     rli->group_master_log_pos= master_log_pos;
 
-    if (init_relay_log_pos(rli,
+    if (!is_relay_log_recovery &&
+        init_relay_log_pos(rli,
                            rli->group_relay_log_name,
                            rli->group_relay_log_pos,
                            0 /* no data lock*/,
@@ -269,7 +270,8 @@ Failed to open the existing relay log in
                         llstr(my_b_tell(rli->cur_log),llbuf1),
                         llstr(rli->event_relay_log_pos,llbuf2)));
     DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
-    DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
+    if (!is_relay_log_recovery)
+      DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
   }
 #endif
 

=== modified file 'sql/set_var.cc'
--- a/sql/set_var.cc	2008-11-19 17:29:41 +0000
+++ b/sql/set_var.cc	2008-12-08 01:40:22 +0000
@@ -1439,22 +1439,6 @@ static bool get_unsigned(THD *thd, set_v
 }
 
 
-bool sys_var_int_ptr::check(THD *thd, set_var *var)
-{
-  return get_unsigned(thd, var);
-}
-
-bool sys_var_int_ptr::update(THD *thd, set_var *var)
-{
-  *value= (uint) var->save_result.ulonglong_value;
-  return 0;
-}
-
-void sys_var_int_ptr::set_default(THD *thd, enum_var_type type)
-{
-  *value= (uint) option_limits->def_value;
-}
-
 sys_var_long_ptr::
 sys_var_long_ptr(sys_var_chain *chain, const char *name_arg, ulong *value_ptr_arg,
                  sys_after_update_func after_update_arg)

=== modified file 'sql/set_var.h'
--- a/sql/set_var.h	2008-11-12 15:23:22 +0000
+++ b/sql/set_var.h	2008-12-08 01:40:22 +0000
@@ -162,28 +162,6 @@ public:
   { return (uchar*) value; }
 };
 
-/**
-   Unsigned int system variable class
- */
-class sys_var_int_ptr :public sys_var
-{
-public:
-  sys_var_int_ptr(sys_var_chain *chain, const char *name_arg, 
-                  uint *value_ptr_arg,
-                  sys_after_update_func after_update_arg= NULL)
-    :sys_var(name_arg, after_update_arg),
-     value(value_ptr_arg)
-  { chain_sys_var(chain); }
-  bool check(THD *thd, set_var *var);
-  bool update(THD *thd, set_var *var);
-  void set_default(THD *thd, enum_var_type type);
-  SHOW_TYPE show_type() { return SHOW_INT; }
-  uchar *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base)
-  { return (uchar*) value; }
-private:
-  uint *value;
-};
-
 /*
   A global ulong variable that is protected by LOCK_global_system_variables
 */

=== modified file 'sql/slave.cc'
--- a/sql/slave.cc	2008-11-12 15:23:22 +0000
+++ b/sql/slave.cc	2008-12-08 01:40:22 +0000
@@ -58,6 +58,7 @@ char* slave_load_tmpdir = 0;
 Master_info *active_mi= 0;
 my_bool replicate_same_server_id;
 ulonglong relay_log_space_limit = 0;
+bool is_relay_log_recovery= 0;
 
 /*
   When slave thread exits, we need to remember the temporary tables so we
@@ -131,6 +132,7 @@ static bool wait_for_relay_log_space(Rel
 static inline bool io_slave_killed(THD* thd,Master_info* mi);
 static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
 static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
+static int init_recovery(Master_info* mi);
 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
                           bool suppress_warnings);
@@ -253,6 +255,10 @@ int init_slave()
     goto err;
   }
 
+  is_relay_log_recovery= relay_log_recovery;
+  if (is_relay_log_recovery && init_recovery(active_mi))
+    goto err;
+
   /* If server id is not set, start_slave_thread() will say it */
 
   if (active_mi->host[0] && !opt_skip_slave_start)
@@ -268,16 +274,89 @@ int init_slave()
       goto err;
     }
   }
+  is_relay_log_recovery= 0;
   pthread_mutex_unlock(&LOCK_active_mi);
   DBUG_RETURN(0);
 
 err:
+  is_relay_log_recovery= 0;
   pthread_mutex_unlock(&LOCK_active_mi);
   DBUG_RETURN(1);
 }
 
 
 /*
+ * Updates the master info based on the information stored in the
+ * relay info and ignores relay logs previously retrieved by the IO
+ * thread, which thus starts fetching again based on the
+ * group_master_log_pos and group_master_log_name. Eventually, the old
+ * relay logs will be purged by the normal purge mechanism.
+ * 
+ * In the future, we can improve this routine in order to avoid throwing
+ * away logs that are safely stored on disk.
+ * 
+ * There is no need for a mutex as the caller (i.e. init_slave) already 
+ * has one acquired.
+ * 
+ * Specifically, the following structures are updated:
+ * 
+ * 1 - mi->master_log_pos  <-- rli->group_master_log_pos
+ * 2 - mi->master_log_name <-- rli->group_master_log_name
+ * 3 - It moves the relay log to the new relay log file, by
+ *     rli->group_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
+ *     rli->event_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
+ *     rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
+ *     rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
+ * 
+ *  If there is an error, it returns (1), otherwise returns (0).
+ */
+static int init_recovery(Master_info* mi)
+{
+  const char *errmsg= 0;
+  DBUG_ENTER("init_recovery");
+
+  Relay_log_info *rli= &mi->rli;
+  if (rli->group_master_log_name[0])
+  {
+    mi->master_log_pos= max(BIN_LOG_HEADER_SIZE,
+                            rli->group_master_log_pos);
+    strmake(mi->master_log_name, rli->group_master_log_name,
+            sizeof(mi->master_log_name)-1);
+
+    sql_print_warning("Recovery from master pos %ld and file %s.",
+                      (ulong) mi->master_log_pos, mi->master_log_name);
+
+    strmake(rli->group_relay_log_name, rli->relay_log.get_log_fname(),
+            sizeof(rli->group_relay_log_name)-1);
+    strmake(rli->event_relay_log_name, rli->relay_log.get_log_fname(),
+            sizeof(mi->rli.event_relay_log_name)-1);
+
+    rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
+
+    if (init_relay_log_pos(rli,
+                           rli->group_relay_log_name,
+                           rli->group_relay_log_pos,
+                           0 /*no data lock*/,
+                           &errmsg, 0))
+      DBUG_RETURN(1);
+
+    if (flush_master_info(mi, 0))
+    {
+      sql_print_error("Failed to flush master info file");
+      DBUG_RETURN(1);
+    }
+    if (flush_relay_log_info(rli))
+    {
+       sql_print_error("Failed to flush relay info file");
+       DBUG_RETURN(1);
+    }
+  }
+
+  DBUG_RETURN(0);
+}
+
+
+/*
   Init function to set up array for errors that should be skipped for slave
 
   SYNOPSIS
@@ -3741,7 +3820,15 @@ bool flush_relay_log_info(Relay_log_info
     error=1;
   if (flush_io_cache(file))
     error=1;
-
+  static ulong sync_counter= 0;
+  if (sync_relayloginfo_period &&
+      !error &&
+      ++sync_counter >= sync_relayloginfo_period)
+  {
+    if (my_sync(rli->info_fd, MYF(MY_WME)))
+      error=1;
+    sync_counter= 0;
+  }
   /* Flushing the relay log is done by the slave I/O thread */
   DBUG_RETURN(error);
 }

=== modified file 'sql/sql_repl.cc'
--- a/sql/sql_repl.cc	2008-11-19 17:29:41 +0000
+++ b/sql/sql_repl.cc	2008-12-08 01:40:22 +0000
@@ -1939,13 +1939,21 @@ static sys_var_chain vars = { NULL, NULL
 
 static sys_var_bool_ptr	sys_relay_log_purge(&vars, "relay_log_purge",
 					    &relay_log_purge);
+static sys_var_bool_ptr sys_relay_log_recovery(&vars, "relay_log_recovery",
+                                               &relay_log_recovery);
 static sys_var_long_ptr	sys_slave_net_timeout(&vars, "slave_net_timeout",
 					      &slave_net_timeout,
                                               fix_slave_net_timeout);
 static sys_var_long_ptr	sys_slave_trans_retries(&vars, "slave_transaction_retries",
 						&slave_trans_retries);
-static sys_var_int_ptr sys_sync_binlog_period(&vars, "sync_binlog", &sync_binlog_period);
-static sys_var_int_ptr sys_sync_relaylog_period(&vars, "sync_relay_log", &sync_relaylog_period);
+static sys_var_long_ptr sys_sync_binlog_period(&vars, "sync_binlog",
+                                               &sync_binlog_period);
+static sys_var_long_ptr sys_sync_relaylog_period(&vars, "sync_relay_log",
+                                                 &sync_relaylog_period);
+static sys_var_long_ptr sys_sync_relayloginfo_period(&vars, "sync_relay_log_info",
+                                                     &sync_relayloginfo_period);
+static sys_var_long_ptr sys_sync_masterinfo_period(&vars, "sync_master_info",
+                                                   &sync_masterinfo_period);
 static sys_var_slave_skip_counter sys_slave_skip_counter(&vars, "sql_slave_skip_counter");
 
 static int show_slave_skip_errors(THD *thd, SHOW_VAR *var, char *buff);

=== modified file 'sql/sql_repl.h'
--- a/sql/sql_repl.h	2008-05-09 10:27:23 +0000
+++ b/sql/sql_repl.h	2008-12-08 01:40:22 +0000
@@ -35,6 +35,7 @@ extern bool server_id_supplied;
 
 extern int max_binlog_dump_events;
 extern my_bool opt_sporadic_binlog_dump_fail;
+extern bool is_relay_log_recovery;
 
 int start_slave(THD* thd, Master_info* mi, bool net_report);
 int stop_slave(THD* thd, Master_info* mi, bool net_report);

Thread
bzr commit into mysql-6.0-rpl branch (alfranio.correia:2747) Bug#40337Alfranio Correia8 Dec
  • Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2747)Bug#40337He Zhenxing8 Dec
  • Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2747)Bug#40337Mats Kindahl12 Dec