List:Commits« Previous MessageNext Message »
From:Alfranio Correia Date:February 12 2009 9:14am
Subject:bzr commit into mysql-6.0-rpl branch (alfranio.correia:2811) Bug#40337
View as plain text  
#At file:///home/acorreia/workspace.sun/repository.mysql/bzrwork/bug-40337/mysql-6.0-rpl/ based on revid:alfranio.correia@stripped

 2811 Alfranio Correia	2009-02-12
      BUG#40337  Fsyncing master and relay log to disk after every event is too slow
      
      Post-fix.
added:
  mysql-test/suite/rpl/r/rpl_sync.result
  mysql-test/suite/rpl/t/rpl_sync-slave.opt
  mysql-test/suite/rpl/t/rpl_sync.test
modified:
  sql/rpl_mi.cc
  sql/rpl_mi.h
  sql/rpl_rli.cc
  sql/rpl_rli.h
  sql/slave.cc
  sql/sql_binlog.cc

=== added file 'mysql-test/suite/rpl/r/rpl_sync.result'
--- a/mysql-test/suite/rpl/r/rpl_sync.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/r/rpl_sync.result	2009-02-12 09:13:57 +0000
@@ -0,0 +1,75 @@
+=====Configuring the enviroment=======;
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
+insert into t1(a) values(1);
+insert into t1(a) values(2);
+insert into t1(a) values(3);
+=====Inserting data on the master but without the SQL Thread being running=======;
+stop slave SQL_THREAD;
+insert into t1(a) values(4);
+insert into t1(a) values(5);
+insert into t1(a) values(6);
+=====Removing relay log files and crashing/recoverying the slave=======;
+stop slave IO_THREAD;
+failure
+SET SESSION debug="d,crash_before_rotate_relaylog";
+FLUSH LOGS;
+ERROR HY000: Lost connection to MySQL server during query
+=====Dumping and comparing tables=======;
+start slave;
+select * from t1;
+a
+1
+2
+3
+4
+5
+6
+select * from t1;
+a
+1
+2
+3
+4
+5
+6
+=====Corrupting the master.info=======;
+stop slave;
+FLUSH LOGS;
+insert into t1(a) values(7);
+insert into t1(a) values(8);
+insert into t1(a) values(9);
+SET SESSION debug="d,crash_before_rotate_relaylog";
+FLUSH LOGS;
+ERROR HY000: Lost connection to MySQL server during query
+=====Dumping and comparing tables=======;
+start slave;
+select * from t1;
+a
+1
+2
+3
+4
+5
+6
+7
+8
+9
+select * from t1;
+a
+1
+2
+3
+4
+5
+6
+7
+8
+9
+=====Clean up=======;
+drop table t1;

=== added file 'mysql-test/suite/rpl/t/rpl_sync-slave.opt'
--- a/mysql-test/suite/rpl/t/rpl_sync-slave.opt	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_sync-slave.opt	2009-02-12 09:13:57 +0000
@@ -0,0 +1 @@
+--sync-relay-log-info=1 --relay-log-recovery=1

=== added file 'mysql-test/suite/rpl/t/rpl_sync.test'
--- a/mysql-test/suite/rpl/t/rpl_sync.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_sync.test	2009-02-12 09:13:57 +0000
@@ -0,0 +1,143 @@
+########################################################################################
+# This test verifies the options --sync-relay-log-info and --relay-log-recovery by 
+# crashing the slave in two different situations:
+#  (case-1) - Corrupt the relay log with changes which were not processed by
+#  the SQL Thread and crashes it.
+#  (case-2) - Corrupt the master.info with wrong coordinates and crashes it.
+#
+#  Case 1:
+#    1 - Stops the SQL Thread
+#    2 - Inserts new records into the master.
+#    3 - Corrupts the relay-log.bin* which most likely has such changes.
+#    4 - Crashes the slave
+#    5 - Verifies if the slave is sync with the master which means that the information
+#    loss was circumvented by the recovery process.
+#
+#  Case 2:
+#    1 - Stops the SQL/IO Threads
+#    2 - Inserts new records into the master.
+#    3 - Corrupts the master.info with wrong coordinates.
+#    4 - Crashes the slave
+#    5 - Verifies if the slave is sync with the master which means that the information
+#    loss was circumvented by the recovery process.
+########################################################################################
+
+########################################################################################
+#                                Configuring the environment
+########################################################################################
+--echo =====Configuring the enviroment=======;
+--source include/master-slave.inc
+--source include/not_embedded.inc
+--source include/not_valgrind.inc
+
+--source include/have_debug.inc
+
+--source include/have_innodb.inc
+
+CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
+
+insert into t1(a) values(1);
+insert into t1(a) values(2);
+insert into t1(a) values(3);
+
+########################################################################################
+#                             Case 1: Corrupt a relay-log.bin*
+########################################################################################
+--echo =====Inserting data on the master but without the SQL Thread being running=======;
+save_master_pos;
+connection slave;
+sync_with_master;
+
+connection slave;
+let $MYSQLD_SLAVE_DATADIR= `select @@datadir`;
+--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
+--copy_file $MYSQLD_SLAVE_DATADIR/master.info $MYSQLD_SLAVE_DATADIR/master.backup
+stop slave SQL_THREAD;
+source include/wait_for_slave_sql_to_stop.inc;
+
+connection master;
+insert into t1(a) values(4);
+insert into t1(a) values(5);
+insert into t1(a) values(6);
+
+--echo =====Removing relay log files and crashing/recoverying the slave=======;
+connection slave;
+stop slave IO_THREAD;
+source include/wait_for_slave_io_to_stop.inc;
+
+let $file= query_get_value("SHOW SLAVE STATUS", Relay_Log_File, 1);
+--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
+--exec echo "failure" > $MYSQLD_SLAVE_DATADIR/$file
+--exec cat  $MYSQLD_SLAVE_DATADIR/$file
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+SET SESSION debug="d,crash_before_rotate_relaylog";
+--error 2013
+FLUSH LOGS;
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo =====Dumping and comparing tables=======;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+save_master_pos;
+connection slave;
+sync_with_master;
+
+connection master;
+select * from t1;
+
+connection slave;
+select * from t1;
+
+########################################################################################
+#                             Case 2: Corrupt a master.info
+########################################################################################
+--echo =====Corrupting the master.info=======;
+connection slave;
+stop slave;
+source include/wait_for_slave_to_stop.inc;
+
+connection master;
+FLUSH LOGS;
+
+insert into t1(a) values(7);
+insert into t1(a) values(8);
+insert into t1(a) values(9);
+
+connection slave;
+--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
+--exec cat $MYSQLD_SLAVE_DATADIR/master.backup > $MYSQLD_SLAVE_DATADIR/master.info
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+SET SESSION debug="d,crash_before_rotate_relaylog";
+--error 2013
+FLUSH LOGS;
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo =====Dumping and comparing tables=======;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+save_master_pos;
+connection slave;
+sync_with_master;
+
+connection master;
+select * from t1;
+
+connection slave;
+select * from t1;
+
+########################################################################################
+#                                      Clean up
+########################################################################################
+--echo =====Clean up=======;
+connection master;
+drop table t1;

=== modified file 'sql/rpl_mi.cc'
--- a/sql/rpl_mi.cc	2009-02-04 16:11:20 +0000
+++ b/sql/rpl_mi.cc	2009-02-12 09:13:57 +0000
@@ -30,13 +30,14 @@ int init_strvar_from_file(char *var, int
 int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val);
 int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f);
 
-Master_info::Master_info()
+Master_info::Master_info(bool is_slave_recovery)
   :Slave_reporting_capability("I/O"),
    ssl(0), ssl_verify_server_cert(0), fd(-1),  io_thd(0), port(MYSQL_PORT),
    connect_retry(DEFAULT_CONNECT_RETRY), heartbeat_period(0),
    received_heartbeats(0), inited(0), master_id(0),
    abort_slave(0), slave_running(0), slave_run_id(0),
-   sync_counter(0)
+   sync_counter(0),
+   rli(is_slave_recovery) 
 {
   host[0] = 0; user[0] = 0; password[0] = 0;
   ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0;

=== modified file 'sql/rpl_mi.h'
--- a/sql/rpl_mi.h	2009-02-04 16:11:20 +0000
+++ b/sql/rpl_mi.h	2009-02-12 09:13:57 +0000
@@ -59,7 +59,7 @@
 class Master_info : public Slave_reporting_capability
 {
  public:
-  Master_info();
+  Master_info(bool is_slave_recovery);
   ~Master_info();
   bool shall_ignore_server_id(ulong s_id);
 

=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc	2009-02-04 16:11:20 +0000
+++ b/sql/rpl_rli.cc	2009-02-12 09:13:57 +0000
@@ -30,7 +30,7 @@ int init_strvar_from_file(char *var, int
 			  const char *default_val);
 
 
-Relay_log_info::Relay_log_info()
+Relay_log_info::Relay_log_info(bool is_slave_recovery)
   :Slave_reporting_capability("SQL"),
    no_storage(FALSE), replicate_same_server_id(::replicate_same_server_id),
    info_fd(-1), cur_log_fd(-1), 
@@ -50,6 +50,8 @@ Relay_log_info::Relay_log_info()
 {
   DBUG_ENTER("Relay_log_info::Relay_log_info");
 
+  is_relay_log_recovery= is_slave_recovery;
+
   group_relay_log_name[0]= event_relay_log_name[0]=
     group_master_log_name[0]= 0;
   until_log_name[0]= ign_master_log_name_end[0]= 0;
@@ -265,14 +267,14 @@ Failed to open the existing relay log in
   }
 
 #ifndef DBUG_OFF
+  if (!(rli->is_relay_log_recovery))
   {
     char llbuf1[22], llbuf2[22];
     DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
                         llstr(my_b_tell(rli->cur_log),llbuf1),
                         llstr(rli->event_relay_log_pos,llbuf2)));
     DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
-    DBUG_ASSERT(rli->is_relay_log_recovery || 
-                (my_b_tell(rli->cur_log) == rli->event_relay_log_pos));
+    DBUG_ASSERT((my_b_tell(rli->cur_log) == rli->event_relay_log_pos));
   }
 #endif
 

=== modified file 'sql/rpl_rli.h'
--- a/sql/rpl_rli.h	2009-02-04 16:11:20 +0000
+++ b/sql/rpl_rli.h	2009-02-12 09:13:57 +0000
@@ -273,7 +273,7 @@ public:
   char ign_master_log_name_end[FN_REFLEN];
   ulonglong ign_master_log_pos_end;
 
-  Relay_log_info();
+  Relay_log_info(bool is_slave_recovery);
   ~Relay_log_info();
 
   /*

=== modified file 'sql/slave.cc'
--- a/sql/slave.cc	2009-02-04 16:11:20 +0000
+++ b/sql/slave.cc	2009-02-12 09:13:57 +0000
@@ -220,6 +220,7 @@ void unlock_slave_threads(Master_info* m
 int init_slave()
 {
   DBUG_ENTER("init_slave");
+  int error= 0;
 
   /*
     This is called when mysqld starts. Before client connections are
@@ -233,9 +234,12 @@ int init_slave()
   */
 
   if (pthread_key_create(&RPL_MASTER_INFO, NULL))
+  {
+    error= 1;
     goto err;
+  }
 
-  active_mi= new Master_info;
+  active_mi= new Master_info(relay_log_recovery);
 
   /*
     If master_host is not specified, try to read it from the master_info file.
@@ -245,6 +249,7 @@ int init_slave()
   if (!active_mi)
   {
     sql_print_error("Failed to allocate memory for the master info structure");
+    error= 1;
     goto err;
   }
 
@@ -252,12 +257,15 @@ int init_slave()
                        1, (SLAVE_IO | SLAVE_SQL)))
   {
     sql_print_error("Failed to initialize the master info structure");
+    error= 1;
     goto err;
   }
 
-  active_mi->rli.is_relay_log_recovery= relay_log_recovery;
   if (active_mi->rli.is_relay_log_recovery && init_recovery(active_mi))
+  {
+    error= 1;
     goto err;
+  }
 
   /* If server id is not set, start_slave_thread() will say it */
 
@@ -271,43 +279,43 @@ int init_slave()
                             SLAVE_IO | SLAVE_SQL))
     {
       sql_print_error("Failed to create slave threads");
+      error= 1;
       goto err;
     }
   }
-  active_mi->rli.is_relay_log_recovery= 0;
-  pthread_mutex_unlock(&LOCK_active_mi);
-  DBUG_RETURN(0);
 
 err:
-  active_mi->rli.is_relay_log_recovery= 0;
+  active_mi->rli.is_relay_log_recovery= false;
   pthread_mutex_unlock(&LOCK_active_mi);
-  DBUG_RETURN(1);
+  DBUG_RETURN(error);
 }
 
 /*
- * Updates the master info based on the information stored in the
- * relay info and ignores relay logs previously retrieved by the IO 
- * thread, which thus starts fetching again based on to the  
- * group_master_log_pos and group_master_log_name. Eventually, the old
- * relay logs will be purged by the normal purge mechanism.
- * 
- * In the feature, we can improve this routine in order to avoid throwing
- * away logs that are safely stored in the disk.
- * 
- * There is no need for a mutex as the caller (i.e. init_slave) already 
- * has one acquired.
- * 
- * Specifically, the following structures are updated:
- * 
- * 1 - mi->master_log_pos  <-- rli->group_master_log_pos
- * 2 - mi->master_log_name <-- rli->group_master_log_name
- * 3 - It moves the relay log to the new relay log file, by
- *     rli->group_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
- *     rli->event_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
- *     rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
- *     rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
- * 
- *  If there is an error, it returns (1), otherwise returns (0).
+  Updates the master info based on the information stored in the
+  relay info and ignores relay logs previously retrieved by the IO 
+  thread, which thus starts fetching again based on to the  
+  group_master_log_pos and group_master_log_name. Eventually, the old
+  relay logs will be purged by the normal purge mechanism.
+
+  In the feature, we should improve this routine in order to avoid throwing
+  away logs that are safely stored in the disk. Note also that this recovery 
+  routine relies on the correctness of the relay-log.info and only tolerates 
+  coordinate problems in master.info.
+  
+  In this function, there is no need for a mutex as the caller 
+  (i.e. init_slave) already has one acquired.
+  
+  Specifically, the following structures are updated:
+ 
+  1 - mi->master_log_pos  <-- rli->group_master_log_pos
+  2 - mi->master_log_name <-- rli->group_master_log_name
+  3 - It moves the relay log to the new relay log file, by
+      rli->group_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
+      rli->event_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
+      rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
+      rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
+  
+   If there is an error, it returns (1), otherwise returns (0).
  */
 static int init_recovery(Master_info* mi)
 {
@@ -4316,6 +4324,8 @@ void rotate_relay_log(Master_info* mi)
   DBUG_ENTER("rotate_relay_log");
   Relay_log_info* rli= &mi->rli;
 
+  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", exit(1););
+
   /* We don't lock rli->run_lock. This would lead to deadlocks. */
   pthread_mutex_lock(&mi->run_lock);
 

=== modified file 'sql/sql_binlog.cc'
--- a/sql/sql_binlog.cc	2009-01-26 16:03:39 +0000
+++ b/sql/sql_binlog.cc	2009-02-12 09:13:57 +0000
@@ -58,7 +58,7 @@ void mysql_client_binlog_statement(THD* 
   my_bool have_fd_event= TRUE;
   if (!thd->rli_fake)
   {
-    thd->rli_fake= new Relay_log_info;
+    thd->rli_fake= new Relay_log_info(false);
 #ifdef HAVE_purify
     thd->rli_fake->is_fake= TRUE;
 #endif

Thread
bzr commit into mysql-6.0-rpl branch (alfranio.correia:2811) Bug#40337Alfranio Correia12 Feb
  • Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2811)Bug#40337Mats Kindahl16 Feb
    • Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2811)Bug#40337Alfranio Correia17 Feb