List:Commits« Previous MessageNext Message »
From:Chuck Bell Date:November 4 2008 4:07pm
Subject:bzr commit into mysql-6.0-backup branch (cbell:2727) Bug#40434, WL#4209,
WL#4280
View as plain text  
#At file:///D:/source/bzr/mysql-6.0-bug-40434/

 2727 Chuck Bell	2008-11-04
      BUG#40434 : Replication should not be allowed to start if restore is running
      
      Currently, the work of WL#4209 and WL#4280 provide mechanisms to control 
      how replication and backup interact. However, we also need to have a method 
      to prevent replication from starting on a slave if a restore is ongoing on that
      same slave.
      
      This patch prohibits a slave from starting replication when a restore is in
      progress.
modified:
  mysql-test/suite/rpl/r/rpl_backup.result
  mysql-test/suite/rpl/t/rpl_backup.test
  sql/backup/kernel.cc
  sql/mysql_priv.h
  sql/mysqld.cc
  sql/repl_failsafe.cc
  sql/repl_failsafe.h
  sql/share/errmsg.txt
  sql/si_objects.cc
  sql/si_objects.h
  sql/sql_repl.cc

per-file messages:
  mysql-test/suite/rpl/r/rpl_backup.result
    Result file with additional test.
  mysql-test/suite/rpl/t/rpl_backup.test
    Added test for prohibiting a slave to start replication while a restore is
    running.
  sql/backup/kernel.cc
    Added calls to restore_running() to tell server a restore is in progress.
  sql/mysql_priv.h
    Added extern reference for mutex for variable.
  sql/mysqld.cc
    Added mutex for variable. 
    Added calls for initialization of mutex and variable.
  sql/repl_failsafe.cc
    Added methods to init and destroy mutex.
  sql/repl_failsafe.h
    Added declarations for methods to init and destroy mutex.
  sql/share/errmsg.txt
    New error message to tell the user a slave cannot start until the ongoing
    restore is complete.
  sql/si_objects.cc
    Added method to tell server a restore is running.
  sql/si_objects.h
    Method declaration for restore running method.
  sql/sql_repl.cc
    Added code to prohibit slave from starting if the variable allow_slave_start 
    is set to FALSE. This is done when a restore is run via restore_running()
    in si_objects.cc.
=== modified file 'mysql-test/suite/rpl/r/rpl_backup.result'
--- a/mysql-test/suite/rpl/r/rpl_backup.result	2008-10-31 15:31:52 +0000
+++ b/mysql-test/suite/rpl/r/rpl_backup.result	2008-11-04 15:07:18 +0000
@@ -327,6 +327,109 @@ the after position of the master's binlo
 should be 0.
 Delta
 0
+RESET MASTER;
+RESET SLAVE;
+SET DEBUG_SYNC = 'reset';
+SET DEBUG_SYNC = 'restore_before_end SIGNAL restore_running WAIT_FOR proceed';
+RESTORE FROM 'rpl_bup_s3.bak';
+SET DEBUG_SYNC = 'now WAIT_FOR restore_running';
+Try to start the slave while restore is running -- gets error.
+SLAVE START;
+ERROR HY000: Slave cannot start until restore is complete.
+SET DEBUG_SYNC = 'now SIGNAL proceed';
+SHOW SLAVE STATUS;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	MASTER_PORT
+Connect_Retry	1
+Master_Log_File	#
+Read_Master_Log_Pos	#
+Relay_Log_File	#
+Relay_Log_Pos	#
+Relay_Master_Log_File	
+Slave_IO_Running	No
+Slave_SQL_Running	No
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	#
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+Master_SSL_Verify_Server_Cert	No
+Last_IO_Errno	0
+Last_IO_Error	
+Last_SQL_Errno	0
+Last_SQL_Error	
+backup_id
+#
+SET DEBUG_SYNC = 'now SIGNAL done';
+SET DEBUG_SYNC = 'now WAIT_FOR done';
+SHOW DATABASES;
+Database
+information_schema
+mysql
+rpl_backup
+test
+SET DEBUG_SYNC = 'reset';
+Try to start the slave after restore is done -- should succeed.
+SLAVE START;
+SHOW SLAVE STATUS;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	MASTER_PORT
+Connect_Retry	1
+Master_Log_File	#
+Read_Master_Log_Pos	#
+Relay_Log_File	#
+Relay_Log_Pos	#
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	Yes
+Slave_SQL_Running	Yes
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	#
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+Master_SSL_Verify_Server_Cert	No
+Last_IO_Errno	0
+Last_IO_Error	
+Last_SQL_Errno	0
+Last_SQL_Error	
+Now stop the slave.
+SLAVE STOP;
 FLUSH BACKUP LOGS;
 PURGE BACKUP LOGS;
 DROP DATABASE rpl_backup;

=== modified file 'mysql-test/suite/rpl/t/rpl_backup.test'
--- a/mysql-test/suite/rpl/t/rpl_backup.test	2008-10-31 15:31:52 +0000
+++ b/mysql-test/suite/rpl/t/rpl_backup.test	2008-11-04 15:07:18 +0000
@@ -375,6 +375,62 @@ eval SELECT $master_after_pos - $master_
 --enable_query_log
 
 #
+# Now test 'slave start' while restore is in progress on slave.
+#
+
+RESET MASTER;
+
+connection slave;
+
+RESET SLAVE;
+
+SET DEBUG_SYNC = 'reset';
+
+connection slave1;
+
+SET DEBUG_SYNC = 'restore_before_end SIGNAL restore_running WAIT_FOR proceed';
+SEND RESTORE FROM 'rpl_bup_s3.bak';
+
+connection slave;
+
+SET DEBUG_SYNC = 'now WAIT_FOR restore_running';
+
+--echo Try to start the slave while restore is running -- gets error.
+--error ER_RESTORE_CANNOT_START_SLAVE
+SLAVE START;
+
+SET DEBUG_SYNC = 'now SIGNAL proceed';
+
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 6 # 7 # 8 # 9 # 22 # 23 # 33 #
+--query_vertical SHOW SLAVE STATUS
+
+connection slave1;
+--replace_column 1 #
+reap;
+SET DEBUG_SYNC = 'now SIGNAL done';
+
+connection slave;
+
+SET DEBUG_SYNC = 'now WAIT_FOR done';
+
+SHOW DATABASES; 
+
+SET DEBUG_SYNC = 'reset';
+
+--echo Try to start the slave after restore is done -- should succeed.
+SLAVE START;
+--source include/wait_for_slave_to_start.inc
+
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 6 # 7 # 8 # 9 # 22 # 23 # 33 #
+--query_vertical SHOW SLAVE STATUS
+
+--echo Now stop the slave.
+SLAVE STOP;
+--source include/wait_for_slave_to_stop.inc
+
+#
 # Cleanup
 #
 connection master;

=== modified file 'sql/backup/kernel.cc'
--- a/sql/backup/kernel.cc	2008-10-30 20:02:15 +0000
+++ b/sql/backup/kernel.cc	2008-11-04 15:07:18 +0000
@@ -226,9 +226,13 @@ execute_backup_command(THD *thd, LEX *le
     
     DEBUG_SYNC(thd, "after_backup_start_restore");
 
+    obs::restore_running(TRUE);
+
     res= context.do_restore();      
 
     DEBUG_SYNC(thd, "restore_before_end");
+
+    obs::restore_running(FALSE);
 
     if (res)
       DBUG_RETURN(send_error(context, ER_BACKUP_RESTORE));

=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h	2008-10-22 11:51:28 +0000
+++ b/sql/mysql_priv.h	2008-11-04 15:07:18 +0000
@@ -1998,6 +1998,7 @@ extern ulong slow_launch_threads, slow_l
 extern ulong table_cache_size, table_def_size;
 extern ulong max_connections,max_connect_errors, connect_timeout;
 extern my_bool slave_allow_batching;
+extern my_bool allow_slave_start;
 extern ulong slave_net_timeout, slave_trans_retries;
 extern uint max_user_connections;
 extern ulong what_to_log,flush_time;
@@ -2088,8 +2089,8 @@ extern pthread_mutex_t LOCK_mysql_create
        LOCK_error_log, LOCK_delayed_insert, LOCK_uuid_short,
        LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
        LOCK_slave_list, LOCK_active_mi, LOCK_manager, LOCK_global_read_lock,
-       LOCK_global_system_variables, LOCK_user_conn,
-       LOCK_prepared_stmt_count,
+       LOCK_global_system_variables, LOCK_user_conn, LOCK_slave_start,
+       LOCK_prepared_stmt_count, 
        LOCK_bytes_sent, LOCK_bytes_received, LOCK_connection_count;
 #ifdef HAVE_OPENSSL
 extern pthread_mutex_t LOCK_des_key_file;

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2008-10-28 18:14:14 +0000
+++ b/sql/mysqld.cc	2008-11-04 15:07:18 +0000
@@ -544,6 +544,7 @@ ulong query_buff_size, slow_launch_time,
 ulong open_files_limit, max_binlog_size, max_relay_log_size;
 ulong slave_net_timeout, slave_trans_retries;
 my_bool slave_allow_batching;
+my_bool allow_slave_start= TRUE;
 ulong slave_exec_mode_options;
 const char *slave_exec_mode_str= "STRICT";
 ulong thread_cache_size=0, thread_pool_size= 0;
@@ -694,7 +695,7 @@ pthread_mutex_t LOCK_mysql_create_db, LO
 		LOCK_crypt, LOCK_bytes_sent, LOCK_bytes_received,
 	        LOCK_global_system_variables,
                 LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
-                LOCK_connection_count;
+                LOCK_connection_count, LOCK_slave_start;
 
 /**
   The below lock protects access to two global server variables:
@@ -1391,6 +1392,7 @@ void clean_up(bool print_message)
   free_max_user_conn();
 #ifdef HAVE_REPLICATION
   end_slave_list();
+  end_slave_start();
 #endif
   delete binlog_filter;
   delete rpl_filter;
@@ -3920,6 +3922,8 @@ static int init_server_components()
   my_uuid_init((ulong) (my_rnd(&sql_rand))*12345,12345);
 #ifdef HAVE_REPLICATION
   init_slave_list();
+  init_slave_start();
+  allow_slave_start= TRUE;
 #endif
 
   /* Setup logs */

=== modified file 'sql/repl_failsafe.cc'
--- a/sql/repl_failsafe.cc	2008-08-07 17:52:43 +0000
+++ b/sql/repl_failsafe.cc	2008-11-04 15:07:18 +0000
@@ -236,6 +236,22 @@ void end_slave_list()
   }
 }
 
+/**
+  Initialize mutex for slave start variable.
+*/
+void init_slave_start()
+{
+  pthread_mutex_init(&LOCK_slave_start, MY_MUTEX_INIT_FAST);
+}
+
+/**
+  Destroy mutex for slave start variable.
+*/
+void end_slave_start()
+{
+  pthread_mutex_destroy(&LOCK_slave_start);
+}
+
 static int find_target_pos(LEX_MASTER_INFO *mi, IO_CACHE *log, char *errmsg)
 {
   my_off_t log_pos =	    (my_off_t) mi->pos;

=== modified file 'sql/repl_failsafe.h'
--- a/sql/repl_failsafe.h	2007-08-16 06:52:50 +0000
+++ b/sql/repl_failsafe.h	2008-11-04 15:07:18 +0000
@@ -45,6 +45,8 @@ bool show_slave_hosts(THD* thd);
 int translate_master(THD* thd, LEX_MASTER_INFO* mi, char* errmsg);
 void init_slave_list();
 void end_slave_list();
+void init_slave_start();
+void end_slave_start();
 int register_slave(THD* thd, uchar* packet, uint packet_length);
 void unregister_slave(THD* thd, bool only_mine, bool need_mutex);
 

=== modified file 'sql/share/errmsg.txt'
--- a/sql/share/errmsg.txt	2008-10-30 17:53:24 +0000
+++ b/sql/share/errmsg.txt	2008-11-04 15:07:18 +0000
@@ -6418,3 +6418,5 @@ ER_RESTORE_ON_MASTER
   eng "A restore operation was initiated on the master."
 ER_RESTORE_ON_SLAVE
   eng "A restore operation was attempted on a slave during replication. You must stop the
slave prior to running a restore."
+ER_RESTORE_CANNOT_START_SLAVE
+  eng "Slave cannot start until restore is complete."

=== modified file 'sql/si_objects.cc'
--- a/sql/si_objects.cc	2008-10-30 12:29:54 +0000
+++ b/sql/si_objects.cc	2008-11-04 15:07:18 +0000
@@ -4073,6 +4073,22 @@ int disable_slave_connections(bool disab
 }
 
 /**
+  Set state where restore is running.
+
+  This method tells the server that a restore is in progress.
+  This is used to prohibit slaves from starting once a restore is
+  in progress.
+
+  param[IN] running  TRUE = restore running, FALSE = no restore running
+*/
+void restore_running(bool running)
+{
+  pthread_mutex_lock(&LOCK_slave_start);
+  allow_slave_start= !running;
+  pthread_mutex_unlock(&LOCK_slave_start);
+}
+
+/**
   Write an incident event in the binary log.
 
   This method can be used to issue an incident event to inform the slave

=== modified file 'sql/si_objects.h'
--- a/sql/si_objects.h	2008-10-28 18:14:14 +0000
+++ b/sql/si_objects.h	2008-11-04 15:07:18 +0000
@@ -739,6 +739,11 @@ int num_slaves_attached();
 */
 int disable_slave_connections(bool disable);
 
+/*
+  Set state where restore is running.
+*/
+void restore_running(bool running);
+
 /**
   Enumeration of the incidents that can occur on the master.
 */

=== modified file 'sql/sql_repl.cc'
--- a/sql/sql_repl.cc	2008-10-28 18:14:14 +0000
+++ b/sql/sql_repl.cc	2008-11-04 15:07:18 +0000
@@ -1000,6 +1000,20 @@ int start_slave(THD* thd , Master_info* 
 
   if (check_access(thd, SUPER_ACL, any_db,0,0,0,0))
     DBUG_RETURN(1);
+
+  /*
+    Ensure there are no restores running on the server.
+  */
+  pthread_mutex_lock(&LOCK_slave_start);
+  bool proceed= allow_slave_start;
+  pthread_mutex_unlock(&LOCK_slave_start);
+  if (!proceed)
+  {
+    slave_errno= ER_RESTORE_CANNOT_START_SLAVE;
+    my_message(slave_errno, ER(slave_errno), MYF(0));
+    DBUG_RETURN(1);
+  }
+
   lock_slave_threads(mi);  // this allows us to cleanly read slave_running
   // Get a mask of _stopped_ threads
   init_thread_mask(&thread_mask,mi,1 /* inverse */);

Thread
bzr commit into mysql-6.0-backup branch (cbell:2727) Bug#40434, WL#4209,WL#4280Chuck Bell4 Nov
  • Re: bzr commit into mysql-6.0-backup branch (cbell:2727) Bug#40434, WL#4209, WL#4280Andrei Elkin21 Nov