List:Commits« Previous MessageNext Message »
From:Chuck Bell Date:December 3 2008 2:54am
Subject:bzr commit into mysql-6.0-backup branch (cbell:2737) Bug#40434
View as plain text  
#At file:///C:/source/bzr/mysql-6.0-bug-40434/ based on revid:rafal.somla@stripped

 2737 Chuck Bell	2008-12-02
      BUG#40434 : Replication should not be allowed to start if restore is running
      
      Currently, the work of WL#4209 and WL#4280 provides mechanisms to control
      how replication and backup interact. However, we also need a method to
      prevent replication from starting on a slave while a restore is ongoing
      on that same slave.
      
      This patch prohibits a slave from starting replication when a restore is in
      progress.
modified:
  mysql-test/suite/rpl/r/rpl_backup.result
  mysql-test/suite/rpl/t/rpl_backup.test
  sql/backup/kernel.cc
  sql/mysql_priv.h
  sql/mysqld.cc
  sql/repl_failsafe.cc
  sql/repl_failsafe.h
  sql/share/errmsg.txt
  sql/si_objects.cc
  sql/si_objects.h
  sql/sql_repl.cc

per-file messages:
  mysql-test/suite/rpl/r/rpl_backup.result
    Result file with additional test.
  mysql-test/suite/rpl/t/rpl_backup.test
    Added test that a slave is prohibited from starting replication while a
    restore is running.
  sql/backup/kernel.cc
    Added calls to restore_running() to tell server a restore is in progress.
  sql/mysql_priv.h
    Added extern declarations for allow_slave_start and its mutex, LOCK_slave_start.
  sql/mysqld.cc
    Added mutex for variable. 
    Added calls for initialization of mutex and variable.
  sql/repl_failsafe.cc
    Added methods to init and destroy mutex.
  sql/repl_failsafe.h
    Added declarations for methods to init and destroy mutex.
  sql/share/errmsg.txt
    New error message to tell the user a slave cannot start until the ongoing
    restore is complete.
  sql/si_objects.cc
    Added method to tell server a restore is running.
  sql/si_objects.h
    Method declaration for restore running method.
  sql/sql_repl.cc
    Added code to prohibit slave from starting if the variable allow_slave_start
    is set to FALSE. The variable is set to FALSE by restore_running() in
    si_objects.cc while a restore is running.
=== modified file 'mysql-test/suite/rpl/r/rpl_backup.result'
--- a/mysql-test/suite/rpl/r/rpl_backup.result	2008-11-17 09:57:51 +0000
+++ b/mysql-test/suite/rpl/r/rpl_backup.result	2008-12-03 02:53:40 +0000
@@ -327,6 +327,110 @@ the after position of the master's binlo
 should be 0.
 Delta
 0
+RESET MASTER;
+RESET SLAVE;
+SET DEBUG_SYNC = 'reset';
+SET DEBUG_SYNC = 'restore_before_end SIGNAL restore_running WAIT_FOR proceed';
+RESTORE FROM 'rpl_bup_s3.bak' OVERWRITE;
+SET DEBUG_SYNC = 'now WAIT_FOR restore_running';
+Try to start the slave while restore is running -- gets error.
+SLAVE START;
+ERROR HY000: Slave cannot start until restore is complete.
+SET DEBUG_SYNC = 'now SIGNAL proceed';
+SHOW SLAVE STATUS;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	MASTER_PORT
+Connect_Retry	1
+Master_Log_File	#
+Read_Master_Log_Pos	#
+Relay_Log_File	#
+Relay_Log_Pos	#
+Relay_Master_Log_File	
+Slave_IO_Running	No
+Slave_SQL_Running	No
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	#
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+Master_SSL_Verify_Server_Cert	No
+Last_IO_Errno	0
+Last_IO_Error	
+Last_SQL_Errno	0
+Last_SQL_Error	
+Restore is now complete.
+backup_id
+#
+SET DEBUG_SYNC = 'now SIGNAL done';
+SET DEBUG_SYNC = 'now WAIT_FOR done';
+SHOW DATABASES;
+Database
+information_schema
+mysql
+rpl_backup
+test
+SET DEBUG_SYNC = 'reset';
+Try to start the slave after restore is done -- should succeed.
+SLAVE START;
+SHOW SLAVE STATUS;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	MASTER_PORT
+Connect_Retry	1
+Master_Log_File	#
+Read_Master_Log_Pos	#
+Relay_Log_File	#
+Relay_Log_Pos	#
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	Yes
+Slave_SQL_Running	Yes
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	#
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+Master_SSL_Verify_Server_Cert	No
+Last_IO_Errno	0
+Last_IO_Error	
+Last_SQL_Errno	0
+Last_SQL_Error	
+Now stop the slave.
+SLAVE STOP;
 FLUSH BACKUP LOGS;
 PURGE BACKUP LOGS;
 DROP DATABASE rpl_backup;

=== modified file 'mysql-test/suite/rpl/t/rpl_backup.test'
--- a/mysql-test/suite/rpl/t/rpl_backup.test	2008-11-17 09:57:51 +0000
+++ b/mysql-test/suite/rpl/t/rpl_backup.test	2008-12-03 02:53:40 +0000
@@ -375,6 +375,63 @@ eval SELECT $master_after_pos - $master_
 --enable_query_log
 
 #
+# Now test 'slave start' while restore is in progress on slave.
+#
+
+RESET MASTER;
+
+connection slave;
+
+RESET SLAVE;
+
+SET DEBUG_SYNC = 'reset';
+
+connection slave1;
+
+SET DEBUG_SYNC = 'restore_before_end SIGNAL restore_running WAIT_FOR proceed';
+SEND RESTORE FROM 'rpl_bup_s3.bak' OVERWRITE;
+
+connection slave;
+
+SET DEBUG_SYNC = 'now WAIT_FOR restore_running';
+
+--echo Try to start the slave while restore is running -- gets error.
+--error ER_RESTORE_CANNOT_START_SLAVE
+SLAVE START;
+
+SET DEBUG_SYNC = 'now SIGNAL proceed';
+
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 6 # 7 # 8 # 9 # 22 # 23 # 33 #
+--query_vertical SHOW SLAVE STATUS
+
+connection slave1;
+--echo Restore is now complete.
+--replace_column 1 #
+reap;
+SET DEBUG_SYNC = 'now SIGNAL done';
+
+connection slave;
+
+SET DEBUG_SYNC = 'now WAIT_FOR done';
+
+SHOW DATABASES; 
+
+SET DEBUG_SYNC = 'reset';
+
+--echo Try to start the slave after restore is done -- should succeed.
+SLAVE START;
+--source include/wait_for_slave_to_start.inc
+
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 6 # 7 # 8 # 9 # 22 # 23 # 33 #
+--query_vertical SHOW SLAVE STATUS
+
+--echo Now stop the slave.
+SLAVE STOP;
+--source include/wait_for_slave_to_stop.inc
+
+#
 # Cleanup
 #
 connection master;

=== modified file 'sql/backup/kernel.cc'
--- a/sql/backup/kernel.cc	2008-11-28 10:10:39 +0000
+++ b/sql/backup/kernel.cc	2008-12-03 02:53:40 +0000
@@ -219,10 +219,14 @@ execute_backup_command(THD *thd, LEX *le
     
     DEBUG_SYNC(thd, "after_backup_start_restore");
 
+    obs::restore_running(TRUE);
+
     res= context.do_restore(overwrite);      
 
     DEBUG_SYNC(thd, "restore_before_end");
 
+    obs::restore_running(FALSE);
+
     if (res)
       DBUG_RETURN(send_error(context, ER_BACKUP_RESTORE));
     

=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h	2008-11-06 18:39:27 +0000
+++ b/sql/mysql_priv.h	2008-12-03 02:53:40 +0000
@@ -1988,6 +1988,7 @@ extern ulong slow_launch_threads, slow_l
 extern ulong table_cache_size, table_def_size;
 extern ulong max_connections,max_connect_errors, connect_timeout;
 extern my_bool slave_allow_batching;
+extern my_bool allow_slave_start;
 extern ulong slave_net_timeout, slave_trans_retries;
 extern uint max_user_connections;
 extern ulong what_to_log,flush_time;
@@ -2078,7 +2079,7 @@ extern pthread_mutex_t LOCK_mysql_create
        LOCK_error_log, LOCK_delayed_insert, LOCK_uuid_short,
        LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
        LOCK_slave_list, LOCK_active_mi, LOCK_manager, LOCK_global_read_lock,
-       LOCK_global_system_variables, LOCK_user_conn,
+       LOCK_global_system_variables, LOCK_user_conn, LOCK_slave_start,
        LOCK_prepared_stmt_count,
        LOCK_connection_count;
 #ifdef HAVE_OPENSSL

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2008-11-17 11:17:59 +0000
+++ b/sql/mysqld.cc	2008-12-03 02:53:40 +0000
@@ -540,6 +540,7 @@ ulong query_buff_size, slow_launch_time,
 ulong open_files_limit, max_binlog_size, max_relay_log_size;
 ulong slave_net_timeout, slave_trans_retries;
 my_bool slave_allow_batching;
+my_bool allow_slave_start= TRUE;
 ulong slave_exec_mode_options;
 const char *slave_exec_mode_str= "STRICT";
 ulong thread_cache_size=0, thread_pool_size= 0;
@@ -690,7 +691,7 @@ pthread_mutex_t LOCK_mysql_create_db, LO
 		LOCK_crypt,
 	        LOCK_global_system_variables,
                 LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
-                LOCK_connection_count;
+                LOCK_connection_count, LOCK_slave_start;
 
 /**
   The below lock protects access to two global server variables:
@@ -1387,6 +1388,7 @@ void clean_up(bool print_message)
   free_max_user_conn();
 #ifdef HAVE_REPLICATION
   end_slave_list();
+  end_slave_start();
 #endif
   delete binlog_filter;
   delete rpl_filter;
@@ -3914,6 +3916,10 @@ static int init_server_components()
   my_uuid_init((ulong) (my_rnd(&sql_rand))*12345,12345);
 #ifdef HAVE_REPLICATION
   init_slave_list();
+  init_slave_start();
+  pthread_mutex_lock(&LOCK_slave_start);
+  allow_slave_start= TRUE;
+  pthread_mutex_unlock(&LOCK_slave_start);
 #endif
 
   /* Setup logs */

=== modified file 'sql/repl_failsafe.cc'
--- a/sql/repl_failsafe.cc	2008-08-07 17:52:43 +0000
+++ b/sql/repl_failsafe.cc	2008-12-03 02:53:40 +0000
@@ -236,6 +236,22 @@ void end_slave_list()
   }
 }
 
+/**
+  Initialize mutex for slave start variable.
+*/
+void init_slave_start()
+{
+  pthread_mutex_init(&LOCK_slave_start, MY_MUTEX_INIT_FAST);
+}
+
+/**
+  Destroy mutex for slave start variable.
+*/
+void end_slave_start()
+{
+  pthread_mutex_destroy(&LOCK_slave_start);
+}
+
 static int find_target_pos(LEX_MASTER_INFO *mi, IO_CACHE *log, char *errmsg)
 {
   my_off_t log_pos =	    (my_off_t) mi->pos;

=== modified file 'sql/repl_failsafe.h'
--- a/sql/repl_failsafe.h	2007-08-16 06:52:50 +0000
+++ b/sql/repl_failsafe.h	2008-12-03 02:53:40 +0000
@@ -45,6 +45,8 @@ bool show_slave_hosts(THD* thd);
 int translate_master(THD* thd, LEX_MASTER_INFO* mi, char* errmsg);
 void init_slave_list();
 void end_slave_list();
+void init_slave_start();
+void end_slave_start();
 int register_slave(THD* thd, uchar* packet, uint packet_length);
 void unregister_slave(THD* thd, bool only_mine, bool need_mutex);
 

=== modified file 'sql/share/errmsg.txt'
--- a/sql/share/errmsg.txt	2008-11-28 10:10:39 +0000
+++ b/sql/share/errmsg.txt	2008-12-03 02:53:40 +0000
@@ -6442,3 +6442,5 @@ ER_BACKUP_BACKUP_DBS
   eng "Backing up %u database(s) %.220s"
 ER_BACKUP_RESTORE_DBS
   eng "Restoring %u database(s) %.220s"
+ER_RESTORE_CANNOT_START_SLAVE
+  eng "Slave cannot start until restore is complete."

=== modified file 'sql/si_objects.cc'
--- a/sql/si_objects.cc	2008-10-30 12:29:54 +0000
+++ b/sql/si_objects.cc	2008-12-03 02:53:40 +0000
@@ -4073,6 +4073,22 @@ int disable_slave_connections(bool disab
 }
 
 /**
+  Set state where restore is running.
+
+  This method tells the server that a restore is in progress.
+  This is used to prohibit slaves from starting once a restore is
+  in progress.
+
+  @param[in] running  TRUE = restore running, FALSE = no restore running
+*/
+void restore_running(bool running)
+{
+  pthread_mutex_lock(&LOCK_slave_start);
+  allow_slave_start= !running;
+  pthread_mutex_unlock(&LOCK_slave_start);
+}
+
+/**
   Write an incident event in the binary log.
 
   This method can be used to issue an incident event to inform the slave

=== modified file 'sql/si_objects.h'
--- a/sql/si_objects.h	2008-10-28 18:14:14 +0000
+++ b/sql/si_objects.h	2008-12-03 02:53:40 +0000
@@ -739,6 +739,11 @@ int num_slaves_attached();
 */
 int disable_slave_connections(bool disable);
 
+/**
+  Tell the server whether a restore is running (TRUE) or not (FALSE).
+*/
+void restore_running(bool running);
+
 /**
   Enumeration of the incidents that can occur on the master.
 */

=== modified file 'sql/sql_repl.cc'
--- a/sql/sql_repl.cc	2008-10-28 18:14:14 +0000
+++ b/sql/sql_repl.cc	2008-12-03 02:53:40 +0000
@@ -1000,6 +1000,21 @@ int start_slave(THD* thd , Master_info* 
 
   if (check_access(thd, SUPER_ACL, any_db,0,0,0,0))
     DBUG_RETURN(1);
+
+  /*
+    Refuse to start the slave while a restore is running on this server;
+    obs::restore_running() sets allow_slave_start to FALSE for its duration.
+  */
+  pthread_mutex_lock(&LOCK_slave_start);
+  bool proceed= allow_slave_start;
+  pthread_mutex_unlock(&LOCK_slave_start);
+  if (!proceed)
+  {
+    slave_errno= ER_RESTORE_CANNOT_START_SLAVE;
+    my_message(slave_errno, ER(slave_errno), MYF(0));
+    DBUG_RETURN(1);
+  }
+
   lock_slave_threads(mi);  // this allows us to cleanly read slave_running
   // Get a mask of _stopped_ threads
   init_thread_mask(&thread_mask,mi,1 /* inverse */);

Thread
bzr commit into mysql-6.0-backup branch (cbell:2737) Bug#40434Chuck Bell3 Dec
  • Re: bzr commit into mysql-6.0-backup branch (cbell:2737) Bug#40434Rafal Somla5 Dec
    • Re: bzr commit into mysql-6.0-backup branch (cbell:2737) Bug#40434Chuck Bell10 Dec