List:Commits« Previous MessageNext Message »
From:Chuck Bell Date:December 16 2008 8:54pm
Subject:bzr commit into mysql-6.0-backup branch (charles.bell:2741) Bug#40434
View as plain text  
#At file:///D:/source/bzr/mysql-6.0-bug-40434/

 2741 Chuck Bell	2008-12-16
      BUG#40434 : Replication should not be allowed to start if restore is running
      
      Currently, the work of WL#4209 and WL#4280 provides mechanisms to control
      how replication and backup interact. However, we also need a method to
      prevent replication from starting on a slave if a restore is ongoing on
      that same slave.
      
      This patch prohibits a slave from starting replication when a restore is in
      progress.
modified:
  mysql-test/lib/mtr_report.pl
  mysql-test/suite/rpl/r/rpl_backup.result
  mysql-test/suite/rpl/t/rpl_backup.test
  sql/backup/kernel.cc
  sql/mysql_priv.h
  sql/mysqld.cc
  sql/share/errmsg.txt
  sql/si_objects.cc
  sql/si_objects.h
  sql/sql_repl.cc
  sql/sql_repl.h

per-file messages:
  mysql-test/lib/mtr_report.pl
    Mask for error trying to start restore while replicating.
  mysql-test/suite/rpl/r/rpl_backup.result
    Result file with additional test.
  mysql-test/suite/rpl/t/rpl_backup.test
    Added test for prohibiting a slave from starting replication while a
    restore is running.
  sql/backup/kernel.cc
    Added code to block replication during restore and release
    block after restore is complete.
  sql/mysql_priv.h
    Added extern references for the allow_slave_start variable and the
    LOCK_slave_start mutex that protects it.
    Added extern reference for the 'reason' string used in the error message.
  sql/mysqld.cc
    Added mutex for variable. 
    Added calls for initialization of mutex and variable.
    Added string reference for reason in error message for
    replication start failure.
  sql/share/errmsg.txt
    New error message to tell the user a slave cannot start until the ongoing
    process is complete.
  sql/si_objects.cc
    Added method block_replication() to tell the server that a restore is
    running and slave start must be blocked.
  sql/si_objects.h
    Declaration of the block_replication() method.
  sql/sql_repl.cc
    Added methods to init and destroy mutex.
    Added code to prohibit the slave from starting if the variable
    allow_slave_start is set to FALSE. Restore sets it to FALSE via
    block_replication() in si_objects.cc.
  sql/sql_repl.h
    Added declarations for methods to init and destroy mutex.
=== modified file 'mysql-test/lib/mtr_report.pl'
--- a/mysql-test/lib/mtr_report.pl	2008-11-21 15:02:34 +0000
+++ b/mysql-test/lib/mtr_report.pl	2008-12-16 20:54:07 +0000
@@ -377,6 +377,13 @@ sub mtr_report_stats ($) {
 		  /Backup: The MySQL server is running with the /
 		) or
 		
+		# The rpl_backup test will throw an error about running restore
+		# on a slave.
+		($testname eq 'rpl.rpl_backup') and
+		(
+		  /A restore operation was attempted on a slave during replication/
+		) or
+		
 		# The views test triggers errors below on purpose
 		($testname eq 'backup.backup_views') and
 		(

=== modified file 'mysql-test/suite/rpl/r/rpl_backup.result'
--- a/mysql-test/suite/rpl/r/rpl_backup.result	2008-11-17 09:57:51 +0000
+++ b/mysql-test/suite/rpl/r/rpl_backup.result	2008-12-16 20:54:07 +0000
@@ -327,6 +327,110 @@ the after position of the master's binlo
 should be 0.
 Delta
 0
+RESET MASTER;
+RESET SLAVE;
+SET DEBUG_SYNC = 'reset';
+SET DEBUG_SYNC = 'restore_before_end SIGNAL restore_running WAIT_FOR proceed';
+RESTORE FROM 'rpl_bup_s3.bak' OVERWRITE;
+SET DEBUG_SYNC = 'now WAIT_FOR restore_running';
+Try to start the slave while restore is running -- gets error.
+SLAVE START;
+ERROR HY000: Cannot start slave. SLAVE START is blocked by RESTORE.
+SET DEBUG_SYNC = 'now SIGNAL proceed';
+SHOW SLAVE STATUS;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	MASTER_PORT
+Connect_Retry	1
+Master_Log_File	#
+Read_Master_Log_Pos	#
+Relay_Log_File	#
+Relay_Log_Pos	#
+Relay_Master_Log_File	
+Slave_IO_Running	No
+Slave_SQL_Running	No
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	#
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+Master_SSL_Verify_Server_Cert	No
+Last_IO_Errno	0
+Last_IO_Error	
+Last_SQL_Errno	0
+Last_SQL_Error	
+Restore is now complete.
+backup_id
+#
+SET DEBUG_SYNC = 'now SIGNAL done';
+SET DEBUG_SYNC = 'now WAIT_FOR done';
+SHOW DATABASES;
+Database
+information_schema
+mysql
+rpl_backup
+test
+SET DEBUG_SYNC = 'reset';
+Try to start the slave after restore is done -- should succeed.
+SLAVE START;
+SHOW SLAVE STATUS;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	MASTER_PORT
+Connect_Retry	1
+Master_Log_File	#
+Read_Master_Log_Pos	#
+Relay_Log_File	#
+Relay_Log_Pos	#
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	Yes
+Slave_SQL_Running	Yes
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	#
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+Master_SSL_Verify_Server_Cert	No
+Last_IO_Errno	0
+Last_IO_Error	
+Last_SQL_Errno	0
+Last_SQL_Error	
+Now stop the slave.
+SLAVE STOP;
 FLUSH BACKUP LOGS;
 PURGE BACKUP LOGS;
 DROP DATABASE rpl_backup;

=== modified file 'mysql-test/suite/rpl/t/rpl_backup.test'
--- a/mysql-test/suite/rpl/t/rpl_backup.test	2008-11-17 09:57:51 +0000
+++ b/mysql-test/suite/rpl/t/rpl_backup.test	2008-12-16 20:54:07 +0000
@@ -375,6 +375,63 @@ eval SELECT $master_after_pos - $master_
 --enable_query_log
 
 #
+# Now test 'slave start' while restore is in progress on slave.
+#
+
+RESET MASTER;
+
+connection slave;
+
+RESET SLAVE;
+
+SET DEBUG_SYNC = 'reset';
+
+connection slave1;
+
+SET DEBUG_SYNC = 'restore_before_end SIGNAL restore_running WAIT_FOR proceed';
+SEND RESTORE FROM 'rpl_bup_s3.bak' OVERWRITE;
+
+connection slave;
+
+SET DEBUG_SYNC = 'now WAIT_FOR restore_running';
+
+--echo Try to start the slave while restore is running -- gets error.
+--error ER_RESTORE_CANNOT_START_SLAVE
+SLAVE START;
+
+SET DEBUG_SYNC = 'now SIGNAL proceed';
+
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 6 # 7 # 8 # 9 # 22 # 23 # 33 #
+--query_vertical SHOW SLAVE STATUS
+
+connection slave1;
+--echo Restore is now complete.
+--replace_column 1 #
+reap;
+SET DEBUG_SYNC = 'now SIGNAL done';
+
+connection slave;
+
+SET DEBUG_SYNC = 'now WAIT_FOR done';
+
+SHOW DATABASES; 
+
+SET DEBUG_SYNC = 'reset';
+
+--echo Try to start the slave after restore is done -- should succeed.
+SLAVE START;
+--source include/wait_for_slave_to_start.inc
+
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 6 # 7 # 8 # 9 # 22 # 23 # 33 #
+--query_vertical SHOW SLAVE STATUS
+
+--echo Now stop the slave.
+SLAVE STOP;
+--source include/wait_for_slave_to_stop.inc
+
+#
 # Cleanup
 #
 connection master;

=== modified file 'sql/backup/kernel.cc'
--- a/sql/backup/kernel.cc	2008-12-10 15:53:06 +0000
+++ b/sql/backup/kernel.cc	2008-12-16 20:54:07 +0000
@@ -208,12 +208,6 @@ execute_backup_command(THD *thd, LEX *le
   case SQLCOM_RESTORE:
   {
 
-    /*
-      Restore cannot be run on a slave while connected to a master.
-    */
-    if (obs::is_slave())
-      DBUG_RETURN(send_error(context, ER_RESTORE_ON_SLAVE));
-
     Restore_info *info= context.prepare_for_restore(backupdir, lex->backup_dir, 
                                                     thd->query);
     
@@ -701,6 +695,20 @@ Backup_restore_ctx::prepare_for_restore(
 {
   using namespace backup;  
 
+  /*
+    Block replication from starting.
+  */
+  obs::block_replication(TRUE, "RESTORE");
+
+  /*
+    Restore cannot be run on a slave while connected to a master.
+  */
+  if (obs::is_slave())
+  {
+    fatal_error(report_error(ER_RESTORE_ON_SLAVE));
+    return NULL;
+  }
+
   if (m_error)
     return NULL;
   
@@ -934,6 +942,11 @@ int Backup_restore_ctx::close()
   obs::disable_slave_connections(FALSE);
 
   /*
+    Allow replication to start after restore is complete.
+  */
+  obs::block_replication(FALSE, "");
+
+  /*
     Turn binlog back on iff it was turned off earlier.
   */
   if (m_engage_binlog)

=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h	2008-12-16 11:51:34 +0000
+++ b/sql/mysql_priv.h	2008-12-16 20:54:07 +0000
@@ -1976,6 +1976,8 @@ extern ulong slow_launch_threads, slow_l
 extern ulong table_cache_size, table_def_size;
 extern ulong max_connections,max_connect_errors, connect_timeout;
 extern my_bool slave_allow_batching;
+extern my_bool allow_slave_start;
+extern LEX_STRING reason_slave_blocked;
 extern ulong slave_net_timeout, slave_trans_retries;
 extern uint max_user_connections;
 extern ulong what_to_log,flush_time;
@@ -2066,7 +2068,7 @@ extern pthread_mutex_t LOCK_mysql_create
        LOCK_error_log, LOCK_delayed_insert, LOCK_uuid_short,
        LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
        LOCK_slave_list, LOCK_active_mi, LOCK_manager, LOCK_global_read_lock,
-       LOCK_global_system_variables, LOCK_user_conn,
+       LOCK_global_system_variables, LOCK_user_conn, LOCK_slave_start,
        LOCK_prepared_stmt_count,
        LOCK_connection_count;
 #ifdef HAVE_OPENSSL

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2008-12-10 12:57:51 +0000
+++ b/sql/mysqld.cc	2008-12-16 20:54:07 +0000
@@ -540,6 +540,8 @@ ulong query_buff_size, slow_launch_time,
 ulong open_files_limit, max_binlog_size, max_relay_log_size;
 ulong slave_net_timeout, slave_trans_retries;
 my_bool slave_allow_batching;
+my_bool allow_slave_start= TRUE;
+LEX_STRING reason_slave_blocked;
 ulong slave_exec_mode_options;
 const char *slave_exec_mode_str= "STRICT";
 ulong thread_cache_size=0, thread_pool_size= 0;
@@ -690,7 +692,7 @@ pthread_mutex_t LOCK_mysql_create_db, LO
 		LOCK_crypt,
 	        LOCK_global_system_variables,
                 LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
-                LOCK_connection_count;
+                LOCK_connection_count, LOCK_slave_start;
 
 /**
   The below lock protects access to two global server variables:
@@ -1381,6 +1383,7 @@ void clean_up(bool print_message)
   free_max_user_conn();
 #ifdef HAVE_REPLICATION
   end_slave_list();
+  end_slave_start();
 #endif
   delete binlog_filter;
   delete rpl_filter;
@@ -3886,6 +3889,7 @@ static int init_server_components()
   my_uuid_init((ulong) (my_rnd(&sql_rand))*12345,12345);
 #ifdef HAVE_REPLICATION
   init_slave_list();
+  init_slave_start();
 #endif
 
   /* Setup logs */

=== modified file 'sql/share/errmsg.txt'
--- a/sql/share/errmsg.txt	2008-12-15 09:22:24 +0000
+++ b/sql/share/errmsg.txt	2008-12-16 20:54:07 +0000
@@ -6447,4 +6447,5 @@ ER_BACKUP_RESTORE_DBS
   eng "Restoring %u database(s) %.220s"
 ER_BACKUP_SYNCHRONIZE
   eng "Backup failed to synchronize table images."
-
+ER_RESTORE_CANNOT_START_SLAVE
+  eng "Cannot start slave. SLAVE START is blocked by %-.64s."

=== modified file 'sql/si_objects.cc'
--- a/sql/si_objects.cc	2008-12-06 00:02:44 +0000
+++ b/sql/si_objects.cc	2008-12-16 20:54:07 +0000
@@ -3137,6 +3137,28 @@ int disable_slave_connections(bool disab
 }
 
 /**
+  Set state where replication is blocked from starting.
+
+  This method tells the server whether an operation that requires replication
+  to be turned off is in progress.
+  This is used to prohibit slaves from starting.
+
+  @param[in] block  TRUE = block slave start, FALSE = do not block
+  @param[in] reason  Reason for the block
+*/
+void block_replication(bool block, char *reason)
+{
+  pthread_mutex_lock(&LOCK_slave_start);
+  allow_slave_start= !block;
+  if (block)
+  {
+    reason_slave_blocked.length= strlen(reason);
+    reason_slave_blocked.str= reason;
+  }
+  pthread_mutex_unlock(&LOCK_slave_start);
+}
+
+/**
   Write an incident event in the binary log.
 
   This method can be used to issue an incident event to inform the slave

=== modified file 'sql/si_objects.h'
--- a/sql/si_objects.h	2008-12-04 23:14:30 +0000
+++ b/sql/si_objects.h	2008-12-16 20:54:07 +0000
@@ -509,6 +509,12 @@ int num_slaves_attached();
 */
 int disable_slave_connections(bool disable);
 
+/*
+  Set state where replication is blocked (TRUE) or not blocked (FALSE)
+  from starting. Include reason for feedback to user.
+*/
+void block_replication(bool block, char *reason);
+
 /**
   Enumeration of the incidents that can occur on the master.
 */

=== modified file 'sql/sql_repl.cc'
--- a/sql/sql_repl.cc	2008-11-24 20:46:11 +0000
+++ b/sql/sql_repl.cc	2008-12-16 20:54:07 +0000
@@ -992,6 +992,27 @@ err:
   DBUG_VOID_RETURN;
 }
 
+/**
+  Initialize mutex for slave start variable.
+*/
+void init_slave_start()
+{
+  pthread_mutex_init(&LOCK_slave_start, MY_MUTEX_INIT_FAST);
+  pthread_mutex_lock(&LOCK_slave_start);
+  allow_slave_start= TRUE;
+  reason_slave_blocked.length= 0;
+  reason_slave_blocked.str= "";
+  pthread_mutex_unlock(&LOCK_slave_start);
+}
+
+/**
+  Destroy mutex for slave start variable.
+*/
+void end_slave_start()
+{
+  pthread_mutex_destroy(&LOCK_slave_start);
+}
+
 int start_slave(THD* thd , Master_info* mi,  bool net_report)
 {
   int slave_errno= 0;
@@ -1000,6 +1021,25 @@ int start_slave(THD* thd , Master_info* 
 
   if (check_access(thd, SUPER_ACL, any_db,0,0,0,0))
     DBUG_RETURN(1);
+
+
+  /*
+    Ensure there are no restores running on the server.
+  */
+  pthread_mutex_lock(&LOCK_slave_start);
+  bool proceed= allow_slave_start;
+  bool success= TRUE;
+  if (!proceed)
+  {
+    slave_errno= ER_RESTORE_CANNOT_START_SLAVE;
+    if (net_report)
+      my_error(slave_errno, MYF(0), reason_slave_blocked);
+    success= FALSE;
+  }
+  pthread_mutex_unlock(&LOCK_slave_start);
+  if (!success)
+    DBUG_RETURN(1);
+
   lock_slave_threads(mi);  // this allows us to cleanly read slave_running
   // Get a mask of _stopped_ threads
   init_thread_mask(&thread_mask,mi,1 /* inverse */);

=== modified file 'sql/sql_repl.h'
--- a/sql/sql_repl.h	2008-05-09 10:27:23 +0000
+++ b/sql/sql_repl.h	2008-12-16 20:54:07 +0000
@@ -52,6 +52,8 @@ bool show_binlogs(THD* thd);
 extern int init_master_info(Master_info* mi);
 void kill_zombie_dump_threads(uint32 slave_server_id);
 int check_binlog_magic(IO_CACHE* log, const char** errmsg);
+void init_slave_start();
+void end_slave_start();
 
 typedef struct st_load_file_info
 {

Thread
bzr commit into mysql-6.0-backup branch (charles.bell:2741) Bug#40434 - Chuck Bell, 16 Dec
  • Re: bzr commit into mysql-6.0-backup branch (charles.bell:2741) Bug#40434 - Rafal Somla, 18 Dec