MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Rafal Somla Date:November 25 2009 10:37am
Subject:bzr commit into mysql-6.0-backup branch (Rafal.Somla:2895) Bug#47940
View as plain text  
#At file:///ext/mysql/bzr/backup/bug47940/ based on revid:rafal.somla@stripped

 2895 Rafal Somla	2009-11-25
      Bug#47940 - Backup: Implementation of save_vp_info() violates its specification.
      
      This patch fixes problems in the backup synchronization logic:
      
      - In case of error, all drivers which have ben locked, should be 
        unlocked before we abort operation.
      
      - Potentially time consuming error reporting should not be done inside
        the synchronization phase, but only after all drivers have been
        unlocked.
     @ mysql-test/suite/backup/r/backup_errors_debug_3.result
        This test case triggers errors during unlock() of backup drivers. Since two
        drivers are used, the unlock error should be reported twice. This patch
        fixes it.
     @ sql/backup/data_backup.cc
        - Add new LOCKED driver state.
        - Add Scheduler::report_vp_errors() method.
        - Add Scheduler members m_failed_lock and m_vp_info_error which
          store information about errors encountered in the synchronization
          phase.
        - Remove error reporting from time critical methods.
        - Refactoring of the synchronization phase.
        - Add new operator and fix comments in Pump_iterator class.
        - Refactor Pump_iterator::lock()/unlock() methods: make sure all 
          drivers which have been locked are unlocked in case of error.
        - Refactor Backup_pump methods: remove error reporting and 
          handle the new LOCKED state.

    modified:
      mysql-test/suite/backup/r/backup_errors_debug_3.result
      sql/backup/data_backup.cc
=== modified file 'mysql-test/suite/backup/r/backup_errors_debug_3.result'
--- a/mysql-test/suite/backup/r/backup_errors_debug_3.result	2009-11-24 16:47:23 +0000
+++ b/mysql-test/suite/backup/r/backup_errors_debug_3.result	2009-11-25 10:37:26 +0000
@@ -626,6 +626,7 @@ ERROR HY000: Can't unlock DEBUG TEST bac
 SELECT * FROM mysql.backup_progress WHERE error_num <> 0;
 backup_id	object	error_num	notes
 #		####	Can't unlock DEBUG TEST backup driver after creating the validity point
+#		####	Can't unlock DEBUG TEST backup driver after creating the validity point
 PURGE BACKUP LOGS;
 #
 # Turn off debug SESSION.

=== modified file 'sql/backup/data_backup.cc'
--- a/sql/backup/data_backup.cc	2009-10-21 13:32:24 +0000
+++ b/sql/backup/data_backup.cc	2009-11-25 10:37:26 +0000
@@ -42,6 +42,7 @@ struct backup_state {
               WAITING,    ///< Waiting for others to finish init ph. (phase 2).
               PREPARING,  ///< Preparing for @c lock() call (phase 3).
               READY,      ///< Ready for @c lock() call (phase 4)
+              LOCKED,     ///< After @c lock() and before @c unlock().
               FINISHING,  ///< Final data transfer (phase 7).
               DONE,       ///< Backup complete.
               SHUT_DOWN,  ///< After @c end() call.
@@ -67,6 +68,7 @@ struct backup_state {
       name[WAITING]=    "WAITING";
       name[PREPARING]=  "PREPARING";
       name[READY]=      "READY";
+      name[LOCKED]=     "LOCKED";
       name[FINISHING]=  "FINISHING";
       name[DONE]=       "DONE";
       name[SHUT_DOWN]=  "SHUT DOWN";
@@ -256,6 +258,7 @@ public:
   int prepare();
   int lock();
   int unlock();
+  int report_vp_errors();
 
   uint  init_count;     ///< Nr of drivers sending init data.
   uint  prepare_count;  ///< Nr of drivers preparing for lock.
@@ -286,11 +289,15 @@ private:
   Output_stream &m_str; ///< Stream to which we write.
   bool   cancelled;     ///< True if backup process was cancelled.
 
+  Backup_pump *m_failed_lock; ///< If not NULL, points at driver which failed in lock() call.
+  bool   m_vp_info_error;  ///< TRUE if errors were detected while collecting VP info in the synchronization phase.
+
   Scheduler(Output_stream &s, Logger &log)
     :init_count(0), prepare_count(0), finish_count(0),
     m_pumps(NULL), m_last(NULL), m_log(log),
     m_count(0), m_total(0), m_init_left(0), m_known_count(0),
-    m_str(s), cancelled(FALSE)
+    m_str(s), cancelled(FALSE),
+    m_failed_lock(NULL), m_vp_info_error(FALSE)
   {}
 
   void move_pump_to_end(const Pump_iterator&);
@@ -481,12 +488,7 @@ int save_vp_info(Backup_info &info)
 
     DBUG_EXECUTE_IF("ER_BACKUP_BINLOG", ret= TRUE;);
 
-    if (ret)
-    {
-      info.m_log.report_error(ER_BACKUP_BINLOG);
-      ret= TRUE;
-    }
-    else
+    if (!ret)
       info.save_binlog_info(binlog_start_pos);
   }
 
@@ -547,8 +549,10 @@ int write_table_data(THD* thd, Backup_in
   */
   if (info.snap_count() == 0 || info.table_count() == 0) // nothing to backup
   {
-    int res= save_vp_info(info);    // Logs errors.
-    if (!res)
+    int res= save_vp_info(info);
+    if (res)
+      info.m_log.report_error(ER_BACKUP_BINLOG);
+    else
       report_vp_info(info);
     DBUG_RETURN(res);
   }
@@ -743,20 +747,23 @@ int write_table_data(THD* thd, Backup_in
     /*
       Note: we do not detect/react to process interruptions during the
       synchronization. We let the protocol to go through and check for
-      possible interruptions only at the end, i.e., after sch.unlock().
+      possible interruptions and errors only at the end, i.e., after 
+      sch.unlock().
     */
-    if (sch.lock())    // Logs errors.
-      goto error;
+    if(!sch.lock())
+    {
+      if(save_vp_info(info))
+        sch.m_vp_info_error= TRUE;
+      DEBUG_SYNC(thd, "before_backup_data_unlock");
+      sch.unlock();
+    }
 
-    if (save_vp_info(info))
+    if(sch.report_vp_errors())
       goto error;
 
-    DEBUG_SYNC(thd, "before_backup_data_unlock");
-    if (sch.unlock() || log.report_killed())    // Logs errors.
+    if(log.report_killed())
       goto error;
 
-    // Unblock commits.
-    DEBUG_SYNC(thd, "before_backup_unblock_commit");
     unblock_commits(thd);
     commits_blocked= FALSE;
     if (log.report_killed())
@@ -807,27 +814,33 @@ namespace backup {
 
 class Scheduler::Pump_iterator
 {
+  LIST  *pumps;  ///< The list of pumps.
+
 public:
 
-  LIST  *pumps;  ///< The list of pumps.
+  /// Return pointer to the current pump.
+  operator Pump*()
+  {
+    return pumps? static_cast<Pump*>(pumps->data) : NULL;
+  }
 
-  /// The next operator.
+  /// Access the current pump.
   Pump* operator->()
   {
     return pumps? static_cast<Pump*>(pumps->data) : NULL;
   }
 
-  /// The increment operator.
+  /// Move to the next pump.
   void  operator++()
   {
     if(pumps) pumps= pumps->next;
   }
 
-  /// Check to see if pumps list exist and has data.
+  /// Returns FALSE if there are no more pumps in the sequence.
   operator bool() const
   { return pumps && pumps->data; }
 
-  /// The comparison operator.
+  /// The assign operator.
   void operator=(const Pump_iterator &p)
   { pumps= p.pumps; }
 
@@ -839,6 +852,7 @@ public:
   Pump_iterator(const Scheduler &sch) :pumps(sch.m_pumps)
   {}
 
+  friend class Scheduler; // So that it can manipulate the pump queue.
 };
 
 
@@ -1126,9 +1140,12 @@ int Scheduler::lock()
   DBUG_PRINT("backup_data",("calling lock() for all drivers"));
 
   for (Pump_iterator it(*this); it; ++it)
-   if (it->lock())    // Logs errors
+   if (it->lock())
    {
-     remove_pump(it);  // Never errors.
+     // Save pointer to the failed driver.
+     m_failed_lock= it;
+     // Before returning, unlock all successfully locked drivers.
+     unlock();
      return ERROR;
    }
 
@@ -1145,19 +1162,67 @@ int Scheduler::unlock()
 {
   DBUG_ASSERT(!cancelled);
   DBUG_PRINT("backup_data",("calling unlock() for all drivers"));
+  int ret= 0;
 
   for(Pump_iterator it(*this); it; ++it)
   {
-    if (it->unlock())   // Logs errors.
-    {
-      remove_pump(it);  // Never errors.
-      return ERROR;
-    }
+    if (it->state != backup_state::LOCKED)
+      continue;
+
+    if (it->unlock())
+      ret= ERROR;
+
     if (it->state == backup_state::FINISHING)
       finish_count++;
   }
 
-  return 0;
+  return ret;
+}
+
+
+/**
+  Report errors which could happen during the synchronization phase (between
+  @c lock() and @c unlock() calls).
+
+  This method will correctly report errors only if called directly after 
+  @c unlock() method (or failed @c lock() method). It does nothing if no errors 
+  were detected. If @c m_failed_lock is set, an error about failing @c lock()
+  call is reported. If @c m_vp_info_error is set, an error during collection of
+  VP info is reported. Any drivers in ERROR state (apart from the one pointed
+  by @c m_failed_lock) are assumed to be failed on @c unlock() call. The
+  appropriate error is reported for each such driver.
+
+  @return 0 if no errors were detected, non-zero otherwise.
+*/ 
+
+int Scheduler::report_vp_errors()
+{
+  int ret= 0;
+
+  // First, report lock() error if there was a problem.
+  if (m_failed_lock)
+  {
+    ret= ERROR;
+    m_log.report_error(ER_BACKUP_CREATE_VP, m_failed_lock->m_name);
+  }
+
+  // Now report errors in collecting VP info.
+  if (m_vp_info_error)
+  {
+    ret= ERROR;
+    m_log.report_error(ER_BACKUP_BINLOG);
+  }
+
+  // Finally, report errors during unlock() calls.
+
+  for (Pump_iterator it(*this); it; ++it)
+    if (it->state == backup_state::ERROR && it != m_failed_lock)
+    {
+      ret= ERROR;
+      m_log.report_error(ER_BACKUP_UNLOCK_DRIVER, it->m_name);
+    }
+
+  return ret;
 }
 
 
@@ -1303,6 +1368,7 @@ int Backup_pump::prepare()
 int Backup_pump::lock()
 {
   DBUG_PRINT("backup_data",(" locking %s", m_name));
+  state= backup_state::LOCKED;
 
   int res= m_drv->lock();
 
@@ -1310,15 +1376,9 @@ int Backup_pump::lock()
   DBUG_EXECUTE_IF("ER_BACKUP_CREATE_VP", res= ERROR;);
 
   if (res == ERROR)
-  {
     state= backup_state::ERROR;
-    if (m_log)
-      m_log->report_error(log_level::ERROR,
-                          ER_BACKUP_CREATE_VP, m_name);
-    return ERROR;
-  }
 
-  return 0;
+  return res;  
 }
 
 
@@ -1327,7 +1387,11 @@ int Backup_pump::lock()
 int Backup_pump::unlock()
 {
   DBUG_PRINT("backup_data",(" unlocking %s, goes to FINISHING state", m_name));
-  state= backup_state::FINISHING;
+
+  if (state == backup_state::ERROR)
+    return ERROR;
+
+  DBUG_ASSERT(state == backup_state::LOCKED);
 
   int res= m_drv->unlock();
 
@@ -1335,20 +1399,20 @@ int Backup_pump::unlock()
   DBUG_EXECUTE_IF("ER_BACKUP_UNLOCK_DRIVER", res= ERROR;);
 
   if (res == ERROR)
-  {
     state= backup_state::ERROR;
-    if (m_log)
-      m_log->report_error(log_level::ERROR,
-                          ER_BACKUP_UNLOCK_DRIVER, m_name);
-    return ERROR;
-  }
+  else
+    state= backup_state::FINISHING;
 
-  return 0;
+  return res;
 }
 
 
 int Backup_pump::cancel()
 {
+  // If driver has reported error, do nothing (only free() should be called).
+  if (state == backup_state::ERROR)
+    return 0;
+
   if (ERROR == m_drv->cancel())
   {
     state= backup_state::ERROR;


Attachment: [text/bzr-bundle] bzr/rafal.somla@sun.com-20091125103726-0prteeaorsc42m1t.bundle
Thread
bzr commit into mysql-6.0-backup branch (Rafal.Somla:2895) Bug#47940Rafal Somla25 Nov
  • Re: bzr commit into mysql-6.0-backup branch (Rafal.Somla:2895)Bug#47940Charles Bell25 Nov
    • Re: bzr commit into mysql-6.0-backup branch (Rafal.Somla:2895)Bug#47940Rafal Somla26 Nov
    • Re: bzr commit into mysql-6.0-backup branch (Rafal.Somla:2895)Bug#47940Charles Bell1 Dec
  • Re: bzr commit into mysql-6.0-backup branch (Rafal.Somla:2895)Bug#47940Ingo Strüwing28 Nov
    • Re: bzr commit into mysql-6.0-backup branch (Rafal.Somla:2895)Bug#47940Rafal Somla2 Dec
      • Re: bzr commit into mysql-6.0-backup branch (Rafal.Somla:2895)Bug#47940Ingo Strüwing2 Dec