List:Commits« Previous MessageNext Message »
From:Luis Soares Date:May 24 2010 4:43pm
Subject:bzr commit into mysql-5.1-bugteam branch (luis.soares:3395) Bug#53657
View as plain text  
#At file:///home/lsoares/Workspace/bzr/work/bugfixing/53657/mysql-5.1-bugteam-latest/ based on revid:mattias.jonsson@stripped

 3395 Luis Soares	2010-05-24
      BUG#53657: Slave crashed with error 22 when trying to lock mutex
                 at mf_iocache.c, line 1722
      
      The slave crashed when two threads, the IO thread and a user
      thread, raced for the same mutex (the append_buffer_lock
      protecting the relay log's IO_CACHE). The IO thread was trying to
      flush the cache, and to do so it was acquiring the
      append_buffer_lock.
      
      However, the other thread was closing and reopening the relay log
      when the IO thread tried to lock. Closing and reopening the log
      includes destroying and reinitialising the IO_CACHE
      mutex. Therefore, the IO thread tried to lock a destroyed mutex.
      
      We fix this by backporting the patch for BUG#50364, which fixed
      this bug in MySQL Server 5.5+. The patch adds the missing
      synchronization for when flush_master_info is called and the
      relay log is flushed by the IO thread. In detail, this patch
      backports the following revision (from mysql-trunk):
      - luis.soares@stripped
      
      This patch also includes the post-push fix made for BUG#50364:
      - luis.soares@stripped

    modified:
      sql/repl_failsafe.cc
      sql/rpl_mi.cc
      sql/rpl_mi.h
      sql/rpl_rli.cc
      sql/slave.cc
      sql/sql_repl.cc
=== modified file 'sql/repl_failsafe.cc'
--- a/sql/repl_failsafe.cc	2010-01-22 10:58:21 +0000
+++ b/sql/repl_failsafe.cc	2010-05-24 16:43:27 +0000
@@ -976,7 +976,7 @@ bool load_master_data(THD* thd)
           host was specified; there could have been a problem when replication
           started, which led to relay log's IO_CACHE to not be inited.
         */
-        if (flush_master_info(active_mi, 0))
+        if (flush_master_info(active_mi, FALSE, FALSE))
           sql_print_error("Failed to flush master info file");
       }
       mysql_free_result(master_status_res);

=== modified file 'sql/rpl_mi.cc'
--- a/sql/rpl_mi.cc	2007-12-14 13:21:37 +0000
+++ b/sql/rpl_mi.cc	2010-05-24 16:43:27 +0000
@@ -312,7 +312,7 @@ file '%s')", fname);
   mi->inited = 1;
   // now change cache READ -> WRITE - must do this before flush_master_info
   reinit_io_cache(&mi->file, WRITE_CACHE, 0L, 0, 1);
-  if ((error=test(flush_master_info(mi, 1))))
+  if ((error=test(flush_master_info(mi, TRUE, TRUE))))
     sql_print_error("Failed to flush master info file");
   pthread_mutex_unlock(&mi->data_lock);
   DBUG_RETURN(error);
@@ -338,10 +338,13 @@ err:
      1 - flush master info failed
      0 - all ok
 */
-int flush_master_info(Master_info* mi, bool flush_relay_log_cache)
+int flush_master_info(Master_info* mi, 
+                      bool flush_relay_log_cache, 
+                      bool need_lock_relay_log)
 {
   IO_CACHE* file = &mi->file;
   char lbuf[22];
+  int err= 0;
 
   DBUG_ENTER("flush_master_info");
   DBUG_PRINT("enter",("master_pos: %ld", (long) mi->master_log_pos));
@@ -358,9 +361,23 @@ int flush_master_info(Master_info* mi, b
     When we come to this place in code, relay log may or not be initialized;
     the caller is responsible for setting 'flush_relay_log_cache' accordingly.
   */
-  if (flush_relay_log_cache &&
-      flush_io_cache(mi->rli.relay_log.get_log_file()))
-    DBUG_RETURN(2);
+  if (flush_relay_log_cache)
+  {
+    pthread_mutex_t *log_lock= mi->rli.relay_log.get_log_lock();
+    IO_CACHE *log_file= mi->rli.relay_log.get_log_file();
+
+    if (need_lock_relay_log)
+      pthread_mutex_lock(log_lock);
+
+    safe_mutex_assert_owner(log_lock);
+    err= flush_io_cache(log_file);
+
+    if (need_lock_relay_log)
+      pthread_mutex_unlock(log_lock);
+
+    if (err)
+      DBUG_RETURN(2);
+  }
 
   /*
     We flushed the relay log BEFORE the master.info file, because if we crash

=== modified file 'sql/rpl_mi.h'
--- a/sql/rpl_mi.h	2007-08-16 06:52:50 +0000
+++ b/sql/rpl_mi.h	2010-05-24 16:43:27 +0000
@@ -108,7 +108,8 @@ int init_master_info(Master_info* mi, co
 		     bool abort_if_no_master_info_file,
 		     int thread_mask);
 void end_master_info(Master_info* mi);
-int flush_master_info(Master_info* mi, bool flush_relay_log_cache);
-
+int flush_master_info(Master_info* mi, 
+                      bool flush_relay_log_cache, 
+                      bool need_lock_relay_log);
 #endif /* HAVE_REPLICATION */
 #endif /* RPL_MI_H */

=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc	2009-12-14 16:50:22 +0000
+++ b/sql/rpl_rli.cc	2010-05-24 16:43:27 +0000
@@ -120,7 +120,7 @@ int init_relay_log_info(Relay_log_info* 
   /*
     The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE.
     Note that the I/O thread flushes it to disk after writing every
-    event, in flush_master_info(mi, 1).
+    event, in flush_master_info(mi, TRUE, TRUE).
   */
 
   /*

=== modified file 'sql/slave.cc'
--- a/sql/slave.cc	2010-05-04 09:41:28 +0000
+++ b/sql/slave.cc	2010-05-24 16:43:27 +0000
@@ -1480,7 +1480,7 @@ static void write_ignored_events_info_to
                    " to the relay log, SHOW SLAVE STATUS may be"
                    " inaccurate");
       rli->relay_log.harvest_bytes_written(&rli->log_space_total);
-      if (flush_master_info(mi, 1))
+      if (flush_master_info(mi, TRUE, TRUE))
         sql_print_error("Failed to flush master info file");
       delete ev;
     }
@@ -2731,7 +2731,7 @@ Stopping slave I/O thread due to out-of-
                    "could not queue event from master");
         goto err;
       }
-      if (flush_master_info(mi, 1))
+      if (flush_master_info(mi, TRUE, TRUE))
       {
         sql_print_error("Failed to flush master info file");
         goto err;

=== modified file 'sql/sql_repl.cc'
--- a/sql/sql_repl.cc	2010-05-04 09:17:20 +0000
+++ b/sql/sql_repl.cc	2010-05-24 16:43:27 +0000
@@ -1282,7 +1282,7 @@ bool change_master(THD* thd, Master_info
     Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never
     a slave before).
   */
-  if (flush_master_info(mi, 0))
+  if (flush_master_info(mi, FALSE, FALSE))
   {
     my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush master info file");
     unlock_slave_threads(mi);


Attachment: [text/bzr-bundle] bzr/luis.soares@sun.com-20100524164327-ctu2hegi8zt3qsct.bundle
Thread
bzr commit into mysql-5.1-bugteam branch (luis.soares:3395) Bug#53657Luis Soares24 May