List:Maria Storage Engine« Previous MessageNext Message »
From:Guilhem Bichot Date:December 5 2008 10:22pm
Subject:bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2717) Bug#41159
View as plain text  
#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-maria/ based on revid:guilhem@stripped

 2717 Guilhem Bichot	2008-12-05
      Fix for BUG#41159 "Maria: deadlock between checkpoint and maria_write() when extending data file".
      No testcase (concurrency, tested by pushbuild2).
modified:
  storage/maria/ma_bitmap.c
  storage/maria/ma_blockrec.c
  storage/maria/ma_checkpoint.c
  storage/maria/ma_close.c
  storage/maria/ma_open.c
  storage/maria/maria_def.h

per-file messages:
  storage/maria/ma_bitmap.c
    guard against concurrent flush of bitmap by checkpoint: we must have close_lock here
  storage/maria/ma_blockrec.c
    comment fixed for new behaviour
  storage/maria/ma_checkpoint.c
    The checkpoint section which looks at the share's content (bitmap, state) needs to be protected from the possible
    my_free-ing done by a concurrent maria_close(); intern_lock is not enough as
    both maria_close() and checkpoint have to release it in the middle, to avoid deadlocks.
    So the protection is done with close_lock. in_checkpoint is now protected by close_lock
    in places where it was protected by intern_lock.
  storage/maria/ma_close.c
    hold close_lock in maria_close() from start to end, to guard against checkpoint trying to flush
    bitmap while we have my_free'd its structures, for example. intern_lock was not enough as
    both maria_close() and checkpoint have to release it in the middle, to avoid deadlocks.
  storage/maria/ma_open.c
    initialize new mutex
  storage/maria/maria_def.h
    comment.
    new mutex protecting the close of a MARIA_SHARE, from _start_ to _end_ of it.
=== modified file 'storage/maria/ma_bitmap.c'
--- a/storage/maria/ma_bitmap.c	2008-10-17 13:37:07 +0000
+++ b/storage/maria/ma_bitmap.c	2008-12-05 22:21:45 +0000
@@ -260,6 +260,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *sha
 my_bool _ma_bitmap_end(MARIA_SHARE *share)
 {
   my_bool res= _ma_bitmap_flush(share);
+  safe_mutex_assert_owner(&share->close_lock);
   pthread_mutex_destroy(&share->bitmap.bitmap_lock);
   pthread_cond_destroy(&share->bitmap.bitmap_cond);
   delete_dynamic(&share->bitmap.pinned_pages);

=== modified file 'storage/maria/ma_blockrec.c'
--- a/storage/maria/ma_blockrec.c	2008-12-05 21:11:46 +0000
+++ b/storage/maria/ma_blockrec.c	2008-12-05 22:21:45 +0000
@@ -453,7 +453,7 @@ my_bool _ma_once_end_block_record(MARIA_
   {
     /*
       We de-assign the id even though index has not been flushed, this is ok
-      as intern_lock serializes us with a Checkpoint looking at our share.
+      as close_lock serializes us with a Checkpoint looking at our share.
     */
     translog_deassign_id_from_share(share);
   }

=== modified file 'storage/maria/ma_checkpoint.c'
--- a/storage/maria/ma_checkpoint.c	2008-08-28 18:52:23 +0000
+++ b/storage/maria/ma_checkpoint.c	2008-12-05 22:21:45 +0000
@@ -784,10 +784,8 @@ static int collect_tables(LEX_STRING *st
         !(share->in_checkpoint & MARIA_CHECKPOINT_SEEN_IN_LOOP))
     {
       /*
-        Why we didn't take intern_lock above: table had in_checkpoint==0 so no
-        thread could set in_checkpoint. And no thread needs to know that we
-        are setting in_checkpoint, because only maria_close() needs it and
-        cannot run now as we hold THR_LOCK_maria.
+        Apart from us, only maria_close() reads/sets in_checkpoint but cannot
+        run now as we hold THR_LOCK_maria.
       */
       /*
         This table is relevant for checkpoint and not already seen. Mark it,
@@ -887,7 +885,10 @@ static int collect_tables(LEX_STRING *st
     my_bool ignore_share;
     if (!(share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME))
     {
-      /* No need for a mutex to read the above, only us can write this flag */
+      /*
+        No need for a mutex to read the above, only us can write *this* bit of
+        the in_checkpoint bitmap
+      */
       continue;
     }
     /**
@@ -956,6 +957,13 @@ static int collect_tables(LEX_STRING *st
 
     /* OS file descriptors are ints which we stored in 4 bytes */
     compile_time_assert(sizeof(int) <= 4);
+    /*
+      Protect against maria_close() (which does some memory freeing in
+      MARIA_SHARE and MARIA_FILE_BITMAP) with close_lock. intern_lock is not
+      sufficient as we, as well as maria_close(), are going to unlock
+      intern_lock in the middle of manipulating the table.
+    */
+    pthread_mutex_lock(&share->close_lock);
     pthread_mutex_lock(&share->intern_lock);
     /*
       Tables in a normal state have their two file descriptors open.
@@ -1045,6 +1053,20 @@ static int collect_tables(LEX_STRING *st
           each checkpoint if the table was once written and then not anymore.
         */
       }
+    }
+    /*
+      _ma_bitmap_flush_all() may wait, so don't keep intern_lock as
+      otherwise this would deadlock with allocate_and_write_block_record()
+      calling _ma_set_share_data_file_length()
+    */
+    pthread_mutex_unlock(&share->intern_lock);
+    
+    if (!ignore_share)
+    {
+      /*
+        share->bitmap is valid because it's destroyed under close_lock which
+        we hold.
+      */
       if (_ma_bitmap_flush_all(share))
       {
         sync_error= 1;
@@ -1057,23 +1079,28 @@ static int collect_tables(LEX_STRING *st
       Clean up any unused states.
       TODO: Only do this call if there has been # (10?) ended transactions
       since last call.
+      We had to release intern_lock to respect lock order with LOCK_trn_list.
     */
-    pthread_mutex_unlock(&share->intern_lock);
     _ma_remove_not_visible_states_with_lock(share);
-    pthread_mutex_lock(&share->intern_lock);
 
     if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME)
     {
-      /* maria_close() left us to free the share */
-      pthread_mutex_unlock(&share->intern_lock);
+      /*
+        maria_close() left us to free the share. When it run it set share->id
+        to 0. As it run before we locked close_lock, we should have seen this
+        and so this assertion should be true:
+      */
+      DBUG_ASSERT(ignore_share);
       pthread_mutex_destroy(&share->intern_lock);
+      pthread_mutex_unlock(&share->close_lock);
+      pthread_mutex_destroy(&share->close_lock);
       my_free((uchar *)share, MYF(0));
     }
     else
     {
       /* share goes back to normal state */
       share->in_checkpoint= 0;
-      pthread_mutex_unlock(&share->intern_lock);
+      pthread_mutex_unlock(&share->close_lock);
     }
 
     /*

=== modified file 'storage/maria/ma_close.c'
--- a/storage/maria/ma_close.c	2008-12-02 22:02:52 +0000
+++ b/storage/maria/ma_close.c	2008-12-05 22:21:45 +0000
@@ -47,6 +47,7 @@ int maria_close(register MARIA_HA *info)
     if (maria_lock_database(info,F_UNLCK))
       error=my_errno;
   }
+  pthread_mutex_lock(&share->close_lock);
   pthread_mutex_lock(&share->intern_lock);
 
   if (share->options & HA_OPTION_READ_ONLY_DATA)
@@ -161,10 +162,17 @@ int maria_close(register MARIA_HA *info)
   }
   pthread_mutex_unlock(&THR_LOCK_maria);
   pthread_mutex_unlock(&share->intern_lock);
+  pthread_mutex_unlock(&share->close_lock);
   if (share_can_be_freed)
   {
     (void) pthread_mutex_destroy(&share->intern_lock);
+    (void) pthread_mutex_destroy(&share->close_lock);
     my_free((uchar *)share, MYF(0));
+    /*
+      If share cannot be freed, it's because checkpoint has previously
+      recorded to include this share in the checkpoint and so is soon going to
+      look at some of its content (share->in_checkpoint/id/last_version).
+    */
   }
   my_free(info->ftparser_param, MYF(MY_ALLOW_ZERO_PTR));
   if (info->dfile.file >= 0)

=== modified file 'storage/maria/ma_open.c'
--- a/storage/maria/ma_open.c	2008-12-02 15:29:59 +0000
+++ b/storage/maria/ma_open.c	2008-12-05 22:21:45 +0000
@@ -819,6 +819,7 @@ MARIA_HA *maria_open(const char *name, i
     pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST);
     pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST);
     pthread_cond_init(&share->key_del_cond, 0);
+    pthread_mutex_init(&share->close_lock, MY_MUTEX_INIT_FAST);
     for (i=0; i<keys; i++)
       VOID(my_rwlock_init(&share->keyinfo[i].root_lock, NULL));
     VOID(my_rwlock_init(&share->mmap_lock, NULL));

=== modified file 'storage/maria/maria_def.h'
--- a/storage/maria/maria_def.h	2008-11-24 07:48:21 +0000
+++ b/storage/maria/maria_def.h	2008-12-05 22:21:45 +0000
@@ -362,7 +362,10 @@ typedef struct st_maria_share
   myf write_flag;
   enum data_file_type data_file_type;
   enum pagecache_page_type page_type;   /* value depending transactional */
-  uint8 in_checkpoint;               /**< if Checkpoint looking at table */
+  /**
+     if Checkpoint looking at table; protected by close_lock or THR_LOCK_maria
+  */
+  uint8 in_checkpoint;
   my_bool temporary;
   /* Below flag is needed to make log tables work with concurrent insert */
   my_bool is_log_table;
@@ -389,6 +392,7 @@ typedef struct st_maria_share
   pthread_mutex_t intern_lock;		/* Locking for use with _locking */
   pthread_mutex_t key_del_lock;
   pthread_cond_t  key_del_cond;
+  pthread_mutex_t close_lock;     /**< _always_ held while closing table */
 #endif
   my_off_t mmaped_length;
   uint nonmmaped_inserts;		/* counter of writing in

Thread
bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2717) Bug#41159Guilhem Bichot5 Dec
  • Re: bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2717)Bug#41159Sergei Golubchik8 Dec