List:Maria Storage Engine« Previous MessageNext Message »
From:Guilhem Bichot Date:October 14 2008 9:16am
Subject:bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2676) Bug#39210
View as plain text  
#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-maria/

 2676 Guilhem Bichot	2008-10-14
      Fix for BUG#39210 "Maria deadlock in _ma_bitmap_wait_or_flush". It was a thread
      which nobody woke up (see comment of ma_bitmap.c). No testcase, this requires
      multiple threads and is automatically tested at push time by maria_stress.yy (pushbuild2).
modified:
  storage/maria/ma_bitmap.c
  storage/maria/ma_blockrec.c
  storage/maria/ma_blockrec.h

per-file messages:
  storage/maria/ma_bitmap.c
    * _ma_bitmap_wait_or_flush() didn't publish that it was waiting for bitmap to not
    be over-allocated (i.e. didn't modify bitmap->flush_all_requested) so nobody
    (_ma_bitmap_flushable(), _ma_bitmap_release_unused()) knew it had to wake it up
    => it stalled (BUG#39210). In fact the wait in _ma_bitmap_wait_or_flush()
    is not needed, it's ok if this function sends the over-allocated bitmap to page
    cache and keeps pin on it (_ma_bitmap_unpin_all() will unpin it later, and
    the one who added _ma_bitmap_wait_or_flush() didn't know it). Function
    is thus deleted, as _ma_bitmap_flush() can do its job.
    * After fixing that, test runs longer and BUG 39665 happens, which looks like
    a separate page cache bug.
    * Smaller changes: _ma_bitmap_flush_all() called write_changed_bitmap() even
    though it might not be changed; added some DBUG calls in functions; split
    assertions.
    * In _ma_bitmap_release_unused(), it's more logical to test non_flushable_state
    than now_transactional to know if we have to decrement non_flushable
    (it's exactly per the definition of non_flushable_state).
  storage/maria/ma_blockrec.c
    _ma_bitmap_wait_or_flush() is not needed.
  storage/maria/ma_blockrec.h
    _ma_bitmap_wait_or_flush() is not needed.
=== modified file 'storage/maria/ma_bitmap.c'
--- a/storage/maria/ma_bitmap.c	2008-04-03 13:40:25 +0000
+++ b/storage/maria/ma_bitmap.c	2008-10-14 09:16:41 +0000
@@ -281,6 +281,14 @@ my_bool _ma_bitmap_end(MARIA_SHARE *shar
     by this thread (ie, checking the changed flag is ok). The reason we
     check it again in the mutex is that if someone else did a flush at the
     same time, we don't have to do the write.
+    This is also ok for _ma_scan_init_block_record() which does not want to
+    miss rows: it cares only for committed rows, that is, rows for which there
+    was a commit before our transaction started; as commit and transaction's
+    start are protected by the same LOCK_trn_list mutex, we see memory at
+    least as new as at other transaction's commit time, so if the committed
+    rows caused bitmap->changed to be true, we see it; if we see 0 it really
+    means a flush happened since then. So, it's ok to read without bitmap's
+    mutex.
 
   RETURN
     0    ok
@@ -305,38 +313,6 @@ my_bool _ma_bitmap_flush(MARIA_SHARE *sh
 }
 
 
-/*
-  @brief Send updated bitmap to the page cache if bitmap is free
-
-  @note
-  This is used by reader threads which don't unpin things
-*/
-
-my_bool _ma_bitmap_wait_or_flush(MARIA_SHARE *share)
-{
-  my_bool res= 0;
-  MARIA_FILE_BITMAP *bitmap= &share->bitmap;
-  DBUG_ENTER("_ma_bitmap_flush");
-  if (bitmap->changed)
-  {
-    pthread_mutex_lock(&bitmap->bitmap_lock);
-    while (bitmap->non_flushable && bitmap->changed)
-    {
-      DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
-      pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
-    }
-    if (bitmap->changed)
-    {
-      bitmap->changed= 0;
-      res= write_changed_bitmap(share, bitmap);
-    }
-    pthread_mutex_unlock(&bitmap->bitmap_lock);
-  }
-  DBUG_RETURN(res);
-}
-
-
-
 /**
    Dirty-page filtering criteria for bitmap pages
 
@@ -386,8 +362,11 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE
       unpinned. We keep the mutex to preserve this situation, and flush to the
       file.
     */
-    res= write_changed_bitmap(share, bitmap);
-    bitmap->changed= FALSE;
+    if (bitmap->changed)
+    {
+      res= write_changed_bitmap(share, bitmap);
+      bitmap->changed= FALSE;
+    }
     /*
       We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap
       pages have been flushed. That's a condition of correctness of
@@ -424,6 +403,8 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE
 
   @return Operation status
     @retval   0   ok
+
+  @note This unpins pages pinned by other threads.
 */
 
 static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
@@ -2139,8 +2120,8 @@ my_bool _ma_bitmap_set_full_page_bits(MA
    function first waits for the flush to be done.
 
    @note
-   info->non_flushable_state is set to 1 if we have incremented
-   bitmap->info->non_flushable and not yet decremented it.
+   this sets info->non_flushable_state to 1 if we have incremented
+   bitmap->non_flushable and not yet decremented it.
 
    @param  share               Table's share
    @param  non_flushable_inc   Increment of MARIA_FILE_BITMAP::non_flushable
@@ -2151,20 +2132,21 @@ void _ma_bitmap_flushable(MARIA_HA *info
 {
   MARIA_SHARE *share= info->s;
   MARIA_FILE_BITMAP *bitmap;
+  DBUG_ENTER("_ma_bitmap_flushable");
 
   /*
     Not transactional tables are never automaticly flushed and needs no
     protection
   */
   if (!share->now_transactional)
-    return;
+    DBUG_VOID_RETURN;
 
   bitmap= &share->bitmap;
   if (non_flushable_inc == -1)
   {
     pthread_mutex_lock(&bitmap->bitmap_lock);
-    DBUG_ASSERT((int) bitmap->non_flushable > 0 &&
-                info->non_flushable_state == 1);
+    DBUG_ASSERT((int) bitmap->non_flushable > 0);
+    DBUG_ASSERT(info->non_flushable_state == 1);
     info->non_flushable_state= 0;
     if (--bitmap->non_flushable == 0)
     {
@@ -2182,14 +2164,15 @@ void _ma_bitmap_flushable(MARIA_HA *info
     }
     DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
     pthread_mutex_unlock(&bitmap->bitmap_lock);
-    return;
+    DBUG_VOID_RETURN;
   }
-  DBUG_ASSERT(non_flushable_inc == 1 && info->non_flushable_state == 0);
+  DBUG_ASSERT(non_flushable_inc == 1);
+  DBUG_ASSERT(info->non_flushable_state == 0);
   /* It is a read without mutex because only an optimization */
   if (unlikely(bitmap->flush_all_requested))
   {
     /*
-      _ma_bitmap_flush_all() is waiting for the bitmap to become
+      Some other thread is waiting for the bitmap to become
       flushable. Not the moment to make the bitmap unflushable or more
       unflushable; let's rather back off and wait. If we didn't do this, with
       multiple writers, there may always be one thread causing the bitmap to
@@ -2214,6 +2197,7 @@ void _ma_bitmap_flushable(MARIA_HA *info
   bitmap->non_flushable++;
   info->non_flushable_state= 1;
   DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
+  DBUG_VOID_RETURN;
 }
 
 
@@ -2321,10 +2305,10 @@ my_bool _ma_bitmap_release_unused(MARIA_
       goto err;
   }
 
-  if (info->s->now_transactional)
+  /* This duplicates ma_bitmap_flushable(-1) except it already has mutex */
+  if (info->non_flushable_state)
   {
-    DBUG_ASSERT((int) bitmap->non_flushable >= 0 &&
-                info->non_flushable_state);
+    DBUG_ASSERT((int) bitmap->non_flushable >= 0);
     info->non_flushable_state= 0;
     if (--bitmap->non_flushable == 0)
     {

=== modified file 'storage/maria/ma_blockrec.c'
--- a/storage/maria/ma_blockrec.c	2008-10-09 20:03:54 +0000
+++ b/storage/maria/ma_blockrec.c	2008-10-14 09:16:41 +0000
@@ -4982,10 +4982,12 @@ my_bool _ma_scan_init_block_record(MARIA
   info->scan.bitmap_pos= info->scan.bitmap_end;
   info->scan.bitmap_page= (pgcache_page_no_t) 0 - share->bitmap.pages_covered;
   /*
-    We have to flush bitmap as we will read the bitmap from the page cache
-    while scanning rows
+    We need to flush what's in memory (bitmap.map) to page cache otherwise, as
+    we are going to read bitmaps from page cache in table scan (see
+    _ma_scan_block_record()), we may miss recently inserted rows (bitmap page
+    in page cache would be too old).
   */
-  DBUG_RETURN(_ma_bitmap_wait_or_flush(info->s));
+  DBUG_RETURN(_ma_bitmap_wait_flush(info->s));
 }
 
 

=== modified file 'storage/maria/ma_blockrec.h'
--- a/storage/maria/ma_blockrec.h	2008-04-10 02:26:36 +0000
+++ b/storage/maria/ma_blockrec.h	2008-10-14 09:16:41 +0000
@@ -182,7 +182,6 @@ maria_page_get_lsn(uchar *page, pgcache_
 my_bool _ma_bitmap_init(MARIA_SHARE *share, File file);
 my_bool _ma_bitmap_end(MARIA_SHARE *share);
 my_bool _ma_bitmap_flush(MARIA_SHARE *share);
-my_bool _ma_bitmap_wait_or_flush(MARIA_SHARE *share);
 my_bool _ma_bitmap_flush_all(MARIA_SHARE *share);
 void _ma_bitmap_reset_cache(MARIA_SHARE *share);
 my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,

Thread
bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2676) Bug#39210Guilhem Bichot14 Oct