#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-maria/
2677 Guilhem Bichot 2008-10-13
Fix for BUG#39210 "Maria deadlock in _ma_bitmap_wait_or_flush". It was actually a story of
threads which nobody woke up (see comment of ma_blockrec.c). No testcase, this requires
multiple threads and is automatically tested at push time by maria_stress.yy (pushbuild2).
modified:
storage/maria/ma_bitmap.c
storage/maria/ma_blockrec.c
storage/maria/maria_def.h
per-file messages:
storage/maria/ma_bitmap.c
_ma_bitmap_wait_or_flush() didn't publish that it was waiting for bitmap to not
be over-allocated (i.e. didn't modify bitmap->flush_all_requested) so nobody
(_ma_bitmap_flushable(), _ma_bitmap_release_unused()) knew it had to wake it up => it stalled.
After fixing that, this still stalled sometimes, because there can be two concurrent
threads waiting for the bitmap to not be over-allocated (full table scan and checkpoint) so
bitmap->flush_all_requested must be a counter, not just a boolean.
After fixing that, this still stalled sometimes, because _ma_bitmap_flushable(1)
(going to over-allocate the bitmap) intentionally waits for those who are waiting
for the bitmap to not be over-allocated: in return, those must wake up
_ma_bitmap_flushable(1), but _ma_bitmap_wait_or_flush() didn't do so.
After fixing that, a thread which unpins bitmap pages in _ma_bitmap_unpin_all() sometimes
hit an assertion in the page cache which states that you can unpin/unlock only what
*you* have pinned/locked. Fixed by changing the pagecache_unlock_by_link() call
in _ma_bitmap_unpin_all().
After fixing that, we hit BUG 39665 :)
storage/maria/ma_blockrec.c
split assertion in two, I see the second half fire (filed as BUG 39665)
storage/maria/maria_def.h
At one moment, there can be more than one thread waiting for the bitmap to not be over-allocated
(table scan and checkpoint are those threads).
=== modified file 'storage/maria/ma_bitmap.c'
--- a/storage/maria/ma_bitmap.c 2008-10-13 15:50:28 +0000
+++ b/storage/maria/ma_bitmap.c 2008-10-13 21:03:05 +0000
@@ -306,32 +306,50 @@ my_bool _ma_bitmap_flush(MARIA_SHARE *sh
}
-/*
- @brief Send updated bitmap to the page cache if bitmap is free
-
- @note
- This is used by reader threads which don't unpin things
+/**
+ Wait for bitmap page to not be over-allocated and send it to page cache
*/
my_bool _ma_bitmap_wait_or_flush(MARIA_SHARE *share)
{
my_bool res= 0;
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
- DBUG_ENTER("_ma_bitmap_flush");
+ DBUG_ENTER("_ma_bitmap_wait_or_flush");
+ /*
+ We need to flush what's in memory (bitmap.map) to page cache otherwise, as
+ we are going to read bitmaps from page cache in table scan (see
+ _ma_scan_block_record()), we may miss recently inserted rows (page in page
+ cache would be too old). This matters only for committed rows, that is,
+ rows for which there was a commit before our transaction started; as
+ commit and transaction's start are protected by the same mutex, we see
+ memory at least as new as at other transaction's commit time, so if the
+ committed rows caused bitmap->changed to be true, we see it; if we see 0
+ it really means a flush happened since then. So, read without bitmap's
+ mutex is safe here.
+ */
if (bitmap->changed)
{
pthread_mutex_lock(&bitmap->bitmap_lock);
+ /*
+ The following wait is not for correctness (we could just go directly
+ into write_changed_bitmap() and thus pin the unflushable bitmap page),
+ but as pin means write-lock it could hurt concurrency.
+ */
+ bitmap->flush_all_requested++;
while (bitmap->non_flushable && bitmap->changed)
{
DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
}
+ bitmap->flush_all_requested--;
if (bitmap->changed)
{
bitmap->changed= 0;
res= write_changed_bitmap(share, bitmap);
}
pthread_mutex_unlock(&bitmap->bitmap_lock);
+ DBUG_PRINT("info", ("bitmap flusher waking up others"));
+ pthread_cond_broadcast(&bitmap->bitmap_cond);
}
DBUG_RETURN(res);
}
@@ -373,7 +391,7 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE
pthread_mutex_lock(&bitmap->bitmap_lock);
if (bitmap->changed)
{
- bitmap->flush_all_requested= TRUE;
+ bitmap->flush_all_requested++;
#ifndef WRONG_BITMAP_FLUSH
while (bitmap->non_flushable > 0)
{
@@ -381,14 +399,18 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE
pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
}
#endif
+ bitmap->flush_all_requested--;
/*
Bitmap is in a flushable state: its contents in memory are reflected by
log records (complete REDO-UNDO groups) and all bitmap pages are
unpinned. We keep the mutex to preserve this situation, and flush to the
file.
*/
- res= write_changed_bitmap(share, bitmap);
- bitmap->changed= FALSE;
+ if (bitmap->changed)
+ {
+ res= write_changed_bitmap(share, bitmap);
+ bitmap->changed= FALSE;
+ }
/*
We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap
pages have been flushed. That's a condition of correctness of
@@ -405,7 +427,6 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE
&bitmap->pages_covered) &
PCFLUSH_PINNED_AND_ERROR)
res= TRUE;
- bitmap->flush_all_requested= FALSE;
/*
Some well-behaved threads may be waiting for flush_all_requested to
become false, wake them up.
@@ -425,6 +446,8 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE
@return Operation status
@retval 0 ok
+
+ @note This unlocks and unpins pages locked and pinned by other threads.
*/
static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
@@ -438,7 +461,7 @@ static void _ma_bitmap_unpin_all(MARIA_S
while (pinned_page-- != page_link)
pagecache_unlock_by_link(share->pagecache, pinned_page->link,
pinned_page->unlock, PAGECACHE_UNPIN,
- LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE, FALSE);
+ LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE, TRUE);
bitmap->pinned_pages.elements= 0;
DBUG_VOID_RETURN;
}
@@ -2140,8 +2163,8 @@ my_bool _ma_bitmap_set_full_page_bits(MA
function first waits for the flush to be done.
@note
- info->non_flushable_state is set to 1 if we have incremented
- bitmap->info->non_flushable and not yet decremented it.
+ this sets info->non_flushable_state to 1 if we have incremented
+ bitmap->non_flushable and not yet decremented it.
@param share Table's share
@param non_flushable_inc Increment of MARIA_FILE_BITMAP::non_flushable
@@ -2152,20 +2175,21 @@ void _ma_bitmap_flushable(MARIA_HA *info
{
MARIA_SHARE *share= info->s;
MARIA_FILE_BITMAP *bitmap;
+ DBUG_ENTER("_ma_bitmap_flushable");
/*
Not transactional tables are never automaticly flushed and needs no
protection
*/
if (!share->now_transactional)
- return;
+ DBUG_VOID_RETURN;
bitmap= &share->bitmap;
if (non_flushable_inc == -1)
{
pthread_mutex_lock(&bitmap->bitmap_lock);
- DBUG_ASSERT((int) bitmap->non_flushable > 0 &&
- info->non_flushable_state == 1);
+ DBUG_ASSERT((int) bitmap->non_flushable > 0);
+ DBUG_ASSERT(info->non_flushable_state == 1);
info->non_flushable_state= 0;
if (--bitmap->non_flushable == 0)
{
@@ -2183,14 +2207,15 @@ void _ma_bitmap_flushable(MARIA_HA *info
}
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
pthread_mutex_unlock(&bitmap->bitmap_lock);
- return;
+ DBUG_VOID_RETURN;
}
- DBUG_ASSERT(non_flushable_inc == 1 && info->non_flushable_state == 0);
+ DBUG_ASSERT(non_flushable_inc == 1);
+ DBUG_ASSERT(info->non_flushable_state == 0);
/* It is a read without mutex because only an optimization */
if (unlikely(bitmap->flush_all_requested))
{
/*
- _ma_bitmap_flush_all() is waiting for the bitmap to become
+ Some other thread is waiting for the bitmap to become
flushable. Not the moment to make the bitmap unflushable or more
unflushable; let's rather back off and wait. If we didn't do this, with
multiple writers, there may always be one thread causing the bitmap to
@@ -2215,6 +2240,7 @@ void _ma_bitmap_flushable(MARIA_HA *info
bitmap->non_flushable++;
info->non_flushable_state= 1;
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
+ DBUG_VOID_RETURN;
}
@@ -2322,10 +2348,12 @@ my_bool _ma_bitmap_release_unused(MARIA_
goto err;
}
- if (info->s->now_transactional)
+ /*
+ This duplicates ma_bitmap_flushable(-1) except that it already has mutex.
+ */
+ if (info->non_flushable_state)
{
- DBUG_ASSERT((int) bitmap->non_flushable >= 0 &&
- info->non_flushable_state);
+ DBUG_ASSERT((int) bitmap->non_flushable >= 0);
info->non_flushable_state= 0;
if (--bitmap->non_flushable == 0)
{
=== modified file 'storage/maria/ma_blockrec.c'
--- a/storage/maria/ma_blockrec.c 2008-10-13 15:50:28 +0000
+++ b/storage/maria/ma_blockrec.c 2008-10-13 21:03:05 +0000
@@ -5141,7 +5141,9 @@ restart_record_read:
if (end_of_data > info->scan.dir_end ||
offset < PAGE_HEADER_SIZE || length < share->base.min_block_length)
{
- DBUG_ASSERT(0);
+ DBUG_ASSERT(!(end_of_data > info->scan.dir_end));
+ DBUG_ASSERT(!(offset < PAGE_HEADER_SIZE));
+ DBUG_ASSERT(!(length < share->base.min_block_length));
goto err;
}
#endif
=== modified file 'storage/maria/maria_def.h'
--- a/storage/maria/maria_def.h 2008-10-09 20:03:54 +0000
+++ b/storage/maria/maria_def.h 2008-10-13 21:03:05 +0000
@@ -242,7 +242,7 @@ typedef struct st_maria_file_bitmap
pgcache_page_no_t page; /* Page number for current bitmap */
uint used_size; /* Size of bitmap head that is not 0 */
my_bool changed; /* 1 if page needs to be flushed */
- my_bool flush_all_requested; /**< If _ma_bitmap_flush_all waiting */
+ uint flush_all_requested; /**< count of bitmap flushers */
uint non_flushable; /**< 0 if bitmap and log are in sync */
PAGECACHE_FILE file; /* datafile where bitmap is stored */
| Thread |
|---|
| • bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2677) Bug#39210 | Guilhem Bichot | 13 Oct |