List:Maria Storage Engine« Previous MessageNext Message »
From:Michael Widenius Date:August 26 2008 12:35pm
Subject:bzr commit into MySQL/Maria:mysql-maria branch (monty:2666) Bug#36499
View as plain text  
#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-maria/

 2666 Michael Widenius	2008-08-26
      Fix for Bug#36499 Maria: potential deadlock
      This was done by introducing another mutex for handling the key_del link
      I also renamed all key_del variables to start with key_del prefix
modified:
  storage/maria/ma_close.c
  storage/maria/ma_key_recover.c
  storage/maria/ma_key_recover.h
  storage/maria/ma_open.c
  storage/maria/ma_page.c
  storage/maria/ma_write.c
  storage/maria/maria_def.h

per-file messages:
  storage/maria/ma_close.c
    Rename of key_del variables
  storage/maria/ma_key_recover.c
    Changed key_del to be protexted by it's own mutex: key_del_lock
    Rename of key_del variables
    Removed comment for old bug
  storage/maria/ma_key_recover.h
    Rename of key_del variables
  storage/maria/ma_open.c
    Initialization for new key_del_lock mutex
    Renamed intern_cond to key_del_cond as it was only used for protection of key_del
  storage/maria/ma_page.c
    Rename of key_del variables
  storage/maria/ma_write.c
    Rename of key_del variables
  storage/maria/maria_def.h
    Rename of key_del variables
    Added key_del_lock
=== modified file 'storage/maria/ma_close.c'
--- a/storage/maria/ma_close.c	2008-08-18 22:21:22 +0000
+++ b/storage/maria/ma_close.c	2008-08-26 12:34:57 +0000
@@ -33,7 +33,7 @@ int maria_close(register MARIA_HA *info)
                       (uint) share->tot_locks));
 
   /* Check that we have unlocked key delete-links properly */
-  DBUG_ASSERT(info->used_key_del == 0);
+  DBUG_ASSERT(info->key_del_used == 0);
 
   pthread_mutex_lock(&THR_LOCK_maria);
   if (info->lock_type == F_EXTRA_LCK)

=== modified file 'storage/maria/ma_key_recover.c'
--- a/storage/maria/ma_key_recover.c	2008-07-09 09:02:27 +0000
+++ b/storage/maria/ma_key_recover.c	2008-08-26 12:34:57 +0000
@@ -203,16 +203,6 @@ my_bool write_hook_for_undo_key(enum tra
     (struct st_msg_to_write_hook_for_undo_key *) hook_arg;
 
   *msg->root= msg->value;
-  /**
-    @todo BUG
-    so we have log mutex and then intern_lock.
-    While in checkpoint we have intern_lock and then log mutex, like when we
-    flush bitmap (flushing bitmap pages can call hook which takes log mutex);
-    and in _ma_update_state_lsns_sub() this is the same.
-    So we can deadlock.
-    Another one is that in translog_assign_id_to_share() we have intern_lock
-    and then log mutex.
-  */
   _ma_fast_unlock_key_del(tbl_info);
   return write_hook_for_undo(type, trn, tbl_info, lsn, 0);
 }
@@ -1209,14 +1199,14 @@ my_bool _ma_apply_undo_key_delete(MARIA_
   @note
     To allow higher concurrency in the common case where we do inserts
     and we don't have any linked blocks we do the following:
-    - Mark in info->used_key_del that we are not using key_del
+    - Mark in info->key_del_used that we are not using key_del
     - Return at once (without marking key_del as used)
 
-    This is safe as we in this case don't write current_key_del into
+    This is safe as we in this case don't write key_del_current into
     the redo log and during recover we are not updating key_del.
 
   @retval 1  Use page at end of file
-  @retval 0  Use page at share->current_key_del
+  @retval 0  Use page at share->key_del_current
 */
 
 my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end)
@@ -1224,68 +1214,72 @@ my_bool _ma_lock_key_del(MARIA_HA *info,
   MARIA_SHARE *share= info->s;
 
   /*
-    info->used_key_del is 0 initially.
+    info->key_del_used is 0 initially.
     If the caller needs a block (_ma_new()), we look at the free list:
     - looks empty? then caller will create a new block at end of file and
-    remember (through info->used_key_del==2) that it will not change
+    remember (through info->key_del_used==2) that it will not change
     state.key_del and does not need to wake up waiters as nobody will wait for
     it.
     - non-empty? then we wait for other users of the state.key_del list to
-    have finished, then we lock this list (through share->used_key_del==1)
+    have finished, then we lock this list (through share->key_del_used==1)
     because we need to prevent some other thread to also read state.key_del
-    and use the same page as ours. We remember through info->used_key_del==1
+    and use the same page as ours. We remember through info->key_del_used==1
     that we will have to set state.key_del at unlock time and wake up
     waiters.
     If the caller wants to free a block (_ma_dispose()), "empty" and
     "non-empty" are treated as "non-empty" is treated above.
-    When we are ready to unlock, we copy share->current_key_del into
+    When we are ready to unlock, we copy share->key_del_current into
     state.key_del. Unlocking happens when writing the UNDO log record, that
     can make a long lock time.
     Why we wrote "*looks* empty": because we are looking at state.key_del
-    which may be slightly old (share->current_key_del may be more recent and
+    which may be slightly old (share->key_del_current may be more recent and
     exact): when we want a new page, we tolerate to treat "there was no free
     page 1 millisecond ago"  as "there is no free page". It's ok to non-pop
     (_ma_new(), page will be found later anyway) but it's not ok to non-push
     (_ma_dispose(), page would be lost).
-    When we leave this function, info->used_key_del is always 1 or 2.
+    When we leave this function, info->key_del_used is always 1 or 2.
   */
-  if (info->used_key_del != 1)
+  if (info->key_del_used != 1)
   {
-    pthread_mutex_lock(&share->intern_lock);
+    pthread_mutex_lock(&share->key_del_lock);
     if (share->state.key_del == HA_OFFSET_ERROR && insert_at_end)
     {
-      pthread_mutex_unlock(&share->intern_lock);
-      info->used_key_del= 2;                  /* insert-with-append */
+      pthread_mutex_unlock(&share->key_del_lock);
+      info->key_del_used= 2;                  /* insert-with-append */
       return 1;
     }
 #ifdef THREAD
-    while (share->used_key_del)
-      pthread_cond_wait(&share->intern_cond, &share->intern_lock);
+    while (share->key_del_used)
+      pthread_cond_wait(&share->key_del_cond, &share->key_del_lock);
 #endif
-    info->used_key_del= 1;
-    share->used_key_del= 1;
-    share->current_key_del= share->state.key_del;
-    pthread_mutex_unlock(&share->intern_lock);
+    info->key_del_used= 1;
+    share->key_del_used= 1;
+    share->key_del_current= share->state.key_del;
+    pthread_mutex_unlock(&share->key_del_lock);
   }
-  return share->current_key_del == HA_OFFSET_ERROR;
+  return share->key_del_current == HA_OFFSET_ERROR;
 }
 
 
 /**
   @brief copy changes to key_del and unlock it
+
+  @notes
+  In case of many threads using the maria table, we always have a lock
+  on the translog when comming here.
 */
 
 void _ma_unlock_key_del(MARIA_HA *info)
 {
-  DBUG_ASSERT(info->used_key_del);
-  if (info->used_key_del == 1)                  /* Ignore insert-with-append */
+  DBUG_ASSERT(info->key_del_used);
+  if (info->key_del_used == 1)                  /* Ignore insert-with-append */
   {
     MARIA_SHARE *share= info->s;
-    pthread_mutex_lock(&share->intern_lock);
-    share->used_key_del= 0;
-    share->state.key_del= share->current_key_del;
-    pthread_mutex_unlock(&share->intern_lock);
-    pthread_cond_signal(&share->intern_cond);
+    pthread_mutex_lock(&share->key_del_lock);
+    share->key_del_used= 0;
+    share->state.key_del= share->key_del_current;
+    pthread_mutex_unlock(&share->key_del_lock);
+    pthread_cond_signal(&share->key_del_cond);
   }
-  info->used_key_del= 0;
+  info->key_del_used= 0;
 }

=== modified file 'storage/maria/ma_key_recover.h'
--- a/storage/maria/ma_key_recover.h	2008-06-26 05:18:28 +0000
+++ b/storage/maria/ma_key_recover.h	2008-08-26 12:34:57 +0000
@@ -114,6 +114,6 @@ extern my_bool _ma_lock_key_del(MARIA_HA
 extern void _ma_unlock_key_del(MARIA_HA *info);
 static inline void _ma_fast_unlock_key_del(MARIA_HA *info)
 {
-  if (info->used_key_del)
+  if (info->key_del_used)
     _ma_unlock_key_del(info);
 }

=== modified file 'storage/maria/ma_open.c'
--- a/storage/maria/ma_open.c	2008-08-25 11:49:47 +0000
+++ b/storage/maria/ma_open.c	2008-08-26 12:34:57 +0000
@@ -807,8 +807,9 @@ MARIA_HA *maria_open(const char *name, i
     }
 #ifdef THREAD
     thr_lock_init(&share->lock);
-    VOID(pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST));
-    VOID(pthread_cond_init(&share->intern_cond, 0));
+    pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST);
+    pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST);
+    pthread_cond_init(&share->key_del_cond, 0);
     for (i=0; i<keys; i++)
       VOID(my_rwlock_init(&share->keyinfo[i].root_lock, NULL));
     VOID(my_rwlock_init(&share->mmap_lock, NULL));
@@ -1215,6 +1216,13 @@ uint _ma_state_info_write(MARIA_SHARE *s
                            (should only be needed after ALTER TABLE
                            ENABLE/DISABLE KEYS, and REPAIR/OPTIMIZE).
 
+   @notes
+     For transactional multiuser tables, this function is called
+     with intern_lock & translog_lock or when the last thread who
+     is using the table is closing it.
+     Because of the translog_lock we don't need to have a lock on
+     key_del_lock.
+
    @return Operation status
      @retval 0      OK
      @retval 1      Error

=== modified file 'storage/maria/ma_page.c'
--- a/storage/maria/ma_page.c	2008-06-26 05:18:28 +0000
+++ b/storage/maria/ma_page.c	2008-08-26 12:34:57 +0000
@@ -223,8 +223,8 @@ int _ma_dispose(register MARIA_HA *info,
 
   (void) _ma_lock_key_del(info, 0);
 
-  old_link= share->current_key_del;
-  share->current_key_del= pos;
+  old_link= share->key_del_current;
+  share->key_del_current= pos;
   page_no= pos / block_size;
   bzero(buff, share->keypage_header);
   _ma_store_keynr(share, buff, (uchar) MARIA_DELETE_KEY_NR);
@@ -347,7 +347,7 @@ my_off_t _ma_new(register MARIA_HA *info
   else
   {
     uchar *buff;
-    pos= share->current_key_del;                /* Protected */
+    pos= share->key_del_current;                /* Protected */
     DBUG_ASSERT(share->pagecache->block_size == block_size);
     if (!(buff= pagecache_read(share->pagecache,
                                &share->kfile,
@@ -362,15 +362,15 @@ my_off_t _ma_new(register MARIA_HA *info
         (single linked list):
       */
 #ifndef DBUG_OFF
-      my_off_t current_key_del;
+      my_off_t key_del_current;
 #endif
-      share->current_key_del= mi_sizekorr(buff+share->keypage_header);
+      share->key_del_current= mi_sizekorr(buff+share->keypage_header);
 #ifndef DBUG_OFF
-      current_key_del= share->current_key_del;
-      DBUG_ASSERT(current_key_del != share->state.key_del &&
-                  (current_key_del != 0) &&
-                  ((current_key_del == HA_OFFSET_ERROR) ||
-                   (current_key_del <=
+      key_del_current= share->key_del_current;
+      DBUG_ASSERT(key_del_current != share->state.key_del &&
+                  (key_del_current != 0) &&
+                  ((key_del_current == HA_OFFSET_ERROR) ||
+                   (key_del_current <=
                     (share->state.state.key_file_length - block_size))));
 #endif
     }

=== modified file 'storage/maria/ma_write.c'
--- a/storage/maria/ma_write.c	2008-08-25 11:49:47 +0000
+++ b/storage/maria/ma_write.c	2008-08-26 12:34:57 +0000
@@ -1757,11 +1757,11 @@ my_bool _ma_log_new(MARIA_HA *info, my_o
   page_store(log_data + FILEID_STORE_SIZE, page);
 
   /* Store link to next unused page */
-  if (info->used_key_del == 2)
+  if (info->key_del_used == 2)
     page= 0;                                    /* key_del not changed */
   else
-    page= ((share->current_key_del == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
-           share->current_key_del / share->block_size);
+    page= ((share->key_del_current == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
+           share->key_del_current / share->block_size);
 
   page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
   key_nr_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE*2, key_nr);

=== modified file 'storage/maria/maria_def.h'
--- a/storage/maria/maria_def.h	2008-08-25 11:49:47 +0000
+++ b/storage/maria/maria_def.h	2008-08-26 12:34:57 +0000
@@ -336,7 +336,7 @@ typedef struct st_maria_share
   size_t (*file_read)(MARIA_HA *, uchar *, size_t, my_off_t, myf);
   size_t (*file_write)(MARIA_HA *, const uchar *, size_t, my_off_t, myf);
   invalidator_by_filename invalidator;	/* query cache invalidator */
-  my_off_t current_key_del;		/* delete links for index pages */
+  my_off_t key_del_current;		/* delete links for index pages */
   ulong this_process;			/* processid */
   ulong last_process;			/* For table-change-check */
   ulong last_version;			/* Version on start */
@@ -380,12 +380,13 @@ typedef struct st_maria_share
   */
   my_bool now_transactional;
   my_bool have_versioning;
-  my_bool used_key_del;                         /* != 0 if key_del is locked */
+  my_bool key_del_used;                         /* != 0 if key_del is locked */
 #ifdef THREAD
   THR_LOCK lock;
   void (*lock_restore_status)(void *);
   pthread_mutex_t intern_lock;		/* Locking for use with _locking */
-  pthread_cond_t intern_cond;
+  pthread_mutex_t key_del_lock;
+  pthread_cond_t  key_del_cond;
 #endif
   my_off_t mmaped_length;
   uint nonmmaped_inserts;		/* counter of writing in
@@ -534,7 +535,7 @@ struct st_maria_handler
   int save_lastinx;
   uint preload_buff_size;		/* When preloading indexes */
   uint16 last_used_keyseg;              /* For MARIAMRG */
-  uint8 used_key_del;                   /* != 0 if key_del is used */
+  uint8 key_del_used;                   /* != 0 if key_del is used */
   my_bool was_locked;			/* Was locked in panic */
   my_bool append_insert_at_end;		/* Set if concurrent insert */
   my_bool quick_mode;

Thread
bzr commit into MySQL/Maria:mysql-maria branch (monty:2666) Bug#36499Michael Widenius26 Aug