List:Maria Storage Engine« Previous MessageNext Message »
From:Michael Widenius Date:December 9 2008 12:36pm
Subject:bzr commit into MySQL/Maria:mysql-maria branch (monty:2707)
View as plain text  
#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-maria/

 2707 Michael Widenius	2008-12-09 [merge]
      Merge with main Maria tree
modified:
  storage/maria/ha_maria.cc
  storage/maria/ma_bitmap.c
  storage/maria/ma_blockrec.c
  storage/maria/ma_checkpoint.c
  storage/maria/ma_close.c
  storage/maria/ma_open.c
  storage/maria/ma_recovery.c
  storage/maria/ma_state.c
  storage/maria/ma_state.h
  storage/maria/maria_def.h
  storage/maria/trnman.c
  storage/maria/trnman_public.h

=== modified file 'storage/maria/ha_maria.cc'
--- a/storage/maria/ha_maria.cc	2008-12-03 04:07:50 +0000
+++ b/storage/maria/ha_maria.cc	2008-12-09 09:56:02 +0000
@@ -2365,6 +2365,10 @@ int ha_maria::external_lock(THD *thd, in
         We always re-enable, don't rely on thd->transaction.on as it is
         sometimes reset to true after unlocking (see mysql_truncate() for a
         partitioned table based on Maria).
+        Note that we can come here without having an exclusive lock on the
+        table, for example in this case:
+        external_lock(F_(WR|RD)LCK); thr_lock() which fails due to lock
+        abortion; external_lock(F_UNLCK).
       */
       if (_ma_reenable_logging_for_table(file, TRUE))
         DBUG_RETURN(1);

=== modified file 'storage/maria/ma_bitmap.c'
--- a/storage/maria/ma_bitmap.c	2008-10-17 13:37:07 +0000
+++ b/storage/maria/ma_bitmap.c	2008-12-09 09:56:02 +0000
@@ -260,6 +260,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *sha
 my_bool _ma_bitmap_end(MARIA_SHARE *share)
 {
   my_bool res= _ma_bitmap_flush(share);
+  safe_mutex_assert_owner(&share->close_lock);
   pthread_mutex_destroy(&share->bitmap.bitmap_lock);
   pthread_cond_destroy(&share->bitmap.bitmap_cond);
   delete_dynamic(&share->bitmap.pinned_pages);

=== modified file 'storage/maria/ma_blockrec.c'
--- a/storage/maria/ma_blockrec.c	2008-12-05 21:11:46 +0000
+++ b/storage/maria/ma_blockrec.c	2008-12-09 09:56:02 +0000
@@ -453,7 +453,7 @@ my_bool _ma_once_end_block_record(MARIA_
   {
     /*
       We de-assign the id even though index has not been flushed, this is ok
-      as intern_lock serializes us with a Checkpoint looking at our share.
+      as close_lock serializes us with a Checkpoint looking at our share.
     */
     translog_deassign_id_from_share(share);
   }

=== modified file 'storage/maria/ma_checkpoint.c'
--- a/storage/maria/ma_checkpoint.c	2008-08-28 18:52:23 +0000
+++ b/storage/maria/ma_checkpoint.c	2008-12-09 09:56:02 +0000
@@ -784,10 +784,8 @@ static int collect_tables(LEX_STRING *st
         !(share->in_checkpoint & MARIA_CHECKPOINT_SEEN_IN_LOOP))
     {
       /*
-        Why we didn't take intern_lock above: table had in_checkpoint==0 so no
-        thread could set in_checkpoint. And no thread needs to know that we
-        are setting in_checkpoint, because only maria_close() needs it and
-        cannot run now as we hold THR_LOCK_maria.
+        Apart from us, only maria_close() reads/sets in_checkpoint but cannot
+        run now as we hold THR_LOCK_maria.
       */
       /*
         This table is relevant for checkpoint and not already seen. Mark it,
@@ -887,7 +885,10 @@ static int collect_tables(LEX_STRING *st
     my_bool ignore_share;
     if (!(share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME))
     {
-      /* No need for a mutex to read the above, only us can write this flag */
+      /*
+        No need for a mutex to read the above, only us can write *this* bit of
+        the in_checkpoint bitmap
+      */
       continue;
     }
     /**
@@ -956,6 +957,14 @@ static int collect_tables(LEX_STRING *st
 
     /* OS file descriptors are ints which we stored in 4 bytes */
     compile_time_assert(sizeof(int) <= 4);
+    /*
+      Protect against maria_close() (which does some memory freeing in
+      MARIA_FILE_BITMAP) with close_lock. intern_lock is not
+      sufficient as we, as well as maria_close(), are going to unlock
+      intern_lock in the middle of manipulating the table. Serializing us and
+      maria_close() should help avoid problems.
+    */
+    pthread_mutex_lock(&share->close_lock);
     pthread_mutex_lock(&share->intern_lock);
     /*
       Tables in a normal state have their two file descriptors open.
@@ -1045,6 +1054,20 @@ static int collect_tables(LEX_STRING *st
           each checkpoint if the table was once written and then not anymore.
         */
       }
+    }
+    /*
+      _ma_bitmap_flush_all() may wait, so don't keep intern_lock as
+      otherwise this would deadlock with allocate_and_write_block_record()
+      calling _ma_set_share_data_file_length()
+    */
+    pthread_mutex_unlock(&share->intern_lock);
+    
+    if (!ignore_share)
+    {
+      /*
+        share->bitmap is valid because it's destroyed under close_lock which
+        we hold.
+      */
       if (_ma_bitmap_flush_all(share))
       {
         sync_error= 1;
@@ -1057,23 +1080,28 @@ static int collect_tables(LEX_STRING *st
       Clean up any unused states.
       TODO: Only do this call if there has been # (10?) ended transactions
       since last call.
+      We had to release intern_lock to respect lock order with LOCK_trn_list.
     */
-    pthread_mutex_unlock(&share->intern_lock);
-    _ma_remove_not_visible_states_with_lock(share);
-    pthread_mutex_lock(&share->intern_lock);
+    _ma_remove_not_visible_states_with_lock(share, FALSE);
 
     if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME)
     {
-      /* maria_close() left us to free the share */
-      pthread_mutex_unlock(&share->intern_lock);
+      /*
+        maria_close() left us free the share. When it run it set share->id
+        to 0. As it run before we locked close_lock, we should have seen this
+        and so this assertion should be true:
+      */
+      DBUG_ASSERT(ignore_share);
       pthread_mutex_destroy(&share->intern_lock);
+      pthread_mutex_unlock(&share->close_lock);
+      pthread_mutex_destroy(&share->close_lock);
       my_free((uchar *)share, MYF(0));
     }
     else
     {
       /* share goes back to normal state */
       share->in_checkpoint= 0;
-      pthread_mutex_unlock(&share->intern_lock);
+      pthread_mutex_unlock(&share->close_lock);
     }
 
     /*

=== modified file 'storage/maria/ma_close.c'
--- a/storage/maria/ma_close.c	2008-12-02 22:02:52 +0000
+++ b/storage/maria/ma_close.c	2008-12-09 09:56:02 +0000
@@ -47,6 +47,7 @@ int maria_close(register MARIA_HA *info)
     if (maria_lock_database(info,F_UNLCK))
       error=my_errno;
   }
+  pthread_mutex_lock(&share->close_lock);
   pthread_mutex_lock(&share->intern_lock);
 
   if (share->options & HA_OPTION_READ_ONLY_DATA)
@@ -125,22 +126,30 @@ int maria_close(register MARIA_HA *info)
     }
 #endif
     DBUG_ASSERT(share->now_transactional == share->base.born_transactional);
-    if (share->in_checkpoint == MARIA_CHECKPOINT_LOOKS_AT_ME)
+    /*
+      We assign -1 because checkpoint does not need to flush (in case we
+      have concurrent checkpoint if no then we do not need it here also)
+    */
+    share->kfile.file= -1;
+
+    /*
+      Remember share->history for future opens
+
+      We have to unlock share->intern_lock then lock it after
+      LOCK_trn_list (trnman_lock()) to avoid dead locks.
+    */
+    pthread_mutex_unlock(&share->intern_lock);
+    _ma_remove_not_visible_states_with_lock(share, TRUE);
+    pthread_mutex_lock(&share->intern_lock);
+
+    if (share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME)
     {
-      share->kfile.file= -1; /* because Checkpoint does not need to flush */
       /* we cannot my_free() the share, Checkpoint would see a bad pointer */
       share->in_checkpoint|= MARIA_CHECKPOINT_SHOULD_FREE_ME;
     }
     else
       share_can_be_freed= TRUE;
 
-    /*
-      Remember share->history for future opens
-      Here we take share->intern_lock followed by trans_lock but this is
-      safe as no other thread one can use 'share' here.
-    */
-    share->state_history= _ma_remove_not_visible_states(share->state_history,
-                                                        1, 0);
     if (share->state_history)
     {
       MARIA_STATE_HISTORY_CLOSED *history;
@@ -161,10 +170,17 @@ int maria_close(register MARIA_HA *info)
   }
   pthread_mutex_unlock(&THR_LOCK_maria);
   pthread_mutex_unlock(&share->intern_lock);
+  pthread_mutex_unlock(&share->close_lock);
   if (share_can_be_freed)
   {
     (void) pthread_mutex_destroy(&share->intern_lock);
+    (void) pthread_mutex_destroy(&share->close_lock);
     my_free((uchar *)share, MYF(0));
+    /*
+      If share cannot be freed, it's because checkpoint has previously
+      recorded to include this share in the checkpoint and so is soon going to
+      look at some of its content (share->in_checkpoint/id/last_version).
+    */
   }
   my_free(info->ftparser_param, MYF(MY_ALLOW_ZERO_PTR));
   if (info->dfile.file >= 0)

=== modified file 'storage/maria/ma_open.c'
--- a/storage/maria/ma_open.c	2008-12-02 15:29:59 +0000
+++ b/storage/maria/ma_open.c	2008-12-09 09:56:02 +0000
@@ -819,6 +819,7 @@ MARIA_HA *maria_open(const char *name, i
     pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST);
     pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST);
     pthread_cond_init(&share->key_del_cond, 0);
+    pthread_mutex_init(&share->close_lock, MY_MUTEX_INIT_FAST);
     for (i=0; i<keys; i++)
       VOID(my_rwlock_init(&share->keyinfo[i].root_lock, NULL));
     VOID(my_rwlock_init(&share->mmap_lock, NULL));

=== modified file 'storage/maria/ma_recovery.c'
--- a/storage/maria/ma_recovery.c	2008-12-05 21:11:46 +0000
+++ b/storage/maria/ma_recovery.c	2008-12-09 09:56:02 +0000
@@ -3276,6 +3276,8 @@ void _ma_tmp_disable_logging_for_table(M
 /**
    Re-enables logging for a table which had it temporarily disabled.
 
+   Only the thread which disabled logging is allowed to reenable it.
+
    @param  info            table
    @param  flush_pages     if function needs to flush pages first
 */

=== modified file 'storage/maria/ma_state.c'
--- a/storage/maria/ma_state.c	2008-12-08 18:31:16 +0000
+++ b/storage/maria/ma_state.c	2008-12-09 12:36:51 +0000
@@ -168,7 +168,6 @@ MARIA_STATE_HISTORY
 
   if (all && parent == &org_history->next)
   {
-    DBUG_ASSERT(trnman_is_locked == 0);
     /* There is only one state left. Delete this if it's visible for all */
     if (last_trid < trnman_get_min_trid())
     {
@@ -183,6 +182,11 @@ MARIA_STATE_HISTORY
 /**
    @brief Remove not used state history
 
+   @param share          Maria table information
+   @param all            1 if we should delete the first state if it's
+                         visible for all.  For the moment this is only used
+                         on close() of table.
+
    @notes
    share and trnman are not locked.
 
@@ -191,14 +195,19 @@ MARIA_STATE_HISTORY
    takes share->intern_lock.
 */
 
-void _ma_remove_not_visible_states_with_lock(MARIA_SHARE *share)
+void _ma_remove_not_visible_states_with_lock(MARIA_SHARE *share,
+                                             my_bool all)
 {
-  trnman_lock();
+  my_bool is_lock_trman;
+  if ((is_lock_trman= trman_is_inited()))
+    trnman_lock();
+
   pthread_mutex_lock(&share->intern_lock);
-  share->state_history=  _ma_remove_not_visible_states(share->state_history, 0,
-                                                       1);
+  share->state_history=  _ma_remove_not_visible_states(share->state_history,
+                                                       all, 1);
   pthread_mutex_unlock(&share->intern_lock);
-  trnman_unlock();
+  if (is_lock_trman)
+    trnman_unlock();
 }
 
 

=== modified file 'storage/maria/ma_state.h'
--- a/storage/maria/ma_state.h	2008-11-03 13:53:22 +0000
+++ b/storage/maria/ma_state.h	2008-12-08 20:09:59 +0000
@@ -78,6 +78,7 @@ my_bool _ma_trnman_end_trans_hook(TRN *t
 my_bool _ma_row_visible_always(MARIA_HA *info);
 my_bool _ma_row_visible_non_transactional_table(MARIA_HA *info);
 my_bool _ma_row_visible_transactional_table(MARIA_HA *info);
-void _ma_remove_not_visible_states_with_lock(struct st_maria_share *share);
+void _ma_remove_not_visible_states_with_lock(struct st_maria_share *share,
+                                             my_bool all);
 void _ma_remove_table_from_trnman(struct st_maria_share *share, TRN *trn);
 void _ma_reset_history(struct st_maria_share *share);

=== modified file 'storage/maria/maria_def.h'
--- a/storage/maria/maria_def.h	2008-11-24 07:48:21 +0000
+++ b/storage/maria/maria_def.h	2008-12-09 09:56:02 +0000
@@ -353,7 +353,7 @@ typedef struct st_maria_share
   PAGECACHE_FILE kfile;			/* Shared keyfile */
   File data_file;			/* Shared data file */
   int mode;				/* mode of file on open */
-  uint reopen;				/* How many times reopened */
+  uint reopen;				/* How many times opened */
   uint in_trans;                        /* Number of references by trn */
   uint w_locks, r_locks, tot_locks;	/* Number of read/write locks */
   uint block_size;			/* block_size of keyfile & data file*/
@@ -362,7 +362,10 @@ typedef struct st_maria_share
   myf write_flag;
   enum data_file_type data_file_type;
   enum pagecache_page_type page_type;   /* value depending transactional */
-  uint8 in_checkpoint;               /**< if Checkpoint looking at table */
+  /**
+     if Checkpoint looking at table; protected by close_lock or THR_LOCK_maria
+  */
+  uint8 in_checkpoint;
   my_bool temporary;
   /* Below flag is needed to make log tables work with concurrent insert */
   my_bool is_log_table;
@@ -386,9 +389,20 @@ typedef struct st_maria_share
 #ifdef THREAD
   THR_LOCK lock;
   void (*lock_restore_status)(void *);
-  pthread_mutex_t intern_lock;		/* Locking for use with _locking */
+  /**
+    Protects kfile, dfile, most members of the state, state disk writes,
+    versioning information (like in_trans, state_history).
+    @todo find the exhaustive list.
+  */
+  pthread_mutex_t intern_lock;	
   pthread_mutex_t key_del_lock;
   pthread_cond_t  key_del_cond;
+  /**
+    _Always_ held while closing table; prevents checkpoint from looking at
+    structures freed during closure (like bitmap). If you need close_lock and
+    intern_lock, lock them in this order.
+  */
+  pthread_mutex_t close_lock;
 #endif
   my_off_t mmaped_length;
   uint nonmmaped_inserts;		/* counter of writing in

=== modified file 'storage/maria/trnman.c'
--- a/storage/maria/trnman.c	2008-12-09 10:29:33 +0000
+++ b/storage/maria/trnman.c	2008-12-09 12:36:51 +0000
@@ -881,6 +881,7 @@ my_bool trnman_exists_active_transaction
 
   if (!trnman_is_locked)
     pthread_mutex_lock(&LOCK_trn_list);
+  safe_mutex_assert_owner(&LOCK_trn_list);
   for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
   {
     /*
@@ -909,6 +910,7 @@ void trnman_lock()
   pthread_mutex_lock(&LOCK_trn_list);
 }
 
+
 /**
    unlock transaction list
 */
@@ -917,3 +919,13 @@ void trnman_unlock()
 {
   pthread_mutex_unlock(&LOCK_trn_list);
 }
+
+
+/**
+  Is trman initialized
+*/
+
+my_bool trman_is_inited()
+{
+  return (short_trid_to_active_trn != NULL);
+}

=== modified file 'storage/maria/trnman_public.h'
--- a/storage/maria/trnman_public.h	2008-08-07 20:57:25 +0000
+++ b/storage/maria/trnman_public.h	2008-12-08 20:09:59 +0000
@@ -69,5 +69,6 @@ my_bool trnman_exists_active_transaction
 #define transid_korr(P) uint6korr(P)
 void trnman_lock();
 void trnman_unlock();
+my_bool trman_is_inited();
 C_MODE_END
 #endif

Thread
bzr commit into MySQL/Maria:mysql-maria branch (monty:2707) Michael Widenius9 Dec