List:Commits« Previous MessageNext Message »
From:Guilhem Bichot Date:October 20 2008 1:04pm
Subject:bzr commit into mysql-6.0 branch (guilhem:2865)
View as plain text  
#At file:///home/mysql_src/tmp/monty/repo/mysql-6.0-maria-tmp/

 2865 Guilhem Bichot	2008-10-20 [merge]
      Merge of 5.1-maria into 6.0
added:
  mysql-test/suite/maria/r/maria_partition.result
  mysql-test/suite/maria/t/maria_partition.test
  storage/maria/unittest/ma_pagecache_rwconsist2.c
modified:
  mysql-test/suite/maria/r/maria.result
  mysql-test/suite/maria/t/maria.test
  storage/maria/ha_maria.cc
  storage/maria/ma_bitmap.c
  storage/maria/ma_close.c
  storage/maria/ma_create.c
  storage/maria/ma_loghandler.c
  storage/maria/ma_pagecache.c
  storage/maria/ma_recovery.c
  storage/maria/ma_static.c
  storage/maria/unittest/CMakeLists.txt
  storage/maria/unittest/Makefile.am

=== modified file 'mysql-test/suite/maria/r/maria.result'
--- a/mysql-test/suite/maria/r/maria.result	2008-10-20 12:42:30 +0000
+++ b/mysql-test/suite/maria/r/maria.result	2008-10-20 13:03:34 +0000
@@ -2599,11 +2599,6 @@ ALTER TABLE t1 CHANGE c d varchar(10);
 affected rows: 0
 info: Records: 0  Duplicates: 0  Warnings: 0
 drop table t1;
-create table t1 (s1 int);
-insert into t1 values (1);
-alter table t1 partition by list (s1) (partition p1 values in (2));
-ERROR HY000: Table has no partition for value 1
-drop table t1;
 create table t1 (c1 int);
 create table t2 (c1 int);
 lock table t1 read, t2 read;

=== added file 'mysql-test/suite/maria/r/maria_partition.result'
--- a/mysql-test/suite/maria/r/maria_partition.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/r/maria_partition.result	2008-10-15 12:44:31 +0000
@@ -0,0 +1,12 @@
+set global storage_engine=maria;
+set session storage_engine=maria;
+set global maria_page_checksum=0;
+set global maria_log_file_size=4294967295;
+drop table if exists t1,t2;
+drop view if exists v1;
+SET SQL_WARNINGS=1;
+create table t1 (s1 int);
+insert into t1 values (1);
+alter table t1 partition by list (s1) (partition p1 values in (2));
+ERROR HY000: Table has no partition for value 1
+drop table t1;

=== modified file 'mysql-test/suite/maria/t/maria.test'
--- a/mysql-test/suite/maria/t/maria.test	2008-10-20 12:42:30 +0000
+++ b/mysql-test/suite/maria/t/maria.test	2008-10-20 13:03:34 +0000
@@ -1835,12 +1835,6 @@ delete from t1;
 unlock tables;
 drop table t1;
 
---disable_result_log
---disable_query_log
-eval set global storage_engine=$default_engine, maria_page_checksum=$default_checksum;
---enable_result_log
---enable_query_log
-
 #
 # Bug#39243 SELECT WHERE does not find row
 # (Problem with skip_row)
@@ -1886,16 +1880,6 @@ ALTER TABLE t1 CHANGE c d varchar(10);
 drop table t1;
 
 #
-# Bug #39227 Maria: crash with ALTER TABLE PARTITION
-#
-
-create table t1 (s1 int);
-insert into t1 values (1);
---error ER_NO_PARTITION_FOR_GIVEN_VALUE
-alter table t1 partition by list (s1) (partition p1 values in (2));
-drop table t1;
-
-#
 # Bug #39226 Maria: crash with FLUSH TABLES WITH READ LOCK after LOCK TABLES
 
 create table t1 (c1 int);
@@ -1906,3 +1890,10 @@ lock table t1 read, t2 read;
 flush tables with read lock;
 unlock tables;
 drop table t1, t2;
+
+# Set defaults back
+--disable_result_log
+--disable_query_log
+eval set global storage_engine=$default_engine, maria_page_checksum=$default_checksum;
+--enable_result_log
+--enable_query_log

=== added file 'mysql-test/suite/maria/t/maria_partition.test'
--- a/mysql-test/suite/maria/t/maria_partition.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/t/maria_partition.test	2008-10-15 12:44:31 +0000
@@ -0,0 +1,35 @@
+# Maria tests which require partitioning enabled
+
+--source include/have_partition.inc
+-- source include/have_maria.inc
+
+let $default_engine=`select @@global.storage_engine`;
+let $default_checksum=`select @@global.maria_page_checksum`;
+set global storage_engine=maria;
+set session storage_engine=maria;
+set global maria_page_checksum=0;
+set global maria_log_file_size=4294967295;
+
+# Initialise
+--disable_warnings
+drop table if exists t1,t2;
+drop view if exists v1;
+--enable_warnings
+SET SQL_WARNINGS=1;
+
+#
+# Bug #39227 Maria: crash with ALTER TABLE PARTITION
+#
+
+create table t1 (s1 int);
+insert into t1 values (1);
+--error ER_NO_PARTITION_FOR_GIVEN_VALUE
+alter table t1 partition by list (s1) (partition p1 values in (2));
+drop table t1;
+
+# Set defaults back
+--disable_result_log
+--disable_query_log
+eval set global storage_engine=$default_engine, maria_page_checksum=$default_checksum;
+--enable_result_log
+--enable_query_log

=== modified file 'storage/maria/ha_maria.cc'
--- a/storage/maria/ha_maria.cc	2008-10-20 09:16:47 +0000
+++ b/storage/maria/ha_maria.cc	2008-10-20 13:03:34 +0000
@@ -1681,6 +1681,7 @@ void ha_maria::start_bulk_insert(ha_rows
   THD *thd= current_thd;
   ulong size= min(thd->variables.read_buff_size,
                   (ulong) (table->s->avg_row_length * rows));
+  MARIA_SHARE *share= file->s;
   DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu",
                       (ulong) rows, size));
 
@@ -1688,8 +1689,8 @@ void ha_maria::start_bulk_insert(ha_rows
   if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
     maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size);
 
-  can_enable_indexes= (maria_is_all_keys_active(file->s->state.key_map,
-                                                file->s->base.keys));
+  can_enable_indexes= (maria_is_all_keys_active(share->state.key_map,
+                                                share->base.keys));
   bulk_insert_single_undo= BULK_INSERT_NONE;
 
   if (!(specialflag & SPECIAL_SAFE_MODE))
@@ -1701,8 +1702,17 @@ void ha_maria::start_bulk_insert(ha_rows
        we don't want to update the key statistics based of only a few rows.
        Index file rebuild requires an exclusive lock, so if versioning is on
        don't do it (see how ha_maria::store_lock() tries to predict repair).
+       We can repair index only if we have an exclusive (TL_WRITE) lock. To
+       see if table is empty, we shouldn't rely on the old records' count from
+       our transaction's start (if that old count is 0 but now there are
+       records in the table, we would wrongly destroy them).
+       So we need to look at share->state.state.records.
+       As a safety net for now, we don't remove the test of
+       file->state->records, because there is uncertainty on what will happen
+       during repair if the two states disagree.
     */
-    if (file->state->records == 0 && can_enable_indexes &&
+    if ((file->state->records == 0) &&
+        (share->state.state.records == 0) && can_enable_indexes &&
         (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) &&
         (file->lock.type == TL_WRITE))
     {
@@ -1711,7 +1721,7 @@ void ha_maria::start_bulk_insert(ha_rows
          is more costly (flushes, syncs) than a row write.
       */
       maria_disable_non_unique_index(file, rows);
-      if (file->s->now_transactional)
+      if (share->now_transactional)
       {
         bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
         write_log_record_for_bulk_insert(file);

=== modified file 'storage/maria/ma_bitmap.c'
--- a/storage/maria/ma_bitmap.c	2008-10-14 15:18:14 +0000
+++ b/storage/maria/ma_bitmap.c	2008-10-17 13:37:07 +0000
@@ -2168,8 +2168,8 @@ void _ma_bitmap_flushable(MARIA_HA *info
   }
   DBUG_ASSERT(non_flushable_inc == 1);
   DBUG_ASSERT(info->non_flushable_state == 0);
-  /* It is a read without mutex because only an optimization */
-  if (unlikely(bitmap->flush_all_requested))
+  pthread_mutex_lock(&bitmap->bitmap_lock);
+  while (unlikely(bitmap->flush_all_requested))
   {
     /*
       Some other thread is waiting for the bitmap to become
@@ -2182,21 +2182,13 @@ void _ma_bitmap_flushable(MARIA_HA *info
       our thread), it is not going to increase it more so is not going to come
       here.
     */
-    pthread_mutex_lock(&bitmap->bitmap_lock);
-    while (bitmap->flush_all_requested)
-    {
-      DBUG_PRINT("info", ("waiting for bitmap flusher"));
-      pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
-    }
-    pthread_mutex_unlock(&bitmap->bitmap_lock);
+    DBUG_PRINT("info", ("waiting for bitmap flusher"));
+    pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
   }
-  /*
-    Ok to set without mutex: we didn't touch the bitmap's content yet; when we
-    touch it we will take the mutex.
-  */
   bitmap->non_flushable++;
   info->non_flushable_state= 1;
   DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
+  pthread_mutex_unlock(&bitmap->bitmap_lock);
   DBUG_VOID_RETURN;
 }
 
@@ -2308,7 +2300,7 @@ my_bool _ma_bitmap_release_unused(MARIA_
   /* This duplicates ma_bitmap_flushable(-1) except it already has mutex */
   if (info->non_flushable_state)
   {
-    DBUG_ASSERT((int) bitmap->non_flushable >= 0);
+    DBUG_ASSERT(((int) (bitmap->non_flushable)) > 0);
     info->non_flushable_state= 0;
     if (--bitmap->non_flushable == 0)
     {

=== modified file 'storage/maria/ma_close.c'
--- a/storage/maria/ma_close.c	2008-10-20 09:16:47 +0000
+++ b/storage/maria/ma_close.c	2008-10-20 13:03:34 +0000
@@ -107,7 +107,7 @@ int maria_close(register MARIA_HA *info)
         File must be synced as it is going out of the maria_open_list and so
         becoming unknown to future Checkpoints.
       */
-      if (!share->temporary && my_sync(share->kfile.file, MYF(MY_WME)))
+      if (share->now_transactional && my_sync(share->kfile.file, MYF(MY_WME)))
         error= my_errno;
       if (my_close(share->kfile.file, MYF(0)))
         error= my_errno;

=== modified file 'storage/maria/ma_create.c'
--- a/storage/maria/ma_create.c	2008-10-20 09:16:47 +0000
+++ b/storage/maria/ma_create.c	2008-10-20 13:03:34 +0000
@@ -1349,7 +1349,8 @@ int _ma_update_state_lsns_sub(MARIA_SHAR
     int res;
     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
     /* table name is logged only for information */
-    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    share->open_file_name.str;
+    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=
+      (uchar *)(share->open_file_name.str);
     log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
       share->open_file_name.length + 1;
     if ((res= translog_write_record(&lsn, LOGREC_IMPORTED_TABLE,

=== modified file 'storage/maria/ma_loghandler.c'
--- a/storage/maria/ma_loghandler.c	2008-10-20 09:16:47 +0000
+++ b/storage/maria/ma_loghandler.c	2008-10-20 13:03:34 +0000
@@ -7717,7 +7717,8 @@ int translog_assign_id_to_share(MARIA_HA
       is not realpath-ed, etc) which is good: the log can be moved to another
       directory and continue working.
     */
-    log_array[TRANSLOG_INTERNAL_PARTS + 1].str= share->open_file_name.str;
+    log_array[TRANSLOG_INTERNAL_PARTS + 1].str=
+      (uchar *)share->open_file_name.str;
     log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
       share->open_file_name.length + 1;
     /*

=== modified file 'storage/maria/ma_pagecache.c'
--- a/storage/maria/ma_pagecache.c	2008-10-20 09:16:47 +0000
+++ b/storage/maria/ma_pagecache.c	2008-10-20 13:03:34 +0000
@@ -2775,7 +2775,7 @@ void pagecache_unlock(PAGECACHE *pagecac
   inc_counter_for_resize_op(pagecache);
   /* See NOTE for pagecache_unlock about registering requests */
   block= find_block(pagecache, file, pageno, 0, 0,
-                    test(pin == PAGECACHE_PIN_LEFT_UNPINNED), &page_st);
+                    pin == PAGECACHE_PIN_LEFT_UNPINNED, &page_st);
   PCBLOCK_INFO(block);
   DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
   if (first_REDO_LSN_for_page)
@@ -2943,15 +2943,16 @@ void pagecache_unlock_by_link(PAGECACHE 
   DBUG_ASSERT(pin != PAGECACHE_PIN_LEFT_UNPINNED);
   DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
   DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
+  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
   if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
       lock == PAGECACHE_LOCK_READ_UNLOCK)
   {
     if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
       DBUG_ASSERT(0);                         /* should not happend */
+    pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
     DBUG_VOID_RETURN;
   }
 
-  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
   /*
     As soon as we keep lock cache can be used, and we have lock because want
     unlock.
@@ -3080,6 +3081,146 @@ void pagecache_unpin_by_link(PAGECACHE *
   DBUG_VOID_RETURN;
 }
 
+/* description of how to change lock before and after read/write */
+struct rw_lock_change
+{
+  my_bool need_lock_change; /* need changing of lock at the end */
+  enum pagecache_page_lock new_lock; /* lock at the beginning */
+  enum pagecache_page_lock unlock_lock; /* lock at the end */
+};
+
+/* description of how to change pin before and after read/write */
+struct rw_pin_change
+{
+  enum pagecache_page_pin new_pin; /* pin status at the beginning */
+  enum pagecache_page_pin unlock_pin; /* pin status at the end */
+};
+
+/**
+  Depending on the lock which the user wants in pagecache_read(), we
+  need to acquire a first type of lock at start of pagecache_read(), and
+  downgrade it to a second type of lock at end. For example, if user
+  asked for no lock (PAGECACHE_LOCK_LEFT_UNLOCKED) this translates into
+  taking first a read lock PAGECACHE_LOCK_READ (to rightfully block on
+  existing write locks) then read then unlock the lock i.e. change lock
+  to PAGECACHE_LOCK_READ_UNLOCK (the "1" below tells that a change is
+  needed).
+*/ 
+
+static struct rw_lock_change lock_to_read[8]=
+{
+  { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
+    1,
+    PAGECACHE_LOCK_READ, PAGECACHE_LOCK_READ_UNLOCK
+  },
+  { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
+    0,
+    PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_LEFT_READLOCKED
+  },
+  { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
+    0,
+    PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_LEFT_WRITELOCKED
+  },
+  { /*PAGECACHE_LOCK_READ*/
+    1,
+    PAGECACHE_LOCK_READ, PAGECACHE_LOCK_LEFT_READLOCKED
+  },
+  { /*PAGECACHE_LOCK_WRITE*/
+    1,
+    PAGECACHE_LOCK_WRITE, PAGECACHE_LOCK_LEFT_WRITELOCKED
+  },
+  { /*PAGECACHE_LOCK_READ_UNLOCK*/
+    1,
+    PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_READ_UNLOCK
+  },
+  { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
+    1,
+    PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_UNLOCK
+  },
+  { /*PAGECACHE_LOCK_WRITE_TO_READ*/
+    1,
+    PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_TO_READ
+  }
+};
+
+/**
+  Two sets of pin modes (same idea as the lock table above, but for pinning).
+  The difference between the sets is whether or not we are going to provide
+  the caller with a reference to the block.
+*/
+
+static struct rw_pin_change lock_to_pin[2][8]=
+{
+  {
+    { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED,
+    },
+    { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_READ*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_WRITE*/
+      PAGECACHE_PIN,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_READ_UNLOCK*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_UNPIN
+    },
+    { /*PAGECACHE_LOCK_WRITE_TO_READ*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_UNPIN
+    }
+  },
+  {
+    { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED,
+    },
+    { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_READ*/
+      PAGECACHE_PIN,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_WRITE*/
+      PAGECACHE_PIN,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_READ_UNLOCK*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_UNPIN
+    },
+    { /*PAGECACHE_LOCK_WRITE_TO_READ*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_PIN_LEFT_PINNED,
+    }
+  }
+};
+
 
 /*
   @brief Read a block of data from a cached file into a buffer;
@@ -3096,34 +3237,11 @@ void pagecache_unpin_by_link(PAGECACHE *
   @return address from where the data is placed if successful, 0 - otherwise.
 
   @note Pin will be chosen according to lock parameter (see lock_to_pin)
-*/
-static enum pagecache_page_pin lock_to_pin[2][8]=
-{
-  {
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
-    PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/,
-    PAGECACHE_PIN               /*PAGECACHE_LOCK_WRITE*/,
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
-    PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
-    PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_TO_READ*/
-  },
-  {
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
-    PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
-    PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
-    PAGECACHE_PIN               /*PAGECACHE_LOCK_READ*/,
-    PAGECACHE_PIN               /*PAGECACHE_LOCK_WRITE*/,
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
-    PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
-    PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_WRITE_TO_READ*/
-  }
-};
 
-
-/**
   @note 'buff', if not NULL, must be long-aligned.
+
+  @note  If buff==0 then we provide a reference to the page, so we should
+  keep the page pinned.
 */
 
 uchar *pagecache_read(PAGECACHE *pagecache,
@@ -3136,21 +3254,26 @@ uchar *pagecache_read(PAGECACHE *pagecac
                       PAGECACHE_BLOCK_LINK **page_link)
 {
   my_bool error= 0;
-  enum pagecache_page_pin pin= lock_to_pin[test(buff==0)][lock];
+  enum pagecache_page_pin
+    new_pin= lock_to_pin[buff==0][lock].new_pin,
+    unlock_pin= lock_to_pin[buff==0][lock].unlock_pin;
   PAGECACHE_BLOCK_LINK *fake_link;
   my_bool reg_request;
 #ifndef DBUG_OFF
   char llbuf[22];
   DBUG_ENTER("pagecache_read");
   DBUG_PRINT("enter", ("fd: %u  page: %s  buffer: 0x%lx level: %u  "
-                       "t:%s  %s  %s",
+                       "t:%s  (%d)%s->%s  %s->%s",
                        (uint) file->file, ullstr(pageno, llbuf),
                        (ulong) buff, level,
                        page_cache_page_type_str[type],
-                       page_cache_page_lock_str[lock],
-                       page_cache_page_pin_str[pin]));
-  DBUG_ASSERT(buff != 0 || (buff == 0 && (pin == PAGECACHE_PIN ||
-                                          pin == PAGECACHE_PIN_LEFT_PINNED)));
+                       lock_to_read[lock].need_lock_change,
+                       page_cache_page_lock_str[lock_to_read[lock].new_lock],
+                       page_cache_page_lock_str[lock_to_read[lock].unlock_lock],
+                       page_cache_page_pin_str[new_pin],
+                       page_cache_page_pin_str[unlock_pin]));
+  DBUG_ASSERT(buff != 0 || (buff == 0 && (unlock_pin == PAGECACHE_PIN ||
+                                          unlock_pin == PAGECACHE_PIN_LEFT_PINNED)));
   DBUG_ASSERT(pageno < ((ULL(1)) << 40));
 #endif
 
@@ -3177,10 +3300,10 @@ restart:
     inc_counter_for_resize_op(pagecache);
     pagecache->global_cache_r_requests++;
     /* See NOTE for pagecache_unlock about registering requests. */
-    reg_request= ((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
-                  (pin == PAGECACHE_PIN));
+    reg_request= ((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
+                  (new_pin == PAGECACHE_PIN));
     block= find_block(pagecache, file, pageno, level,
-                      test(lock == PAGECACHE_LOCK_WRITE),
+                      lock == PAGECACHE_LOCK_WRITE,
                       reg_request, &page_st);
     DBUG_PRINT("info", ("Block type: %s current type %s",
                         page_cache_page_type_str[block->type],
@@ -3214,7 +3337,8 @@ restart:
         block->type == PAGECACHE_EMPTY_PAGE)
       block->type= type;
 
-    if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
+    if (make_lock_and_pin(pagecache, block, lock_to_read[lock].new_lock,
+                          new_pin, FALSE))
     {
       /*
         We failed to write lock the block, cache is unlocked,
@@ -3262,12 +3386,20 @@ restart:
     }
 
     remove_reader(block);
+    if (lock_to_read[lock].need_lock_change)
+    {
+      if (make_lock_and_pin(pagecache, block,
+                            lock_to_read[lock].unlock_lock,
+                            unlock_pin, FALSE))
+        DBUG_ASSERT(0);
+    }
     /*
       Link the block into the LRU chain if it's the last submitted request
       for the block and block will not be pinned.
       See NOTE for pagecache_unlock about registering requests.
     */
-    if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
+    if (unlock_pin == PAGECACHE_PIN_LEFT_UNPINNED ||
+        unlock_pin == PAGECACHE_UNPIN)
       unreg_request(pagecache, block, 1);
     else
       *page_link= block;
@@ -3485,6 +3617,18 @@ void pagecache_add_level_by_link(PAGECAC
   write locked before) or PAGECACHE_LOCK_WRITE (delete will write
   lock page before delete)
 */
+static enum pagecache_page_pin lock_to_pin_one_phase[8]=
+{
+  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
+  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
+  PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
+  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/,
+  PAGECACHE_PIN               /*PAGECACHE_LOCK_WRITE*/,
+  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
+  PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
+  PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_TO_READ*/
+};
+
 my_bool pagecache_delete(PAGECACHE *pagecache,
                          PAGECACHE_FILE *file,
                          pgcache_page_no_t pageno,
@@ -3492,7 +3636,7 @@ my_bool pagecache_delete(PAGECACHE *page
                          my_bool flush)
 {
   my_bool error= 0;
-  enum pagecache_page_pin pin= lock_to_pin[0][lock];
+  enum pagecache_page_pin pin= lock_to_pin_one_phase[lock];
   DBUG_ENTER("pagecache_delete");
   DBUG_PRINT("enter", ("fd: %u  page: %lu  %s  %s",
                        (uint) file->file, (ulong) pageno,
@@ -3608,15 +3752,7 @@ my_bool pagecache_delete_pages(PAGECACHE
   @retval 1 Error.
 */
 
-/* description of how to change lock before and after write */
-struct write_lock_change
-{
-  int need_lock_change; /* need changing of lock at the end of write */
-  enum pagecache_page_lock new_lock; /* lock at the beginning */
-  enum pagecache_page_lock unlock_lock; /* lock at the end */
-};
-
-static struct write_lock_change write_lock_change_table[]=
+static struct rw_lock_change write_lock_change_table[]=
 {
   {1,
    PAGECACHE_LOCK_WRITE,
@@ -3640,14 +3776,8 @@ static struct write_lock_change write_lo
    PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_WRITE_TO_READ*/
 };
 
-/* description of how to change pin before and after write */
-struct write_pin_change
-{
-  enum pagecache_page_pin new_pin; /* pin status at the beginning */
-  enum pagecache_page_pin unlock_pin; /* pin status at the end */
-};
 
-static struct write_pin_change write_pin_change_table[]=
+static struct rw_pin_change write_pin_change_table[]=
 {
   {PAGECACHE_PIN_LEFT_PINNED,
    PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/,
@@ -3729,10 +3859,10 @@ restart:
     reg_request= ((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
                   (pin == PAGECACHE_PIN));
     block= find_block(pagecache, file, pageno, level,
-                      test(write_mode != PAGECACHE_WRITE_DONE &&
-                           lock != PAGECACHE_LOCK_LEFT_WRITELOCKED &&
-                           lock != PAGECACHE_LOCK_WRITE_UNLOCK &&
-                           lock != PAGECACHE_LOCK_WRITE_TO_READ),
+                      (write_mode != PAGECACHE_WRITE_DONE &&
+                       lock != PAGECACHE_LOCK_LEFT_WRITELOCKED &&
+                       lock != PAGECACHE_LOCK_WRITE_UNLOCK &&
+                       lock != PAGECACHE_LOCK_WRITE_TO_READ),
                       reg_request, &page_st);
     if (!block)
     {

=== modified file 'storage/maria/ma_recovery.c'
--- a/storage/maria/ma_recovery.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_recovery.c	2008-10-20 13:03:34 +0000
@@ -3292,13 +3292,13 @@ my_bool _ma_reenable_logging_for_table(M
       /*
         We are going to change callbacks; if a page is flushed at this moment
         this can cause race conditions, that's one reason to flush pages
-        now. Other reasons: a checkpoint could be running and miss pages. As
+        now. Other reasons: a checkpoint could be running and miss pages; the
+        pages have type PAGECACHE_PLAIN_PAGE which should not remain. As
         there are no REDOs for pages, them, bitmaps and the state also have to
-        be flushed and synced. Leaving non-dirty pages in cache is ok, when
-        they become dirty again they will have their type corrected.
+        be flushed and synced.
       */
       if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
-                                FLUSH_KEEP, FLUSH_KEEP) ||
+                                FLUSH_RELEASE, FLUSH_RELEASE) ||
           _ma_state_info_write(share, 1|4) ||
           _ma_sync_table_files(info))
         DBUG_RETURN(1);

=== modified file 'storage/maria/ma_static.c'
--- a/storage/maria/ma_static.c	2008-10-14 21:23:33 +0000
+++ b/storage/maria/ma_static.c	2008-10-16 08:54:53 +0000
@@ -14,14 +14,6 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
 
-static int always_valid(const char *filename __attribute__((unused)))
-{
-  return 0;
-}
-
-int (*maria_test_invalid_symlink)(const char *filename)= always_valid;
-
-
 /*
   Static variables for MARIA library. All definied here for easy making of
   a shared library
@@ -106,3 +98,10 @@ uint32 maria_readnext_vec[]=
   SEARCH_BIGGER, SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_BIGGER, SEARCH_SMALLER,
   SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_SMALLER
 };
+
+static int always_valid(const char *filename __attribute__((unused)))
+{
+  return 0;
+}
+
+int (*maria_test_invalid_symlink)(const char *filename)= always_valid;

=== modified file 'storage/maria/unittest/CMakeLists.txt'
--- a/storage/maria/unittest/CMakeLists.txt	2008-06-13 09:47:17 +0000
+++ b/storage/maria/unittest/CMakeLists.txt	2008-10-16 19:44:12 +0000
@@ -94,3 +94,5 @@ SET_TARGET_PROPERTIES(ma_pagecache_consi
 	PROPERTIES COMPILE_FLAGS "${ma_pagecache_common_cppflags} -DTEST_PAGE_SIZE=65536 -DTEST_WRITERS")
 ADD_EXECUTABLE(ma_pagecache_rwconsist_1k-t ma_pagecache_rwconsist.c)
 SET_TARGET_PROPERTIES(ma_pagecache_rwconsist_1k-t PROPERTIES COMPILE_FLAGS "-DTEST_PAGE_SIZE=1024")
+ADD_EXECUTABLE(ma_pagecache_rwconsist2_1k-t ma_pagecache_rwconsist2.c)
+SET_TARGET_PROPERTIES(ma_pagecache_rwconsist2_1k-t PROPERTIES COMPILE_FLAGS "-DTEST_PAGE_SIZE=1024")

=== modified file 'storage/maria/unittest/Makefile.am'
--- a/storage/maria/unittest/Makefile.am	2008-06-13 09:47:17 +0000
+++ b/storage/maria/unittest/Makefile.am	2008-10-16 19:44:12 +0000
@@ -39,6 +39,7 @@ noinst_PROGRAMS =	ma_control_file-t trnm
 			ma_pagecache_consist_1kWR-t \
 			ma_pagecache_consist_64kWR-t \
                         ma_pagecache_rwconsist_1k-t \
+                        ma_pagecache_rwconsist2_1k-t \
 			ma_test_loghandler-t \
                         ma_test_loghandler_multigroup-t \
 			ma_test_loghandler_multithread-t \
@@ -101,6 +102,8 @@ ma_pagecache_consist_64kWR_t_CPPFLAGS =	
 
 ma_pagecache_rwconsist_1k_t_SOURCES =	ma_pagecache_rwconsist.c
 ma_pagecache_rwconsist_1k_t_CPPFLAGS = -DTEST_PAGE_SIZE=1024
+ma_pagecache_rwconsist2_1k_t_SOURCES =	ma_pagecache_rwconsist2.c
+ma_pagecache_rwconsist2_1k_t_CPPFLAGS = -DTEST_PAGE_SIZE=1024
 
 # the generic lock manager may not be used in the end and lockman1-t crashes,
 # and lockman2-t takes at least quarter an hour,

=== added file 'storage/maria/unittest/ma_pagecache_rwconsist2.c'
--- a/storage/maria/unittest/ma_pagecache_rwconsist2.c	1970-01-01 00:00:00 +0000
+++ b/storage/maria/unittest/ma_pagecache_rwconsist2.c	2008-10-20 13:03:34 +0000
@@ -0,0 +1,360 @@
+/* Copyright (C) 2006-2008 MySQL AB, 2008 Sun Microsystems, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+
+/**
+  @file This unit test checks the consistency of a long block write done
+  under a write lock while the same block is read simultaneously by requests
+  that do not take a read lock.
+*/
+
+/*
+  TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in
+  my_atomic-t.c (see BUG#22320).
+*/
+
+#include <tap.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include "test_file.h"
+#include <tap.h>
+
+#define PCACHE_SIZE (TEST_PAGE_SIZE*1024*8)
+
+#ifndef DBUG_OFF
+static const char* default_dbug_option;
+#endif
+
+
+#define SLEEP my_sleep(5)
+
+static char *file1_name= (char*)"page_cache_test_file_1";
+static PAGECACHE_FILE file1;
+static pthread_cond_t COND_thread_count;
+static pthread_mutex_t LOCK_thread_count;
+static uint thread_count= 0;
+static PAGECACHE pagecache;
+
+static uint number_of_readers= 5;
+static uint number_of_writers= 5;
+static uint number_of_read_tests= 20000;
+static uint number_of_write_tests= 1000;
+static uint report_divisor= 50;
+
+/**
+  @brief No-op pagecache callback: ignores its arguments and returns 0.
+*/
+
+static my_bool
+dummy_callback(uchar *page __attribute__((unused)),
+               pgcache_page_no_t page_no __attribute__((unused)),
+               uchar* data_ptr __attribute__((unused)))
+{
+  return 0;  /* main() installs this in three pagecache_file_init() slots */
+}
+
+
+/**
+  @brief No-op pagecache callback taking only a data pointer; does nothing.
+*/
+
+static void
+dummy_fail_callback(uchar* data_ptr __attribute__((unused)))
+{
+  return;  /* main() passes this as 4th argument of pagecache_file_init() */
+}
+
+
+/**
+  @brief Checks that all TEST_PAGE_SIZE bytes of a page equal its first byte;
+  on the first mismatch reports it with diag() and terminates via exit(1).
+  @param buff            pointer to the page content
+  @param task            task ID, only used in the diagnostic message
+*/
+void check_page(uchar *buff, int task)
+{
+  uint i;
+  DBUG_ENTER("check_page");
+
+  for (i= 1; i < TEST_PAGE_SIZE; i++)  /* byte 0 is the reference value */
+  {
+    if (buff[0] != buff[i])
+      goto err;
+  }
+  DBUG_VOID_RETURN;
+err:
+  diag("Task %d char #%u '%u' != '%u'", task, i, (uint) buff[0],
+       (uint) buff[i]);
+  DBUG_PRINT("err", ("try to flush"));
+  exit(1);  /* an inconsistent page ends the whole test program */
+}
+
+/* Reader task: reads the page the writers modify number_of_read_tests     */
+/* times without taking a lock and validates every copy with check_page(). */
+void reader(int num)
+{
+  unsigned char buff[TEST_PAGE_SIZE];  /* local buffer passed to the read */
+  uint i;
+
+  for (i= 0; i < number_of_read_tests; i++)
+  {
+    if (i % report_divisor == 0)  /* progress message every report_divisor */
+      diag("Reader %d - %u", num, i);
+    pagecache_read(&pagecache, &file1, 0, 3, buff,
+                   PAGECACHE_PLAIN_PAGE,
+                   PAGECACHE_LOCK_LEFT_UNLOCKED,
+                   NULL);  /* return value is not checked */
+    check_page(buff, num);  /* exits the program if the bytes differ */
+  }
+}
+
+/* Writer task: refills the page in two halves, number_of_write_tests times */
+void writer(int num)
+{
+  uint i;
+  uchar *buff;
+  PAGECACHE_BLOCK_LINK *link;
+
+  for (i= 0; i < number_of_write_tests; i++)
+  {
+    uchar c= (uchar) rand() % 256;  /* parses as ((uchar) rand()) % 256 */
+
+    if (i % report_divisor == 0)  /* progress message every report_divisor */
+      diag("Writer %d - %u", num, i);
+    buff= pagecache_read(&pagecache, &file1, 0, 3, NULL,
+                         PAGECACHE_PLAIN_PAGE,
+                         PAGECACHE_LOCK_WRITE,  /* released by the unlock below */
+                         &link);
+
+    check_page(buff, num);  /* page must be uniform before it is modified */
+    bfill(buff, TEST_PAGE_SIZE / 2, c);  /* first half gets the new byte */
+    SLEEP;  /* the page is half-updated while this task sleeps */
+    bfill(buff + TEST_PAGE_SIZE/2, TEST_PAGE_SIZE / 2, c);
+    check_page(buff, num);  /* uniform again before the lock is released */
+    pagecache_unlock_by_link(&pagecache, link,
+                             PAGECACHE_LOCK_WRITE_UNLOCK,
+                             PAGECACHE_UNPIN, 0, 0, 1, FALSE);
+    SLEEP;  /* pause (my_sleep(5)) before the next iteration */
+  }
+}
+
+/* Reader thread entry point; arg is a malloc'ed int holding the task id. */
+static void *test_thread_reader(void *arg)
+{
+  int param=*((int*) arg);  /* task id chosen by main() */
+  my_thread_init();
+  {
+    DBUG_ENTER("test_reader");
+
+    DBUG_PRINT("enter", ("param: %d", param));
+
+    reader(param);
+
+    DBUG_PRINT("info", ("Thread %s ended", my_thread_name()));
+    pthread_mutex_lock(&LOCK_thread_count);
+    ok(1, "reader%d: done", param);  /* one result per thread, see plan() */
+    thread_count--;
+    pthread_cond_signal(&COND_thread_count); /* Tell main we are ready */
+    pthread_mutex_unlock(&LOCK_thread_count);
+    free((uchar*) arg);  /* releases the id allocated in main() */
+    my_thread_end();
+  }
+  return 0;
+}
+
+/* Writer thread entry point; arg is a malloc'ed int holding the task id. */
+static void *test_thread_writer(void *arg)
+{
+  int param=*((int*) arg);  /* task id chosen by main() */
+  my_thread_init();
+  {
+    DBUG_ENTER("test_writer");
+
+    writer(param);
+
+    DBUG_PRINT("info", ("Thread %s ended", my_thread_name()));
+    pthread_mutex_lock(&LOCK_thread_count);
+    ok(1, "writer%d: done", param);  /* one result per thread, see plan() */
+    thread_count--;
+    pthread_cond_signal(&COND_thread_count); /* Tell main we are ready */
+    pthread_mutex_unlock(&LOCK_thread_count);
+    free((uchar*) arg);  /* releases the id allocated in main() */
+    my_thread_end();
+  }
+  return 0;
+}
+
+
+/**
+  @brief Test driver: sets up the test file and the page cache, writes one
+  zero-filled page, starts the detached reader and writer threads, waits until
+  thread_count drops back to zero and then cleans up.  Any command-line
+  argument turns on DBUG tracing in debug builds.  Setup failures are reported
+  with diag() and end the program via exit(1).
+  @return the value of exit_status().
+*/
+int main(int argc __attribute__((unused)),
+         char **argv __attribute__((unused)))
+{
+  pthread_t tid;
+  pthread_attr_t thr_attr;
+  int *param, error, pagen;
+
+  MY_INIT(argv[0]);
+
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+  default_dbug_option= "d:t:i:O,\\test_pagecache_consist.trace";
+#else
+  default_dbug_option= "d:t:i:O,/tmp/test_pagecache_consist.trace";
+#endif
+  if (argc > 1)
+  {
+    DBUG_SET(default_dbug_option);
+    DBUG_SET_INITIAL(default_dbug_option);
+  }
+#endif
+
+  {
+  DBUG_ENTER("main");
+  DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name()));
+  plan(number_of_writers + number_of_readers);
+  SKIP_BIG_TESTS(number_of_writers + number_of_readers)
+  {
+  if ((file1.file= my_open(file1_name,
+                           O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+  {
+    diag("Got error during file1 creation from open() (errno: %d)\n", errno);
+    exit(1);
+  }
+  pagecache_file_init(file1, &dummy_callback, &dummy_callback,
+                      &dummy_fail_callback, &dummy_callback, NULL);
+  DBUG_PRINT("info", ("file1: %d", file1.file));
+  if (my_chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO, MYF(MY_WME)))
+    exit(1);
+  my_pwrite(file1.file, (const uchar*) "test file", 9, 0, MYF(0));
+
+  if ((error= pthread_cond_init(&COND_thread_count, NULL)))
+  {
+    diag("COND_thread_count: %d from pthread_cond_init (errno: %d)\n",
+         error, errno);
+    exit(1);
+  }
+  if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST)))
+  {
+    diag("LOCK_thread_count: %d from pthread_mutex_init (errno: %d)\n",
+         error, errno);
+    exit(1);
+  }
+
+  if ((error= pthread_attr_init(&thr_attr)))
+  {
+    diag("Got error: %d from pthread_attr_init (errno: %d)\n", error, errno);
+    exit(1);
+  }
+  if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED)))
+  {
+    diag("Got error: %d from pthread_attr_setdetachstate (errno: %d)\n",
+         error, errno);
+    exit(1);
+  }
+
+#ifdef HAVE_THR_SETCONCURRENCY
+  (void)(thr_setconcurrency(2));
+#endif
+
+  if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+                             TEST_PAGE_SIZE, 0)) == 0)
+  {
+    diag("Got error: init_pagecache() (errno: %d)\n", errno);
+    exit(1);
+  }
+  DBUG_PRINT("info", ("Page cache %d pages", pagen));
+  {
+    /* Seed the page used by all tasks with zeros; buffr is only a source. */
+    unsigned char *buffr= malloc(TEST_PAGE_SIZE);
+    memset(buffr, '\0', TEST_PAGE_SIZE);
+    pagecache_write(&pagecache, &file1, 0, 3, buffr, PAGECACHE_PLAIN_PAGE,
+                    PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN_LEFT_UNPINNED,
+                    PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE);
+    free(buffr);  /* page content now lives in the cache */
+  }
+  /* Hold LOCK_thread_count while starting threads: each thread takes it
+     before decrementing thread_count, so none can do so until all started. */
+  pthread_mutex_lock(&LOCK_thread_count);
+  while (number_of_readers != 0 || number_of_writers != 0)
+  {
+    if (number_of_readers != 0)
+    {
+      param=(int*) malloc(sizeof(int));
+      *param= number_of_readers + number_of_writers;
+      if ((error= pthread_create(&tid, &thr_attr, test_thread_reader,
+                                 (void*) param)))
+      {
+        diag("Got error: %d from pthread_create (errno: %d)\n", error, errno);
+        exit(1);
+      }
+      thread_count++;
+      number_of_readers--;
+    }
+    if (number_of_writers != 0)
+    {
+      param=(int*) malloc(sizeof(int));
+      *param= number_of_writers + number_of_readers;
+      if ((error= pthread_create(&tid, &thr_attr, test_thread_writer,
+                                 (void*) param)))
+      {
+        diag("Got error: %d from pthread_create (errno: %d)\n", error, errno);
+        exit(1);
+      }
+      thread_count++;
+      number_of_writers--;
+    }
+  }
+  DBUG_PRINT("info", ("Thread started"));
+  pthread_mutex_unlock(&LOCK_thread_count);
+
+  pthread_attr_destroy(&thr_attr);
+
+  /* wait finishing */
+  pthread_mutex_lock(&LOCK_thread_count);
+  while (thread_count)
+  {
+    if ((error= pthread_cond_wait(&COND_thread_count, &LOCK_thread_count)))
+      diag("COND_thread_count: %d from pthread_cond_wait\n", error);
+  }
+  pthread_mutex_unlock(&LOCK_thread_count);
+  DBUG_PRINT("info", ("thread ended"));
+
+  end_pagecache(&pagecache, 1);
+  DBUG_PRINT("info", ("Page cache ended"));
+
+  if (my_close(file1.file, MYF(0)) != 0)
+  {
+    diag("Got error during file1 closing from close() (errno: %d)\n", errno);
+    exit(1);
+  }
+  my_delete(file1_name, MYF(0));
+
+  DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
+  DBUG_PRINT("info", ("Program end"));
+  } /* SKIP_BIG_TESTS */
+  my_end(0);
+
+  return exit_status();
+  }
+}
+
+#include "../ma_check_standalone.h"

Thread
bzr commit into mysql-6.0 branch (guilhem:2865) Guilhem Bichot20 Oct