List:Commits« Previous MessageNext Message »
From:Guilhem Bichot Date:October 20 2008 9:14pm
Subject:bzr commit into mysql-6.0 branch (guilhem:2742)
View as plain text  
#At file:///home/mysql_src/bzrrepos/mysql-6.0-maria2/

 2742 Guilhem Bichot	2008-10-20 [merge]
      Merge of 6.0-maria into 6.0-main (for push into 6.0-maria)
removed:
  unittest/mysys/CMakeLists.txt
  win/build-vs9.bat.moved
  win/build-vs9_x64.bat.moved
added:
  include/waiting_threads.h
  mysql-test/suite/maria/
  mysql-test/suite/maria/r/
  mysql-test/suite/maria/r/maria_partition.result
  mysql-test/suite/maria/t/
  mysql-test/suite/maria/t/maria_partition.test
  mysys/waiting_threads.c
  storage/maria/unittest/ma_pagecache_rwconsist2.c
  unittest/mysys/CMakeLists.txt
  unittest/mysys/lf-t.c
  unittest/mysys/thr_template.c
  unittest/mysys/waiting_threads-t.c
renamed:
  mysql-test/r/maria-autozerofill.result =>
mysql-test/suite/maria/r/maria-autozerofill.result
  mysql-test/r/maria-big.result => mysql-test/suite/maria/r/maria-big.result
  mysql-test/r/maria-big2.result => mysql-test/suite/maria/r/maria-big2.result
  mysql-test/r/maria-connect.result => mysql-test/suite/maria/r/maria-connect.result
  mysql-test/r/maria-gis-rtree-dynamic.result =>
mysql-test/suite/maria/r/maria-gis-rtree-dynamic.result
  mysql-test/r/maria-gis-rtree-trans.result =>
mysql-test/suite/maria/r/maria-gis-rtree-trans.result
  mysql-test/r/maria-gis-rtree.result =>
mysql-test/suite/maria/r/maria-gis-rtree.result
  mysql-test/r/maria-mvcc.result => mysql-test/suite/maria/r/maria-mvcc.result
  mysql-test/r/maria-no-logging.result =>
mysql-test/suite/maria/r/maria-no-logging.result
  mysql-test/r/maria-page-checksum.result =>
mysql-test/suite/maria/r/maria-page-checksum.result
  mysql-test/r/maria-preload.result => mysql-test/suite/maria/r/maria-preload.result
  mysql-test/r/maria-purge.result => mysql-test/suite/maria/r/maria-purge.result
  mysql-test/r/maria-recover.result => mysql-test/suite/maria/r/maria-recover.result
  mysql-test/r/maria-recovery-big.result =>
mysql-test/suite/maria/r/maria-recovery-big.result
  mysql-test/r/maria-recovery-bitmap.result =>
mysql-test/suite/maria/r/maria-recovery-bitmap.result
  mysql-test/r/maria-recovery-rtree-ft.result =>
mysql-test/suite/maria/r/maria-recovery-rtree-ft.result
  mysql-test/r/maria-recovery.result => mysql-test/suite/maria/r/maria-recovery.result
  mysql-test/r/maria-recovery2.result =>
mysql-test/suite/maria/r/maria-recovery2.result
  mysql-test/r/maria.result => mysql-test/suite/maria/r/maria.result
  mysql-test/r/maria2.result => mysql-test/suite/maria/r/maria2.result
  mysql-test/r/maria3.result => mysql-test/suite/maria/r/maria3.result
  mysql-test/r/maria_notembedded.result =>
mysql-test/suite/maria/r/maria_notembedded.result
  mysql-test/r/ps_maria.result => mysql-test/suite/maria/r/ps_maria.result
  mysql-test/t/maria-autozerofill.test =>
mysql-test/suite/maria/t/maria-autozerofill.test
  mysql-test/t/maria-big.test => mysql-test/suite/maria/t/maria-big.test
  mysql-test/t/maria-big2.test => mysql-test/suite/maria/t/maria-big2.test
  mysql-test/t/maria-connect.test => mysql-test/suite/maria/t/maria-connect.test
  mysql-test/t/maria-gis-rtree-dynamic.test =>
mysql-test/suite/maria/t/maria-gis-rtree-dynamic.test
  mysql-test/t/maria-gis-rtree-trans.test =>
mysql-test/suite/maria/t/maria-gis-rtree-trans.test
  mysql-test/t/maria-gis-rtree.test => mysql-test/suite/maria/t/maria-gis-rtree.test
  mysql-test/t/maria-mvcc.test => mysql-test/suite/maria/t/maria-mvcc.test
  mysql-test/t/maria-no-logging.test => mysql-test/suite/maria/t/maria-no-logging.test
  mysql-test/t/maria-page-checksum.test =>
mysql-test/suite/maria/t/maria-page-checksum.test
  mysql-test/t/maria-preload.test => mysql-test/suite/maria/t/maria-preload.test
  mysql-test/t/maria-purge.test => mysql-test/suite/maria/t/maria-purge.test
  mysql-test/t/maria-recover-master.opt =>
mysql-test/suite/maria/t/maria-recover-master.opt
  mysql-test/t/maria-recover.test => mysql-test/suite/maria/t/maria-recover.test
  mysql-test/t/maria-recovery-big-master.opt =>
mysql-test/suite/maria/t/maria-recovery-big-master.opt
  mysql-test/t/maria-recovery-big.test =>
mysql-test/suite/maria/t/maria-recovery-big.test
  mysql-test/t/maria-recovery-bitmap-master.opt =>
mysql-test/suite/maria/t/maria-recovery-bitmap-master.opt
  mysql-test/t/maria-recovery-bitmap.test =>
mysql-test/suite/maria/t/maria-recovery-bitmap.test
  mysql-test/t/maria-recovery-master.opt =>
mysql-test/suite/maria/t/maria-recovery-master.opt
  mysql-test/t/maria-recovery-rtree-ft-master.opt =>
mysql-test/suite/maria/t/maria-recovery-rtree-ft-master.opt
  mysql-test/t/maria-recovery-rtree-ft.test =>
mysql-test/suite/maria/t/maria-recovery-rtree-ft.test
  mysql-test/t/maria-recovery.test => mysql-test/suite/maria/t/maria-recovery.test
  mysql-test/t/maria-recovery2-master.opt =>
mysql-test/suite/maria/t/maria-recovery2-master.opt
  mysql-test/t/maria-recovery2.test => mysql-test/suite/maria/t/maria-recovery2.test
  mysql-test/t/maria.test => mysql-test/suite/maria/t/maria.test
  mysql-test/t/maria2.test => mysql-test/suite/maria/t/maria2.test
  mysql-test/t/maria3.test => mysql-test/suite/maria/t/maria3.test
  mysql-test/t/maria_notembedded.test =>
mysql-test/suite/maria/t/maria_notembedded.test
  mysql-test/t/ps_maria.test => mysql-test/suite/maria/t/ps_maria.test
modified:
  .bzrignore
  client/mysqltest.c
  configure.in
  include/Makefile.am
  include/atomic/generic-msvc.h
  include/lf.h
  include/maria.h
  include/my_alloc.h
  include/my_base.h
  include/my_global.h
  include/my_handler.h
  include/my_pthread.h
  include/my_sys.h
  include/myisam.h
  include/mysql.h.pp
  mysql-test/lib/mtr_cases.pl
  mysql-test/lib/mtr_report.pl
  mysql-test/mysql-test-run.pl
  mysql-test/r/subselect_debug.result
  mysql-test/r/sync_frm_basic.result
  mysql-test/suite/funcs_1/datadict/processlist_priv.inc
  mysql-test/suite/funcs_1/datadict/processlist_val.inc
  mysql-test/suite/funcs_1/r/processlist_val_no_prot.result
  mysql-test/t/merge.test
  mysql-test/t/subselect_debug.test
  mysql-test/t/sync_frm_basic.test
  mysys/CMakeLists.txt*
  mysys/Makefile.am
  mysys/array.c
  mysys/lf_alloc-pin.c
  mysys/lf_hash.c
  mysys/my_static.c
  mysys/my_thr_init.c
  sql-common/client.c
  sql/ha_partition.cc
  sql/mysql_priv.h
  sql/mysqld.cc
  sql/set_var.cc
  sql/sql_base.cc
  sql/sql_class.cc
  sql/sql_class.h
  sql/sql_insert.cc
  sql/sql_parse.cc
  sql/sql_prepare.cc
  sql/sql_table.cc
  sql/sql_view.cc
  storage/falcon/Makefile.am
  storage/maria/ha_maria.cc
  storage/maria/ma_bitmap.c
  storage/maria/ma_blockrec.c
  storage/maria/ma_blockrec.h
  storage/maria/ma_check.c
  storage/maria/ma_checkpoint.c
  storage/maria/ma_close.c
  storage/maria/ma_commit.c
  storage/maria/ma_create.c
  storage/maria/ma_delete.c
  storage/maria/ma_delete_table.c
  storage/maria/ma_extra.c
  storage/maria/ma_init.c
  storage/maria/ma_key_recover.c
  storage/maria/ma_key_recover.h
  storage/maria/ma_loghandler.c
  storage/maria/ma_open.c
  storage/maria/ma_page.c
  storage/maria/ma_pagecache.c
  storage/maria/ma_pagecache.h
  storage/maria/ma_preload.c
  storage/maria/ma_recovery.c
  storage/maria/ma_rename.c
  storage/maria/ma_search.c
  storage/maria/ma_state.c
  storage/maria/ma_static.c
  storage/maria/ma_write.c
  storage/maria/maria_chk.c
  storage/maria/maria_def.h
  storage/maria/trnman.c
  storage/maria/trnman.h
  storage/maria/trnman_public.h
  storage/maria/unittest/CMakeLists.txt
  storage/maria/unittest/Makefile.am
  storage/maria/unittest/ma_pagecache_rwconsist.c
  storage/maria/unittest/ma_pagecache_single.c
  storage/maria/unittest/ma_test_loghandler-t.c
  storage/maria/unittest/ma_test_loghandler_first_lsn-t.c
  storage/maria/unittest/ma_test_loghandler_max_lsn-t.c
  storage/maria/unittest/ma_test_loghandler_multigroup-t.c
  storage/maria/unittest/ma_test_loghandler_multithread-t.c
  storage/maria/unittest/ma_test_loghandler_noflush-t.c
  storage/maria/unittest/ma_test_loghandler_nologs-t.c
  storage/maria/unittest/ma_test_loghandler_pagecache-t.c
  storage/maria/unittest/ma_test_loghandler_purge-t.c
  storage/maria/unittest/trnman-t.c
  storage/myisam/ha_myisam.cc
  storage/myisam/mi_check.c
  storage/myisam/mi_examine_log.c
  storage/myisam/mi_open.c
  storage/myisam/mi_page.c
  storage/myisam/mi_search.c
  storage/myisam/mi_test_all.sh
  storage/myisam/rt_index.c
  unittest/mysys/Makefile.am
  unittest/mysys/my_atomic-t.c
  mysql-test/suite/maria/r/maria.result
  mysql-test/suite/maria/r/maria3.result
  mysql-test/suite/maria/t/maria.test
  mysql-test/suite/maria/t/maria3.test

=== modified file '.bzrignore'
--- a/.bzrignore	2008-10-01 12:02:28 +0000
+++ b/.bzrignore	2008-10-20 19:13:22 +0000
@@ -1898,3 +1898,4 @@ mysql-test/tps.log
 libmysqld/event_parse_data.cc
 client/transaction.h
 libmysqld/transaction.cc
+libmysqld/rpl_handler.cc

=== modified file 'client/mysqltest.c'
--- a/client/mysqltest.c	2008-08-05 23:26:18 +0000
+++ b/client/mysqltest.c	2008-10-20 09:16:47 +0000
@@ -2784,7 +2784,7 @@ void do_mkdir(struct st_command *command
   int error;
   static DYNAMIC_STRING ds_dirname;
   const struct command_arg mkdir_args[] = {
-    "dirname", ARG_STRING, TRUE, &ds_dirname, "Directory to create"
+    {"dirname", ARG_STRING, TRUE, &ds_dirname, "Directory to create"}
   };
   DBUG_ENTER("do_mkdir");
 
@@ -2814,7 +2814,7 @@ void do_rmdir(struct st_command *command
   int error;
   static DYNAMIC_STRING ds_dirname;
   const struct command_arg rmdir_args[] = {
-    "dirname", ARG_STRING, TRUE, &ds_dirname, "Directory to remove"
+    { "dirname", ARG_STRING, TRUE, &ds_dirname, "Directory to remove" }
   };
   DBUG_ENTER("do_rmdir");
 

=== modified file 'configure.in'
--- a/configure.in	2008-10-09 12:50:21 +0000
+++ b/configure.in	2008-10-20 19:13:22 +0000
@@ -10,6 +10,7 @@ AC_CANONICAL_SYSTEM
 # remember to also update version.c in ndb.
 # When changing major version number please also check switch statement
 # in mysqlbinlog::check_master_version().
+
 AM_INIT_AUTOMAKE(mysql, 6.0.8-alpha)
 AM_CONFIG_HEADER([include/config.h:config.h.in])
 
@@ -257,8 +258,6 @@ test -z "$INSTALL_SCRIPT" && INSTALL_SCR
 
 # Not critical since the generated file is distributed
 AC_CHECK_PROGS(YACC, ['bison -y -p MYSQL'])
-AC_CHECK_PROG(PDFMANUAL, pdftex, manual.pdf)
-AC_CHECK_PROG(DVIS,      tex,    manual.dvi)
 
 #check the return type of sprintf
 AC_MSG_CHECKING("return type of sprintf")
@@ -1759,44 +1758,43 @@ fi
 AC_ARG_WITH([atomic-ops],
 	    AC_HELP_STRING([--with-atomic-ops=rwlocks|smp|up],
 	    [Implement atomic operations using pthread rwlocks or atomic CPU
-             instructions for multi-processor (default) or uniprocessor
-             configuration]), , [with_atomic_ops=smp])
+             instructions for multi-processor or uniprocessor
+             configuration. By default gcc built-in sync functions are used,
+             if available and 'smp' configuration otherwise.]))
 case "$with_atomic_ops" in
   "up") AC_DEFINE([MY_ATOMIC_MODE_DUMMY], [1],
                   [Assume single-CPU mode, no concurrency]) ;;
   "rwlocks") AC_DEFINE([MY_ATOMIC_MODE_RWLOCKS], [1],
                   [Use pthread rwlocks for atomic ops]) ;;
   "smp") ;;
+  "")
+    AC_CACHE_CHECK([whether the compiler provides atomic builtins],
+                   [mysql_cv_gcc_atomic_builtins], [AC_TRY_RUN([
+      int main()
+      {
+        int foo= -10; int bar= 10;
+        if (!__sync_fetch_and_add(&foo, bar) || foo)
+          return -1;
+        bar= __sync_lock_test_and_set(&foo, bar);
+        if (bar || foo != 10)
+          return -1;
+        bar= __sync_val_compare_and_swap(&bar, foo, 15);
+        if (bar)
+          return -1;
+        return 0;
+      }
+    ], [mysql_cv_gcc_atomic_builtins=yes_but_disabled],
+       [mysql_cv_gcc_atomic_builtins=no],
+       [mysql_cv_gcc_atomic_builtins=no])])
+
+    if test "x$mysql_cv_gcc_atomic_builtins" = xyes; then
+      AC_DEFINE(HAVE_GCC_ATOMIC_BUILTINS, 1,
+                [Define to 1 if compiler provides atomic builtins.])
+    fi
+   ;;
    *) AC_MSG_ERROR(["$with_atomic_ops" is not a valid value for --with-atomic-ops]) ;;
 esac
 
-AC_CACHE_CHECK([whether the compiler provides atomic builtins],
-               [mysql_cv_gcc_atomic_builtins], [AC_TRY_RUN([
-  int main()
-  {
-    int foo= -10; int bar= 10;
-    if (!__sync_fetch_and_add(&foo, bar) || foo)
-      return -1;
-    bar= __sync_lock_test_and_set(&foo, bar);
-    if (bar || foo != 10)
-      return -1;
-    bar= __sync_val_compare_and_swap(&bar, foo, 15);
-    if (bar)
-      return -1;
-    return 0;
-  }
-], [mysql_cv_gcc_atomic_builtins=yes],
-   [mysql_cv_gcc_atomic_builtins=no],
-   [mysql_cv_gcc_atomic_builtins=no])])
-
-if test "x$mysql_cv_gcc_atomic_builtins" = disabled_xyes; then
-  AC_DEFINE(HAVE_GCC_ATOMIC_BUILTINS, 1,
-            [Define to 1 if compiler provides atomic builtins.])
-fi
-
-# Check if we have the atomic_* functions on Solaris
-AC_CHECK_FUNC(atomic_cas_32, AC_DEFINE([HAVE_SOLARIS_ATOMIC], [1], [Define to 1 if
Solaris support atomic functions.]))
-
 # Force static compilation to avoid linking problems/get more speed
 AC_ARG_WITH(mysqld-ldflags,
     [  --with-mysqld-ldflags   Extra linking arguments for mysqld],
@@ -2746,14 +2744,12 @@ if test "$with_server" = "yes" -o "$THRE
 then
   AC_DEFINE([THREAD], [1],
             [Define if you want to have threaded code. This may be undef on client code])
-  # Avoid _PROGRAMS names
-  THREAD_LOBJECTS="thr_alarm.o thr_lock.o thr_mutex.o thr_rwlock.o my_pthread.o
my_thr_init.o mf_keycache.o"
-  AC_SUBST(THREAD_LOBJECTS)
   server_scripts="mysqld_safe mysql_install_db"
   sql_server_dirs="strings mysys dbug extra regex"
 
   sql_server="vio sql"
 fi
+AM_CONDITIONAL(THREAD, test "$with_server" = "yes" -o "$THREAD_SAFE_CLIENT" != "no")
 
 # "innochecksum" is not in the "innobase/" subdirectory, but should be switched
 AM_CONDITIONAL([BUILD_INNODB_TOOLS], [test X"$with_plugin_innobase" = Xyes])

=== modified file 'include/Makefile.am'
--- a/include/Makefile.am	2008-09-12 08:58:52 +0000
+++ b/include/Makefile.am	2008-10-20 19:13:22 +0000
@@ -37,9 +37,8 @@ noinst_HEADERS =	config-win.h config-net
 			mysql_version.h.in my_handler.h my_time.h \
 			my_vle.h my_user.h my_atomic.h atomic/nolock.h \
 			atomic/rwlock.h atomic/x86-gcc.h atomic/generic-msvc.h \
-			atomic/gcc_builtins.h my_libwrap.h my_stacktrace.h \
-			wqueue.h
-
+                        atomic/gcc_builtins.h my_libwrap.h my_stacktrace.h \
+                        wqueue.h waiting_threads.h
 EXTRA_DIST =        mysql.h.pp mysql/plugin.h.pp
 
 # Remove built files and the symlinked directories

=== modified file 'include/atomic/generic-msvc.h'
--- a/include/atomic/generic-msvc.h	2008-08-29 17:38:04 +0000
+++ b/include/atomic/generic-msvc.h	2008-10-20 09:16:47 +0000
@@ -56,11 +56,11 @@ C_MODE_END
 #endif /*_M_IX86*/
 
 #define MY_ATOMIC_MODE "msvc-intrinsics"
-#define IL_EXCHG_ADD32(A,B)    InterlockedExchangeAdd((LONG volatile*)A,B)
-#define IL_COMP_EXCHG32(A,B,C) InterlockedCompareExchange((LONG volatile*)A,B,C)
-#define IL_COMP_EXCHGptr       InterlockedCompareExchangePointer
-#define IL_EXCHG32(A,B)        InterlockedExchange((LONG volatile*)A,B)
-#define IL_EXCHGptr            InterlockedExchangePointer
+#define IL_EXCHG_ADD32(X,Y) InterlockedExchangeAdd((volatile LONG *)(X),(Y))
+#define IL_COMP_EXCHG32(X,Y,Z) InterlockedCompareExchange((volatile LONG *)(X),(Y),(Z))
+#define IL_COMP_EXCHGptr InterlockedCompareExchangePointer
+#define IL_EXCHG32       InterlockedExchange
+#define IL_EXCHGptr      InterlockedExchangePointer
 #define make_atomic_add_body(S) \
   v= IL_EXCHG_ADD ## S (a, v)
 #define make_atomic_cas_body(S)                                 \

=== modified file 'include/lf.h'
--- a/include/lf.h	2007-12-18 22:22:55 +0000
+++ b/include/lf.h	2008-07-29 14:10:24 +0000
@@ -110,7 +110,7 @@ typedef struct {
 typedef struct {
   void * volatile pin[LF_PINBOX_PINS];
   LF_PINBOX *pinbox;
-  void  *stack_ends_here;
+  void  **stack_ends_here;
   void  *purgatory;
   uint32 purgatory_count;
   uint32 volatile link;
@@ -166,8 +166,8 @@ void lf_pinbox_init(LF_PINBOX *pinbox, u
 void lf_pinbox_destroy(LF_PINBOX *pinbox);
 
 lock_wrap(lf_pinbox_get_pins, LF_PINS *,
-          (LF_PINBOX *pinbox, void *stack_end),
-          (pinbox, stack_end),
+          (LF_PINBOX *pinbox),
+          (pinbox),
           &pinbox->pinarray.lock)
 lock_wrap_void(lf_pinbox_put_pins,
                (LF_PINS *pins),
@@ -182,15 +182,13 @@ lock_wrap_void(lf_pinbox_free,
   memory allocator, lf_alloc-pin.c
 */
 
-struct st_lf_alloc_node {
-  struct st_lf_alloc_node *next;
-};
-
 typedef struct st_lf_allocator {
   LF_PINBOX pinbox;
-  struct st_lf_alloc_node * volatile top;
+  uchar * volatile top;
   uint element_size;
   uint32 volatile mallocs;
+  void (*constructor)(uchar *);
+  void (*destructor)(uchar *);
 } LF_ALLOCATOR;
 
 void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset);
@@ -202,8 +200,8 @@ uint lf_alloc_pool_count(LF_ALLOCATOR *a
 */
 #define _lf_alloc_free(PINS, PTR)     _lf_pinbox_free((PINS), (PTR))
 #define lf_alloc_free(PINS, PTR)       lf_pinbox_free((PINS), (PTR))
-#define _lf_alloc_get_pins(A, ST)     _lf_pinbox_get_pins(&(A)->pinbox, (ST))
-#define lf_alloc_get_pins(A, ST)       lf_pinbox_get_pins(&(A)->pinbox, (ST))
+#define _lf_alloc_get_pins(A)         _lf_pinbox_get_pins(&(A)->pinbox)
+#define lf_alloc_get_pins(A)           lf_pinbox_get_pins(&(A)->pinbox)
 #define _lf_alloc_put_pins(PINS)      _lf_pinbox_put_pins(PINS)
 #define lf_alloc_put_pins(PINS)        lf_pinbox_put_pins(PINS)
 #define lf_alloc_direct_free(ALLOC, ADDR) my_free((uchar*)(ADDR), MYF(0))
@@ -220,13 +218,17 @@ lock_wrap(lf_alloc_new, void *,
 
 #define LF_HASH_UNIQUE 1
 
+/* lf_hash overhead per element (that is, sizeof(LF_SLIST)) */
+#define LF_HASH_OVERHEAD (sizeof(int*)*4)
+
 typedef struct {
   LF_DYNARRAY array;                    /* hash itself */
   LF_ALLOCATOR alloc;                   /* allocator for elements */
   hash_get_key get_key;                 /* see HASH */
   CHARSET_INFO *charset;                /* see HASH */
   uint key_offset, key_length;          /* see HASH */
-  uint element_size, flags;             /* LF_HASH_UNIQUE, etc */
+  uint element_size;                    /* size of memcpy'ed area on insert */
+  uint flags;                           /* LF_HASH_UNIQUE, etc */
   int32 volatile size;                  /* size of array */
   int32 volatile count;                 /* number of elements in the hash */
 } LF_HASH;
@@ -242,8 +244,8 @@ int lf_hash_delete(LF_HASH *hash, LF_PIN
   shortcut macros to access underlying pinbox functions from an LF_HASH
   see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
 */
-#define _lf_hash_get_pins(HASH, ST) _lf_alloc_get_pins(&(HASH)->alloc, (ST))
-#define lf_hash_get_pins(HASH, ST)   lf_alloc_get_pins(&(HASH)->alloc, (ST))
+#define _lf_hash_get_pins(HASH)     _lf_alloc_get_pins(&(HASH)->alloc)
+#define lf_hash_get_pins(HASH)       lf_alloc_get_pins(&(HASH)->alloc)
 #define _lf_hash_put_pins(PINS)     _lf_pinbox_put_pins(PINS)
 #define lf_hash_put_pins(PINS)       lf_pinbox_put_pins(PINS)
 #define lf_hash_search_unpin(PINS)   lf_unpin((PINS), 2)

=== modified file 'include/maria.h'
--- a/include/maria.h	2008-06-26 05:18:28 +0000
+++ b/include/maria.h	2008-10-14 21:23:33 +0000
@@ -43,7 +43,6 @@ extern "C" {
 #define MARIA_MAX_KEY    MAX_INDEXES            /* Max allowed keys */
 #endif
 
-#define MARIA_MAX_MSG_BUF      1024 /* used in CHECK TABLE, REPAIR TABLE */
 #define MARIA_NAME_IEXT	".MAI"
 #define MARIA_NAME_DEXT	".MAD"
 /* Max extra space to use when sorting keys */
@@ -273,6 +272,12 @@ extern my_off_t maria_max_temp_length;
 extern ulong maria_bulk_insert_tree_size, maria_data_pointer_size;
 extern PAGECACHE maria_pagecache_var, *maria_pagecache;
 extern MY_TMPDIR *maria_tmpdir;
+/*
+  This is used to check if a symlink points into the mysql data home,
+  which is normally forbidden as it can be used to get access to
+  data the user has no privileges for
+*/
+extern int (*maria_test_invalid_symlink)(const char *filename);
 
 	/* Prototypes for maria-functions */
 

=== modified file 'include/my_alloc.h'
--- a/include/my_alloc.h	2008-01-12 22:30:38 +0000
+++ b/include/my_alloc.h	2008-10-10 15:28:41 +0000
@@ -39,12 +39,12 @@ typedef struct st_mem_root
   /* if block have less memory it will be put in 'used' list */
   size_t min_malloc;
   size_t block_size;               /* initial block size */
-  unsigned long block_num;         /* allocated blocks counter */
+  unsigned int block_num;          /* allocated blocks counter */
   /* 
      first free block in queue test counter (if it exceed 
      MAX_BLOCK_USAGE_BEFORE_DROP block will be dropped in 'used' list)
   */
-  unsigned long first_block_usage;
+  unsigned int first_block_usage;
 
   void (*error_handler)(void);
 } MEM_ROOT;

=== modified file 'include/my_base.h'
--- a/include/my_base.h	2008-07-11 13:36:54 +0000
+++ b/include/my_base.h	2008-10-20 09:16:47 +0000
@@ -206,7 +206,9 @@ enum ha_extra_function {
   HA_EXTRA_IS_ATTACHED_CHILDREN,
   HA_EXTRA_DETACH_CHILDREN,
   HA_EXTRA_ORDERBY_LIMIT,
-  HA_EXTRA_NO_ORDERBY_LIMIT
+  HA_EXTRA_NO_ORDERBY_LIMIT,
+  /* Inform handler we will force a close as part of flush */
+  HA_EXTRA_PREPARE_FOR_FORCED_CLOSE
 };
 
 /* Compatible option, to be deleted in 6.0 */

=== modified file 'include/my_global.h'
--- a/include/my_global.h	2008-09-09 19:02:38 +0000
+++ b/include/my_global.h	2008-10-20 19:13:22 +0000
@@ -1577,7 +1577,7 @@ inline void  operator delete[](void*, vo
 #if !defined(max)
 #define max(a, b)	((a) > (b) ? (a) : (b))
 #define min(a, b)	((a) < (b) ? (a) : (b))
-#endif  
+#endif
 /*
   Only Linux is known to need an explicit sync of the directory to make sure a
   file creation/deletion/renaming in(from,to) this directory durable.
@@ -1591,7 +1591,7 @@ inline void  operator delete[](void*, vo
 #endif
 
 /* Provide __func__ macro definition for platforms that miss it. */
-#if __STDC_VERSION__ < 199901L && !defined(__func__)
+#if __STDC_VERSION__ < 199901L
 #  if __GNUC__ >= 2
 #    define __func__ __FUNCTION__
 #  else

=== modified file 'include/my_handler.h'
--- a/include/my_handler.h	2008-07-24 11:33:35 +0000
+++ b/include/my_handler.h	2008-10-20 09:16:47 +0000
@@ -45,6 +45,7 @@ extern "C" {
 
 #define HA_MAX_POSSIBLE_KEY_BUFF    (HA_MAX_KEY_LENGTH + 24+ 6+6) 
 #define HA_MAX_KEY_BUFF  (HA_MAX_KEY_LENGTH+HA_MAX_KEY_SEG*6+8+8)
+#define HA_MAX_MSG_BUF      1024 /* used in CHECK TABLE, REPAIR TABLE */
 
 typedef struct st_HA_KEYSEG		/* Key-portion */
 {

=== modified file 'include/my_pthread.h'
--- a/include/my_pthread.h	2008-09-03 06:07:52 +0000
+++ b/include/my_pthread.h	2008-10-20 09:16:47 +0000
@@ -79,25 +79,27 @@ typedef void * (__cdecl *pthread_handler
    so it can be used directly as a 64 bit value. The value
    stored is in 100ns units.
  */
- union ft64 {
+union ft64 {
   FILETIME ft;
   __int64 i64;
- };
+};
+
 struct timespec {
   union ft64 tv;
   /* The max timeout value in millisecond for pthread_cond_timedwait */
   long max_timeout_msec;
 };
-#define set_timespec(ABSTIME,SEC) { \
-  GetSystemTimeAsFileTime(&((ABSTIME).tv.ft)); \
-  (ABSTIME).tv.i64+= (__int64)(SEC)*10000000; \
-  (ABSTIME).max_timeout_msec= (long)((SEC)*1000); \
-}
-#define set_timespec_nsec(ABSTIME,NSEC) { \
-  GetSystemTimeAsFileTime(&((ABSTIME).tv.ft)); \
-  (ABSTIME).tv.i64+= (__int64)(NSEC)/100; \
-  (ABSTIME).max_timeout_msec= (long)((NSEC)/1000000); \
-}
+
+#define set_timespec_time_nsec(ABSTIME,TIME,NSEC) do {          \
+  (ABSTIME).tv.i64= (TIME)+(__int64)(NSEC)/100;                 \
+  (ABSTIME).max_timeout_msec= (long)((NSEC)/1000000);           \
+} while(0)
+
+#define set_timespec_nsec(ABSTIME,NSEC) do {                    \
+  union ft64 tv;                                                \
+  GetSystemTimeAsFileTime(&tv.ft);                              \
+  set_timespec_time_nsec((ABSTIME), tv.i64, (NSEC));            \
+} while(0)
 
 void win_pthread_init(void);
 int win_pthread_setspecific(void *A,void *B,uint length);
@@ -416,43 +418,33 @@ int my_pthread_mutex_trylock(pthread_mut
   for calculating an absolute time at which
   pthread_cond_timedwait should timeout
 */
-#ifdef HAVE_TIMESPEC_TS_SEC
-#ifndef set_timespec
-#define set_timespec(ABSTIME,SEC) \
-{ \
-  (ABSTIME).ts_sec=time(0) + (time_t) (SEC); \
-  (ABSTIME).ts_nsec=0; \
-}
-#endif /* !set_timespec */
+
+#define set_timespec(ABSTIME,SEC) set_timespec_nsec((ABSTIME),(SEC)*1000000000ULL)
+
 #ifndef set_timespec_nsec
-#define set_timespec_nsec(ABSTIME,NSEC) \
-{ \
-  ulonglong now= my_getsystime() + (NSEC/100); \
-  (ABSTIME).ts_sec=  (now / ULL(10000000)); \
-  (ABSTIME).ts_nsec= (now % ULL(10000000) * 100 + ((NSEC) % 100)); \
-}
+#define set_timespec_nsec(ABSTIME,NSEC)                                 \
+  set_timespec_time_nsec((ABSTIME),my_getsystime(),(NSEC))
 #endif /* !set_timespec_nsec */
+
+/* adapt for two different flavors of struct timespec */
+#ifdef HAVE_TIMESPEC_TS_SEC
+#define TV_sec  ts_sec
+#define TV_nsec ts_nsec
 #else
-#ifndef set_timespec
-#define set_timespec(ABSTIME,SEC) \
-{\
-  struct timeval tv;\
-  gettimeofday(&tv,0);\
-  (ABSTIME).tv_sec=tv.tv_sec+(time_t) (SEC);\
-  (ABSTIME).tv_nsec=tv.tv_usec*1000;\
-}
-#endif /* !set_timespec */
-#ifndef set_timespec_nsec
-#define set_timespec_nsec(ABSTIME,NSEC) \
-{\
-  ulonglong now= my_getsystime() + (NSEC/100); \
-  (ABSTIME).tv_sec=  (time_t) (now / ULL(10000000));                  \
-  (ABSTIME).tv_nsec= (long) (now % ULL(10000000) * 100 + ((NSEC) % 100)); \
-}
-#endif /* !set_timespec_nsec */
+#define TV_sec  tv_sec
+#define TV_nsec tv_nsec
 #endif /* HAVE_TIMESPEC_TS_SEC */
 
-	/* safe_mutex adds checking to mutex for easier debugging */
+#ifndef set_timespec_time_nsec
+#define set_timespec_time_nsec(ABSTIME,TIME,NSEC) do {                  \
+  ulonglong nsec= (NSEC);                                               \
+  ulonglong now= (TIME) + (nsec/100);                                   \
+  (ABSTIME).TV_sec=  (now / ULL(10000000));                             \
+  (ABSTIME).TV_nsec= (now % ULL(10000000) * 100 + (nsec % 100));        \
+} while(0)
+#endif /* !set_timespec_time_nsec */
+
+/* safe_mutex adds checking to mutex for easier debugging */
 
 #if defined(__NETWARE__) && !defined(SAFE_MUTEX_DETECT_DESTROY)
 #define SAFE_MUTEX_DETECT_DESTROY
@@ -692,6 +684,7 @@ struct st_my_thread_var
   struct st_my_thread_var *next,**prev;
   void *opt_info;
   uint  lock_type; /* used by conditional release the queue */
+  void  *stack_ends_here;
 #ifndef DBUG_OFF
   void *dbug;
   char name[THREAD_NAME_SIZE+1];

=== modified file 'include/my_sys.h'
--- a/include/my_sys.h	2008-09-15 10:11:54 +0000
+++ b/include/my_sys.h	2008-10-20 19:13:22 +0000
@@ -227,6 +227,9 @@ extern void (*fatal_error_handler_hook)(
 extern uint my_file_limit;
 extern ulong my_thread_stack_size;
 
+extern const char *(*proc_info_hook)(void *, const char *, const char *,
+                                     const char *, const unsigned int);
+
 #ifdef HAVE_LARGE_PAGES
 extern my_bool my_use_large_pages;
 extern uint    my_large_page_size;

=== modified file 'include/myisam.h'
--- a/include/myisam.h	2008-08-24 16:12:12 +0000
+++ b/include/myisam.h	2008-10-20 09:16:47 +0000
@@ -57,8 +57,6 @@ extern "C" {
 
 #define MI_MAX_POSSIBLE_KEY_BUFF    HA_MAX_POSSIBLE_KEY_BUFF
 
-#define MI_MAX_KEY_BUFF  (MI_MAX_KEY_LENGTH+MI_MAX_KEY_SEG*6+8+8)
-#define MI_MAX_MSG_BUF      1024 /* used in CHECK TABLE, REPAIR TABLE */
 #define MI_NAME_IEXT	".MYI"
 #define MI_NAME_DEXT	".MYD"
 /* Max extra space to use when sorting keys */

=== modified file 'include/mysql.h.pp'
--- a/include/mysql.h.pp	2008-10-01 12:02:28 +0000
+++ b/include/mysql.h.pp	2008-10-20 19:13:22 +0000
@@ -212,8 +212,8 @@ typedef struct st_mem_root
   USED_MEM *pre_alloc;
   size_t min_malloc;
   size_t block_size;
-  unsigned long block_num;
-  unsigned long first_block_usage;
+  unsigned int block_num;
+  unsigned int first_block_usage;
   void (*error_handler)(void);
 } MEM_ROOT;
 typedef struct st_typelib {

=== added file 'include/waiting_threads.h'
--- a/include/waiting_threads.h	1970-01-01 00:00:00 +0000
+++ b/include/waiting_threads.h	2008-09-01 19:43:11 +0000
@@ -0,0 +1,161 @@
+/* Copyright (C) 2008 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#ifndef _waiting_threads_h
+#define _waiting_threads_h
+
+#include <my_global.h>
+#include <my_sys.h>
+
+C_MODE_START
+
+#include <lf.h>
+
+typedef struct st_wt_resource_id WT_RESOURCE_ID;
+
+typedef struct st_wt_resource_type {
+  int (*compare)(void *a, void *b);
+  const void *(*make_key)(WT_RESOURCE_ID *id, uint *len);
+} WT_RESOURCE_TYPE;
+
+struct st_wt_resource_id {
+  WT_RESOURCE_TYPE *type;
+  ulonglong value;
+};
+
+#define WT_WAIT_STATS  24
+#define WT_CYCLE_STATS 32
+extern ulonglong wt_wait_table[WT_WAIT_STATS];
+extern uint32    wt_wait_stats[WT_WAIT_STATS+1];
+extern uint32    wt_cycle_stats[2][WT_CYCLE_STATS+1];
+extern uint32    wt_success_stats;
+
+/*
+  'lock' protects 'owners', 'state', and 'waiter_count'
+  'id' is read-only
+
+  a resource is picked up from a hash in a lock-free manner
+  it's returned pinned, so it cannot be freed at once
+  but it may be freed right after the pin is removed
+  to be freed, a resource must:
+    1. have no owners
+    2. have no waiters
+
+  two ways to access a resource:
+    1. find it in a hash
+       - it's returned pinned.
+        a) take a lock in exclusive mode
+        b) check the state, it should be ACTIVE
+        c) unpin
+    2. by a direct reference
+       - can only be used if a resource cannot be freed
+       e.g. accessing a resource by thd->waiting_for is safe,
+       a resource cannot be freed as there's a thread waiting for it
+*/
+
+typedef struct st_wt_resource {
+  WT_RESOURCE_ID  id;
+  uint            waiter_count;
+  enum { ACTIVE, FREE } state;
+#ifndef DBUG_OFF
+  pthread_mutex_t  *mutex;
+#endif
+  /*
+    before the 'lock' all elements are mutable, after - immutable
+    in the sense that lf_hash_insert() won't memcpy() over them.
+    See wt_init().
+  */
+  rw_lock_t lock;
+  pthread_cond_t   cond;
+  DYNAMIC_ARRAY    owners;
+} WT_RESOURCE;
+
+typedef struct st_wt_thd {
+  /*
+    XXX
+    there's no protection (mutex) against concurrent access of
+    the dynarray below. it is assumed that a caller will have it
+    automatically (not to protect this array but to protect its
+    own - caller's - data structures), and we'll get it for free.
+    If not, we'll need to add a mutex
+  */
+  DYNAMIC_ARRAY   my_resources;
+  /*
+    'waiting_for' is modified under waiting_for->lock, and only by thd itself
+    'waiting_for' is read lock-free (using pinning protocol), but a thd object
+    can read its own 'waiting_for' without any locks or tricks.
+  */
+  WT_RESOURCE    *waiting_for;
+  LF_PINS        *pins;
+
+  /* pointers to values */
+  ulong *timeout_short, *deadlock_search_depth_short;
+  ulong *timeout_long, *deadlock_search_depth_long;
+
+  /*
+    weight relates to the desirability of a transaction being killed if it's
+    part of a deadlock. In a deadlock situation transactions with lower weights
+    are killed first.
+
+    Examples of using the weight to implement different selection strategies:
+
+    1. Latest
+        Keep all weights equal.
+    2. Random
+        Assign weights at random.
+        (variant: modify a weight randomly before every lock request)
+    3. Youngest
+        Set weight to -NOW()
+    4. Minimum locks
+        count locks granted in your lock manager, store the value as a weight
+    5. Minimum work
+        depends on the definition of "work". For example, store the number
+        of rows modified in this transaction (or a length of REDO log for a
+        transaction) as a weight.
+
+    It is only statistically relevant and is not protected by any locks.
+  */
+  ulong volatile weight;
+  /*
+    'killed' is indirectly protected by waiting_for->lock -
+    a killed thread needs to clear its 'waiting_for', and thus needs a lock.
+    That is, a thread needs an exclusive lock to read 'killed' reliably.
+    But other threads may change 'killed' from 0 to 1, a shared
+    lock is enough for that.
+   */
+  my_bool volatile killed;
+#ifndef DBUG_OFF
+  const char     *name;
+#endif
+} WT_THD;
+
+#define WT_TIMEOUT              ETIMEDOUT
+#define WT_OK                   0
+#define WT_DEADLOCK             -1
+#define WT_DEPTH_EXCEEDED       -2
+
+void wt_init(void);
+void wt_end(void);
+void wt_thd_lazy_init(WT_THD *, ulong *, ulong *, ulong *, ulong *);
+void wt_thd_destroy(WT_THD *);
+int wt_thd_will_wait_for(WT_THD *, WT_THD *, WT_RESOURCE_ID *);
+int wt_thd_cond_timedwait(WT_THD *, pthread_mutex_t *);
+void wt_thd_release(WT_THD *, WT_RESOURCE_ID *);
+#define wt_thd_release_all(THD) wt_thd_release((THD), 0)
+int wt_resource_id_memcmp(void *, void *);
+
+C_MODE_END
+
+#endif

=== modified file 'mysql-test/lib/mtr_cases.pl'
--- a/mysql-test/lib/mtr_cases.pl	2008-09-23 13:08:15 +0000
+++ b/mysql-test/lib/mtr_cases.pl	2008-10-20 19:13:22 +0000
@@ -541,19 +541,10 @@ sub collect_one_test_case($$$$$$$$$) {
   my $suite_opts= shift;
 
   my $path= "$testdir/$elem";
-
-  # ----------------------------------------------------------------------
-  # Skip some tests silently
-  # ----------------------------------------------------------------------
-
-  if ( $::opt_start_from and $tname lt $::opt_start_from )
-  {
-    return;
-  }
-
+  my $name= basename($suite) . ".$tname";
 
   my $tinfo= {};
-  $tinfo->{'name'}= basename($suite) . ".$tname";
+  $tinfo->{'name'}= $name;
   $tinfo->{'result_file'}= "$resdir/$tname.result";
   $tinfo->{'component_id'} = $component_id;
   push(@$cases, $tinfo);
@@ -562,7 +553,7 @@ sub collect_one_test_case($$$$$$$$$) {
   # Skip some tests but include in list, just mark them to skip
   # ----------------------------------------------------------------------
 
-  if ( $skip_test and $tname =~ /$skip_test/o )
+  if ( $skip_test and ($tname =~ /$skip_test/o || $name =~ /$skip_test/o))
   {
     $tinfo->{'skip'}= 1;
     return;

=== modified file 'mysql-test/lib/mtr_report.pl'
--- a/mysql-test/lib/mtr_report.pl	2008-10-16 14:30:35 +0000
+++ b/mysql-test/lib/mtr_report.pl	2008-10-20 19:13:22 +0000
@@ -462,6 +462,18 @@ sub mtr_report_stats ($) {
                  )) or
                 # Test case for Bug#31590 produces the following error:
                 /Out of sort memory; increase server sort buffer size/ or
+
+                # Bug#35161, test of auto repair --myisam-recover
+                /able.*_will_crash/ or
+
+                # lowercase_table3 using case sensitive option on
+                # case insensitive filesystem (InnoDB error).
+                /Cannot find or open table test\/BUG29839 from/ or
+
+                # When trying to set lower_case_table_names = 2
+                # on a case sensitive file system. Bug#37402.
+                /lower_case_table_names was set to 2, even though your the file system '.*' is case sensitive.  Now setting lower_case_table_names to 0 to avoid future problems./ or
+
                 # maria-recovery.test has warning about missing log file
                 /File '.*maria_log.000.*' not found \(Errcode: 2\)/ or
                 # and about marked-corrupted table

=== modified file 'mysql-test/mysql-test-run.pl'
--- a/mysql-test/mysql-test-run.pl	2008-10-07 17:04:28 +0000
+++ b/mysql-test/mysql-test-run.pl	2008-10-20 19:13:22 +0000
@@ -3171,6 +3171,24 @@ sub run_testcase_check_skip_test($)
   my ($tinfo)= @_;
 
   # ----------------------------------------------------------------------
+  # Skip some tests silently
+  # ----------------------------------------------------------------------
+
+  if ( $::opt_start_from )
+  {
+    if ($tinfo->{'name'} eq $::opt_start_from )
+    {
+      ## Found parting test. Run this test and all tests after this one
+      $::opt_start_from= "";
+    }
+    else
+    {
+      $tinfo->{'result'}= 'MTR_RES_SKIPPED';
+      return 1;
+    }
+  }
+
+  # ----------------------------------------------------------------------
   # If marked to skip, just print out and return.
   # Note that a test case not marked as 'skip' can still be
   # skipped later, because of the test case itself in cooperation

=== modified file 'mysql-test/r/subselect_debug.result'
--- a/mysql-test/r/subselect_debug.result	2008-07-11 14:25:45 +0000
+++ b/mysql-test/r/subselect_debug.result	2008-10-20 09:16:47 +0000
@@ -1,7 +1,7 @@
 CREATE TABLE t1(id INT);
 INSERT INTO t1 VALUES (1),(2),(3),(4);
 INSERT INTO t1 SELECT a.id FROM t1 a,t1 b,t1 c,t1 d;
-SET @orig_debug = @@debug;
+SET @orig_debug=@@debug;
 SET SESSION debug="d,subselect_exec_fail";
 SELECT SUM(EXISTS(SELECT RAND() FROM t1)) FROM t1;
 SUM(EXISTS(SELECT RAND() FROM t1))

=== modified file 'mysql-test/r/sync_frm_basic.result'
--- a/mysql-test/r/sync_frm_basic.result	2008-04-10 13:14:28 +0000
+++ b/mysql-test/r/sync_frm_basic.result	2008-10-10 15:28:41 +0000
@@ -1,7 +1,7 @@
 SET @start_value = @@global.sync_frm;
 SELECT @start_value;
 @start_value
-1
+VAL
 '#--------------------FN_DYNVARS_169_01------------------------#'
 SET @@global.sync_frm = FALSE;
 SET @@global.sync_frm = DEFAULT;
@@ -94,4 +94,4 @@ ERROR 42S22: Unknown column 'sync_frm' i
 SET @@global.sync_frm = @start_value;
 SELECT @@global.sync_frm;
 @@global.sync_frm
-1
+VAL

=== modified file 'mysql-test/suite/funcs_1/datadict/processlist_priv.inc'
--- a/mysql-test/suite/funcs_1/datadict/processlist_priv.inc	2008-08-25 15:03:24 +0000
+++ b/mysql-test/suite/funcs_1/datadict/processlist_priv.inc	2008-10-20 09:16:47 +0000
@@ -144,6 +144,7 @@ eval SELECT * FROM $table $select_where 
 --replace_column 1 ID 3 HOST_NAME 6 TIME
 eval SELECT $columns FROM $table $select_where ORDER BY id;
 --source suite/funcs_1/datadict/datadict_priv.inc
+--real_sleep 0.3
 
 
 --echo ####################################################################################

=== modified file 'mysql-test/suite/funcs_1/datadict/processlist_val.inc'
--- a/mysql-test/suite/funcs_1/datadict/processlist_val.inc	2008-08-13 20:05:34 +0000
+++ b/mysql-test/suite/funcs_1/datadict/processlist_val.inc	2008-10-20 09:16:47 +0000
@@ -72,6 +72,7 @@ echo
 # Show the definition of the PROCESSLIST table
 #--------------------------------------------------------------------------
 ;
+--replace_result ENGINE=MyISAM "" ENGINE=MARIA "" " PAGE_CHECKSUM=1" "" " PAGE_CHECKSUM=0" ""
 SHOW CREATE TABLE INFORMATION_SCHEMA.PROCESSLIST;
 
 echo

=== modified file 'mysql-test/suite/funcs_1/r/processlist_val_no_prot.result'
--- a/mysql-test/suite/funcs_1/r/processlist_val_no_prot.result	2008-08-13 20:05:34 +0000
+++ b/mysql-test/suite/funcs_1/r/processlist_val_no_prot.result	2008-10-20 09:16:47 +0000
@@ -20,7 +20,7 @@ PROCESSLIST	CREATE TEMPORARY TABLE `PROC
   `TIME` bigint(7) NOT NULL DEFAULT '0',
   `STATE` varchar(64) DEFAULT NULL,
   `INFO` longtext
-) ENGINE=MARIA DEFAULT CHARSET=utf8 PAGE_CHECKSUM=0
+)  DEFAULT CHARSET=utf8
 # Ensure that the information about the own connection is correct.
 #--------------------------------------------------------------------------
 

=== added directory 'mysql-test/suite/maria'
=== added directory 'mysql-test/suite/maria/r'
=== renamed file 'mysql-test/r/maria-autozerofill.result' => 'mysql-test/suite/maria/r/maria-autozerofill.result'
=== renamed file 'mysql-test/r/maria-big.result' => 'mysql-test/suite/maria/r/maria-big.result'
=== renamed file 'mysql-test/r/maria-big2.result' => 'mysql-test/suite/maria/r/maria-big2.result'
=== renamed file 'mysql-test/r/maria-connect.result' => 'mysql-test/suite/maria/r/maria-connect.result'
=== renamed file 'mysql-test/r/maria-gis-rtree-dynamic.result' => 'mysql-test/suite/maria/r/maria-gis-rtree-dynamic.result'
=== renamed file 'mysql-test/r/maria-gis-rtree-trans.result' => 'mysql-test/suite/maria/r/maria-gis-rtree-trans.result'
=== renamed file 'mysql-test/r/maria-gis-rtree.result' => 'mysql-test/suite/maria/r/maria-gis-rtree.result'
=== renamed file 'mysql-test/r/maria-mvcc.result' => 'mysql-test/suite/maria/r/maria-mvcc.result'
=== renamed file 'mysql-test/r/maria-no-logging.result' => 'mysql-test/suite/maria/r/maria-no-logging.result'
=== renamed file 'mysql-test/r/maria-page-checksum.result' => 'mysql-test/suite/maria/r/maria-page-checksum.result'
=== renamed file 'mysql-test/r/maria-preload.result' => 'mysql-test/suite/maria/r/maria-preload.result'
=== renamed file 'mysql-test/r/maria-purge.result' => 'mysql-test/suite/maria/r/maria-purge.result'
=== renamed file 'mysql-test/r/maria-recover.result' => 'mysql-test/suite/maria/r/maria-recover.result'
=== renamed file 'mysql-test/r/maria-recovery-big.result' => 'mysql-test/suite/maria/r/maria-recovery-big.result'
=== renamed file 'mysql-test/r/maria-recovery-bitmap.result' => 'mysql-test/suite/maria/r/maria-recovery-bitmap.result'
=== renamed file 'mysql-test/r/maria-recovery-rtree-ft.result' => 'mysql-test/suite/maria/r/maria-recovery-rtree-ft.result'
=== renamed file 'mysql-test/r/maria-recovery.result' => 'mysql-test/suite/maria/r/maria-recovery.result'
=== renamed file 'mysql-test/r/maria-recovery2.result' => 'mysql-test/suite/maria/r/maria-recovery2.result'
=== renamed file 'mysql-test/r/maria.result' => 'mysql-test/suite/maria/r/maria.result'
--- a/mysql-test/r/maria.result	2008-08-25 18:23:18 +0000
+++ b/mysql-test/suite/maria/r/maria.result	2008-10-20 13:03:34 +0000
@@ -6,6 +6,7 @@ set session storage_engine=maria;
 set global maria_page_checksum=0;
 set global maria_log_file_size=4294967295;
 drop table if exists t1,t2;
+drop view if exists v1;
 SET SQL_WARNINGS=1;
 CREATE TABLE t1 (
 STRING_DATA char(255) default NULL,
@@ -1749,6 +1750,28 @@ id	ref
 3	2
 4	5
 DROP TABLE t1, t2;
+CREATE TABLE t1 (a INT) ENGINE=MARIA CHECKSUM=1 ROW_FORMAT=DYNAMIC;
+INSERT INTO t1 VALUES (0);
+UPDATE t1 SET a=1;
+SELECT a FROM t1;
+a
+1
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+INSERT INTO t1 VALUES (0), (5), (4), (2);
+UPDATE t1 SET a=2;
+SELECT a FROM t1;
+a
+2
+2
+2
+2
+2
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+DROP TABLE t1;
 End of 5.0 tests
 create table t1 (a int not null, key `a` (a) key_block_size=1024);
 show create table t1;
@@ -1866,6 +1889,287 @@ t1	CREATE TABLE `t1` (
   KEY `b` (`b`) KEY_BLOCK_SIZE=8192
 ) ENGINE=MARIA DEFAULT CHARSET=latin1 PAGE_CHECKSUM=0 KEY_BLOCK_SIZE=16384
 drop table t1;
+create table t1 (a int not null, key `a` (a) key_block_size=512);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  KEY `a` (`a`) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1 PAGE_CHECKSUM=0
+drop table t1;
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000);
+Warnings:
+Warning	1071	Specified key was too long; max key length is 1444 bytes
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` varchar(2048) DEFAULT NULL,
+  KEY `a` (`a`(1444)) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1 PAGE_CHECKSUM=0
+drop table t1;
+create table t1 (a int not null, key `a` (a) key_block_size=1025);
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL,
+  KEY `a` (`a`) KEY_BLOCK_SIZE=8192
+) ENGINE=MARIA DEFAULT CHARSET=latin1 PAGE_CHECKSUM=0
+drop table t1;
+create table t1 (a int not null, key key_block_size=1024 (a));
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '=1024 (a))' at line 1
+create table t1 (a int not null, key `a` key_block_size=1024 (a));
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'key_block_size=1024 (a))' at line 1
+CREATE TABLE t1 (
+c1 INT,
+c2 VARCHAR(300),
+KEY (c1) KEY_BLOCK_SIZE 1024,
+KEY (c2) KEY_BLOCK_SIZE 8192
+);
+INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))),
+(11, REPEAT('b', CEIL(RAND() * 300))),
+(12, REPEAT('c', CEIL(RAND() * 300))),
+(13, REPEAT('d', CEIL(RAND() * 300))),
+(14, REPEAT('e', CEIL(RAND() * 300))),
+(15, REPEAT('f', CEIL(RAND() * 300))),
+(16, REPEAT('g', CEIL(RAND() * 300))),
+(17, REPEAT('h', CEIL(RAND() * 300))),
+(18, REPEAT('i', CEIL(RAND() * 300))),
+(19, REPEAT('j', CEIL(RAND() * 300))),
+(20, REPEAT('k', CEIL(RAND() * 300))),
+(21, REPEAT('l', CEIL(RAND() * 300))),
+(22, REPEAT('m', CEIL(RAND() * 300))),
+(23, REPEAT('n', CEIL(RAND() * 300))),
+(24, REPEAT('o', CEIL(RAND() * 300))),
+(25, REPEAT('p', CEIL(RAND() * 300))),
+(26, REPEAT('q', CEIL(RAND() * 300))),
+(27, REPEAT('r', CEIL(RAND() * 300))),
+(28, REPEAT('s', CEIL(RAND() * 300))),
+(29, REPEAT('t', CEIL(RAND() * 300))),
+(30, REPEAT('u', CEIL(RAND() * 300))),
+(31, REPEAT('v', CEIL(RAND() * 300))),
+(32, REPEAT('w', CEIL(RAND() * 300))),
+(33, REPEAT('x', CEIL(RAND() * 300))),
+(34, REPEAT('y', CEIL(RAND() * 300))),
+(35, REPEAT('z', CEIL(RAND() * 300)));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+REPAIR TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	repair	status	OK
+DELETE FROM t1 WHERE c1 >= 10;
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 CHAR(130),
+c2 VARCHAR(1)
+) ENGINE=maria;
+INSERT INTO t1 VALUES(REPEAT("a",128), 'b');
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+REPAIR TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	repair	status	OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 CHAR(130),
+c2 VARCHAR(1)
+) ENGINE=maria;
+INSERT INTO t1 VALUES(REPEAT("a",128), 'b');
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+CHECK TABLE t1 EXTENDED;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+REPAIR TABLE t1 EXTENDED;
+Table	Op	Msg_type	Msg_text
+test.t1	repair	status	OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+CHECK TABLE t1 EXTENDED;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 CHAR(130),
+c2 VARCHAR(1)
+) ENGINE=maria;
+INSERT INTO t1 VALUES(REPEAT("a",128), 'b');
+INSERT INTO t1 VALUES('b', 'b');
+INSERT INTO t1 VALUES('c', 'b');
+DELETE FROM t1 WHERE c1='b';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+OPTIMIZE TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	optimize	status	OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 CHAR(130),
+c2 VARCHAR(1),
+KEY (c1)
+) ENGINE=maria;
+# Insert 100 rows. Query log disabled.
+UPDATE t1 SET c1=REPEAT("a",128) LIMIT 90;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+100
+ALTER TABLE t1 ENGINE=maria;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+100
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+CHECK TABLE t1 EXTENDED;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 CHAR(50),
+c2 VARCHAR(1)
+) ENGINE=maria DEFAULT CHARSET UTF8;
+INSERT INTO t1 VALUES(REPEAT(_utf8 x'e0ae85',43), 'b');
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+REPAIR TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	repair	status	OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 CHAR(50),
+c2 VARCHAR(1)
+) ENGINE=maria DEFAULT CHARSET UTF8;
+INSERT INTO t1 VALUES(REPEAT(_utf8 x'e0ae85',43), 'b');
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+CHECK TABLE t1 EXTENDED;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+REPAIR TABLE t1 EXTENDED;
+Table	Op	Msg_type	Msg_text
+test.t1	repair	status	OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+CHECK TABLE t1 EXTENDED;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 CHAR(50),
+c2 VARCHAR(1)
+) ENGINE=maria DEFAULT CHARSET UTF8;
+INSERT INTO t1 VALUES(REPEAT(_utf8 x'e0ae85',43), 'b');
+INSERT INTO t1 VALUES('b', 'b');
+INSERT INTO t1 VALUES('c', 'b');
+DELETE FROM t1 WHERE c1='b';
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+OPTIMIZE TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	optimize	status	OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 CHAR(50),
+c2 VARCHAR(1),
+KEY (c1)
+) ENGINE=maria DEFAULT CHARSET UTF8;
+# Insert 100 rows. Query log disabled.
+UPDATE t1 SET c1=REPEAT(_utf8 x'e0ae85',43) LIMIT 90;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+100
+ALTER TABLE t1 ENGINE=maria;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+100
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+CHECK TABLE t1 EXTENDED;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 VARCHAR(10) NOT NULL,
+c2 CHAR(10) DEFAULT NULL,
+c3 VARCHAR(10) NOT NULL,
+KEY (c1),
+KEY (c2)
+) ENGINE=maria DEFAULT CHARSET=utf8 PACK_KEYS=0;
+MARIA file:          MYSQLTEST_VARDIR/master-data/test/t1
+Record format:       Block
+Crashsafe:           yes
+Character set:       utf8_general_ci (45)
+Data records:                       0  Deleted blocks:                      0
+Block_size:                      8192
+Recordlength:                     129
+
+table description:
+Key Start Len Index   Type
+1   2     40  multip. varchar                
+2   43    40  multip. char NULL              
+DROP TABLE t1;
+create table t1 (n int not null, c char(1)) transactional=1;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `n` int(11) NOT NULL,
+  `c` char(1) DEFAULT NULL
+) ENGINE=MARIA DEFAULT CHARSET=latin1 PAGE_CHECKSUM=0 TRANSACTIONAL=1
+drop table t1;
+CREATE TABLE t1 (line LINESTRING NOT NULL) engine=maria;
+INSERT INTO t1 VALUES (GeomFromText("POINT(0 0)"));
+checksum table t1;
+Table	Checksum
+test.t1	326284887
+CREATE TABLE t2 (line LINESTRING NOT NULL) engine=maria;
+INSERT INTO t2 VALUES (GeomFromText("POINT(0 0)"));
+checksum table t2;
+Table	Checksum
+test.t2	326284887
+CREATE TABLE t3 select * from t1;
+checksum table t3;
+Table	Checksum
+test.t3	326284887
+drop table t1,t2,t3;
+End of 5.1 tests
 create table t2(a varchar(255),key(a))engine=maria row_format=dynamic transactional=0;
 insert into t2 values (repeat('o',124)), (repeat('h',226)), (repeat('i',236)),
 (repeat('l',234)), (repeat('b',13)), (repeat('g',236)), (repeat('y',205)),
@@ -1901,6 +2205,19 @@ check table t2 extended;
 Table	Op	Msg_type	Msg_text
 test.t2	check	status	OK
 drop table t2;
+create table t1 (a int unique) transactional=1;
+insert t1 values (1);
+lock table t1 write concurrent;
+insert t1 values (2);
+lock table t1 write concurrent;
+insert t1 values (3);
+insert t1 values (2);
+insert t1 values (3);
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+unlock tables;
+ERROR 23000: Duplicate entry '2' for key 'a'
+unlock tables;
+drop table t1;
 CREATE TABLE t1 (
 col0 float DEFAULT NULL,
 col1 date DEFAULT NULL,   
@@ -2234,7 +2551,58 @@ insert into t1 values (1);
 lock table t1 write concurrent;
 delete from t1;
 ERROR 42000: The storage engine for the table doesn't support DELETE in WRITE CONCURRENT
-drop table t1;
-ERROR HY000: Table 't1' was locked with a READ lock and can't be updated
 unlock tables;
 drop table t1;
+create table t1 (p int primary key, i int, a char(10), key k1(i), key k2(a))
+engine maria;
+insert into t1 values (1, 1, 'qqqq'), (2, 1, 'pppp'),
+(3, 1, 'yyyy'), (4, 3, 'zzzz');
+insert into t1 values (5, 3, 'yyyy'), (6, 3, 'yyyy'), (7, 0, NULL),
+(8, 0, NULL);
+select * from t1 where a='zzzz';
+p	i	a
+4	3	zzzz
+select * from t1 where a='yyyy';
+p	i	a
+3	1	yyyy
+5	3	yyyy
+6	3	yyyy
+select * from t1 where a is NULL;
+p	i	a
+7	0	NULL
+8	0	NULL
+select * from t1;
+p	i	a
+1	1	qqqq
+2	1	pppp
+3	1	yyyy
+4	3	zzzz
+5	3	yyyy
+6	3	yyyy
+7	0	NULL
+8	0	NULL
+check table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+drop table t1;
+create table t1 (f1 int unique, f2 int) engine=maria;
+create table t2 (f3 int, f4 int) engine=maria;
+create view v1 as select * from t1, t2 where f1= f3;
+insert into t1 values (1,11), (2,22);
+insert into v1 (f1) values (3) on duplicate key update f1= f3 + 10;
+insert into v1 (f1) values (3) on duplicate key update f1= f3 + 10;
+drop table t1,t2;
+drop view v1;
+CREATE TABLE t1 (id int, c varchar(10)) engine=maria;
+INSERT INTO t1 VALUES (1,"1");
+ALTER TABLE t1 CHANGE c d varchar(10);
+affected rows: 0
+info: Records: 0  Duplicates: 0  Warnings: 0
+drop table t1;
+create table t1 (c1 int);
+create table t2 (c1 int);
+lock table t1 read, t2 read;
+flush tables with read lock;
+ERROR HY000: Can't execute the given command because you have active locked tables or an active transaction
+unlock tables;
+drop table t1, t2;

=== renamed file 'mysql-test/r/maria2.result' => 'mysql-test/suite/maria/r/maria2.result'
=== renamed file 'mysql-test/r/maria3.result' => 'mysql-test/suite/maria/r/maria3.result'
--- a/mysql-test/r/maria3.result	2008-08-27 15:48:32 +0000
+++ b/mysql-test/suite/maria/r/maria3.result	2008-10-20 09:16:47 +0000
@@ -506,7 +506,7 @@ count(*)
 select count(*) from t1 where a >= 4;
 count(*)
 1
-drop table t1;
+drop table t1, t2;
 create table t1 (i int auto_increment not null primary key) transactional=0;
 check table t1 extended;
 Table	Op	Msg_type	Msg_text
@@ -540,3 +540,14 @@ TABLE_SCHEMA='test' and TABLE_NAME='t1';
 CREATE_OPTIONS
 transactional=1
 drop table t1;
+create table t1 (a int, unique(a)) engine=maria transactional=1;
+insert into t1 values(1);
+insert into t1 values(2),(2);
+ERROR 23000: Duplicate entry '2' for key 'a'
+create table t2 (a int, unique(a)) engine=maria transactional=0 row_format=dynamic;
+insert into t2 values(1);
+insert into t2 values(2),(2);
+ERROR 23000: Duplicate entry '2' for key 'a'
+insert into t1 values(3);
+insert into t2 values(3);
+drop table t1, t2;

=== renamed file 'mysql-test/r/maria_notembedded.result' => 'mysql-test/suite/maria/r/maria_notembedded.result'
=== added file 'mysql-test/suite/maria/r/maria_partition.result'
--- a/mysql-test/suite/maria/r/maria_partition.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/r/maria_partition.result	2008-10-15 12:44:31 +0000
@@ -0,0 +1,12 @@
+set global storage_engine=maria;
+set session storage_engine=maria;
+set global maria_page_checksum=0;
+set global maria_log_file_size=4294967295;
+drop table if exists t1,t2;
+drop view if exists v1;
+SET SQL_WARNINGS=1;
+create table t1 (s1 int);
+insert into t1 values (1);
+alter table t1 partition by list (s1) (partition p1 values in (2));
+ERROR HY000: Table has no partition for value 1
+drop table t1;

=== renamed file 'mysql-test/r/ps_maria.result' => 'mysql-test/suite/maria/r/ps_maria.result'
=== added directory 'mysql-test/suite/maria/t'
=== renamed file 'mysql-test/t/maria-autozerofill.test' => 'mysql-test/suite/maria/t/maria-autozerofill.test'
=== renamed file 'mysql-test/t/maria-big.test' => 'mysql-test/suite/maria/t/maria-big.test'
=== renamed file 'mysql-test/t/maria-big2.test' => 'mysql-test/suite/maria/t/maria-big2.test'
=== renamed file 'mysql-test/t/maria-connect.test' => 'mysql-test/suite/maria/t/maria-connect.test'
=== renamed file 'mysql-test/t/maria-gis-rtree-dynamic.test' => 'mysql-test/suite/maria/t/maria-gis-rtree-dynamic.test'
=== renamed file 'mysql-test/t/maria-gis-rtree-trans.test' => 'mysql-test/suite/maria/t/maria-gis-rtree-trans.test'
=== renamed file 'mysql-test/t/maria-gis-rtree.test' => 'mysql-test/suite/maria/t/maria-gis-rtree.test'
=== renamed file 'mysql-test/t/maria-mvcc.test' => 'mysql-test/suite/maria/t/maria-mvcc.test'
=== renamed file 'mysql-test/t/maria-no-logging.test' => 'mysql-test/suite/maria/t/maria-no-logging.test'
=== renamed file 'mysql-test/t/maria-page-checksum.test' => 'mysql-test/suite/maria/t/maria-page-checksum.test'
=== renamed file 'mysql-test/t/maria-preload.test' => 'mysql-test/suite/maria/t/maria-preload.test'
=== renamed file 'mysql-test/t/maria-purge.test' => 'mysql-test/suite/maria/t/maria-purge.test'
=== renamed file 'mysql-test/t/maria-recover-master.opt' => 'mysql-test/suite/maria/t/maria-recover-master.opt'
=== renamed file 'mysql-test/t/maria-recover.test' => 'mysql-test/suite/maria/t/maria-recover.test'
=== renamed file 'mysql-test/t/maria-recovery-big-master.opt' => 'mysql-test/suite/maria/t/maria-recovery-big-master.opt'
=== renamed file 'mysql-test/t/maria-recovery-big.test' => 'mysql-test/suite/maria/t/maria-recovery-big.test'
=== renamed file 'mysql-test/t/maria-recovery-bitmap-master.opt' => 'mysql-test/suite/maria/t/maria-recovery-bitmap-master.opt'
=== renamed file 'mysql-test/t/maria-recovery-bitmap.test' => 'mysql-test/suite/maria/t/maria-recovery-bitmap.test'
=== renamed file 'mysql-test/t/maria-recovery-master.opt' => 'mysql-test/suite/maria/t/maria-recovery-master.opt'
=== renamed file 'mysql-test/t/maria-recovery-rtree-ft-master.opt' => 'mysql-test/suite/maria/t/maria-recovery-rtree-ft-master.opt'
=== renamed file 'mysql-test/t/maria-recovery-rtree-ft.test' => 'mysql-test/suite/maria/t/maria-recovery-rtree-ft.test'
=== renamed file 'mysql-test/t/maria-recovery.test' => 'mysql-test/suite/maria/t/maria-recovery.test'
=== renamed file 'mysql-test/t/maria-recovery2-master.opt' => 'mysql-test/suite/maria/t/maria-recovery2-master.opt'
=== renamed file 'mysql-test/t/maria-recovery2.test' => 'mysql-test/suite/maria/t/maria-recovery2.test'
=== renamed file 'mysql-test/t/maria.test' => 'mysql-test/suite/maria/t/maria.test'
--- a/mysql-test/t/maria.test	2008-08-25 18:23:18 +0000
+++ b/mysql-test/suite/maria/t/maria.test	2008-10-20 13:03:34 +0000
@@ -17,6 +17,7 @@ set global maria_log_file_size=429496729
 # Initialise
 --disable_warnings
 drop table if exists t1,t2;
+drop view if exists v1;
 --enable_warnings
 SET SQL_WARNINGS=1;
 
@@ -1088,7 +1089,7 @@ CHECK TABLE t1 EXTENDED;
 DROP TABLE t1;
 
 #
-# Bug#28837: Maria storage engine error (134) doing delete with self-join
+# Test doing delete with self-join
 #
 
 CREATE TABLE t1 (id int NOT NULL, ref int NOT NULL, INDEX (id));
@@ -1104,6 +1105,20 @@ SELECT * FROM t1;
 
 DROP TABLE t1, t2;
 
+#
+# Bug#37310: 'on update CURRENT_TIMESTAMP' option crashes the table
+#
+CREATE TABLE t1 (a INT) ENGINE=MARIA CHECKSUM=1 ROW_FORMAT=DYNAMIC;
+INSERT INTO t1 VALUES (0);
+UPDATE t1 SET a=1;
+SELECT a FROM t1;
+CHECK TABLE t1;
+INSERT INTO t1 VALUES (0), (5), (4), (2);
+UPDATE t1 SET a=2;
+SELECT a FROM t1;
+CHECK TABLE t1;
+DROP TABLE t1;
+
 --echo End of 5.0 tests
 
 
@@ -1150,6 +1165,274 @@ create table t1 (a int not null, b int, 
 show create table t1;
 drop table t1;
 
+# Test limits and errors of key_block_size
+
+create table t1 (a int not null, key `a` (a) key_block_size=512);
+show create table t1;
+drop table t1;
+
+create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000);
+show create table t1;
+drop table t1;
+
+create table t1 (a int not null, key `a` (a) key_block_size=1025);
+show create table t1;
+drop table t1;
+
+--error 1064
+create table t1 (a int not null, key key_block_size=1024 (a));
+--error 1064
+create table t1 (a int not null, key `a` key_block_size=1024 (a));
+
+#
+# Bug#22119 - Changing MI_KEY_BLOCK_LENGTH makes a wrong myisamchk
+#
+CREATE TABLE t1 (
+  c1 INT,
+  c2 VARCHAR(300),
+  KEY (c1) KEY_BLOCK_SIZE 1024,
+  KEY (c2) KEY_BLOCK_SIZE 8192
+  );
+INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))),
+  (11, REPEAT('b', CEIL(RAND() * 300))),
+  (12, REPEAT('c', CEIL(RAND() * 300))),
+  (13, REPEAT('d', CEIL(RAND() * 300))),
+  (14, REPEAT('e', CEIL(RAND() * 300))),
+  (15, REPEAT('f', CEIL(RAND() * 300))),
+  (16, REPEAT('g', CEIL(RAND() * 300))),
+  (17, REPEAT('h', CEIL(RAND() * 300))),
+  (18, REPEAT('i', CEIL(RAND() * 300))),
+  (19, REPEAT('j', CEIL(RAND() * 300))),
+  (20, REPEAT('k', CEIL(RAND() * 300))),
+  (21, REPEAT('l', CEIL(RAND() * 300))),
+  (22, REPEAT('m', CEIL(RAND() * 300))),
+  (23, REPEAT('n', CEIL(RAND() * 300))),
+  (24, REPEAT('o', CEIL(RAND() * 300))),
+  (25, REPEAT('p', CEIL(RAND() * 300))),
+  (26, REPEAT('q', CEIL(RAND() * 300))),
+  (27, REPEAT('r', CEIL(RAND() * 300))),
+  (28, REPEAT('s', CEIL(RAND() * 300))),
+  (29, REPEAT('t', CEIL(RAND() * 300))),
+  (30, REPEAT('u', CEIL(RAND() * 300))),
+  (31, REPEAT('v', CEIL(RAND() * 300))),
+  (32, REPEAT('w', CEIL(RAND() * 300))),
+  (33, REPEAT('x', CEIL(RAND() * 300))),
+  (34, REPEAT('y', CEIL(RAND() * 300))),
+  (35, REPEAT('z', CEIL(RAND() * 300)));
+INSERT INTO t1 SELECT * FROM t1;
+INSERT INTO t1 SELECT * FROM t1;
+CHECK TABLE t1;
+REPAIR TABLE t1;
+DELETE FROM t1 WHERE c1 >= 10;
+CHECK TABLE t1;
+DROP TABLE t1;
+
+#
+# Bug#33222 - myisam-table drops rows when column is added
+#             and a char-field > 128 exists
+#
+# Test #1 - CHECK TABLE sees wrong record, REPAIR TABLE deletes it.
+# Using a CHAR column that can have > 127 characters.
+# Using a VARCHAR to create a table with dynamic row format.
+CREATE TABLE t1 (
+  c1 CHAR(130),
+  c2 VARCHAR(1)
+) ENGINE=maria;
+INSERT INTO t1 VALUES(REPEAT("a",128), 'b');
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1;
+REPAIR TABLE t1;
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1;
+DROP TABLE t1;
+#
+# Test #2 - same as test #1, but using EXTENDED.
+# Using a CHAR column that can have > 127 characters.
+# Using a VARCHAR to create a table with dynamic row format.
+CREATE TABLE t1 (
+  c1 CHAR(130),
+  c2 VARCHAR(1)
+) ENGINE=maria;
+INSERT INTO t1 VALUES(REPEAT("a",128), 'b');
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1 EXTENDED;
+REPAIR TABLE t1 EXTENDED;
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1 EXTENDED;
+DROP TABLE t1;
+#
+# Test #3 - same as test #1, but using OPTIMIZE TABLE.
+# Using a CHAR column that can have > 127 characters.
+# Using a VARCHAR to create a table with dynamic row format.
+CREATE TABLE t1 (
+  c1 CHAR(130),
+  c2 VARCHAR(1)
+) ENGINE=maria;
+INSERT INTO t1 VALUES(REPEAT("a",128), 'b');
+# Insert more rows and delete one in the middle to force optimize.
+INSERT INTO t1 VALUES('b', 'b');
+INSERT INTO t1 VALUES('c', 'b');
+DELETE FROM t1 WHERE c1='b';
+SELECT COUNT(*) FROM t1;
+OPTIMIZE TABLE t1;
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+#
+# Test #4 - ALTER TABLE deletes rows.
+# Using a CHAR column that can have > 127 characters.
+# Using a VARCHAR to create a table with dynamic row format.
+# Using an index which can be disabled during bulk insert.
+CREATE TABLE t1 (
+  c1 CHAR(130),
+  c2 VARCHAR(1),
+  KEY (c1)
+) ENGINE=maria;
+#
+# Insert 100 rows. This turns bulk insert on during the copy phase of
+# ALTER TABLE. Bulk insert disables keys before the insert and re-enables
+# them by repair after the insert.
+--disable_query_log
+let $count= 100;
+--echo # Insert $count rows. Query log disabled.
+while ($count)
+{
+  INSERT INTO t1 VALUES ('a', 'b');
+  dec $count;
+}
+--enable_query_log
+#
+# Change most of the rows into long character values with > 127 characters.
+UPDATE t1 SET c1=REPEAT("a",128) LIMIT 90;
+SELECT COUNT(*) FROM t1;
+ALTER TABLE t1 ENGINE=maria;
+#
+# With bug present, this shows that all long rows are gone.
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1;
+CHECK TABLE t1 EXTENDED;
+DROP TABLE t1;
+#
+# Test #5 - same as test #1 but UTF-8.
+# Using a CHAR column that can have > 127 characters.
+# Using a VARCHAR to create a table with dynamic row format.
+CREATE TABLE t1 (
+  c1 CHAR(50),
+  c2 VARCHAR(1)
+) ENGINE=maria DEFAULT CHARSET UTF8;
+# Using Tamil Letter A, Unicode U+0B85
+INSERT INTO t1 VALUES(REPEAT(_utf8 x'e0ae85',43), 'b');
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1;
+REPAIR TABLE t1;
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1;
+DROP TABLE t1;
+#
+# Test #6 - same as test #2, but UTF-8.
+# Using a CHAR column that can have > 127 characters.
+# Using a VARCHAR to create a table with dynamic row format.
+CREATE TABLE t1 (
+  c1 CHAR(50),
+  c2 VARCHAR(1)
+) ENGINE=maria DEFAULT CHARSET UTF8;
+# Using Tamil Letter A, Unicode U+0B85
+INSERT INTO t1 VALUES(REPEAT(_utf8 x'e0ae85',43), 'b');
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1 EXTENDED;
+REPAIR TABLE t1 EXTENDED;
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1 EXTENDED;
+DROP TABLE t1;
+#
+# Test #7 - same as test #3, but UTF-8.
+# Using a CHAR column that can have > 127 characters.
+# Using a VARCHAR to create a table with dynamic row format.
+CREATE TABLE t1 (
+  c1 CHAR(50),
+  c2 VARCHAR(1)
+) ENGINE=maria DEFAULT CHARSET UTF8;
+# Using Tamil Letter A, Unicode U+0B85
+INSERT INTO t1 VALUES(REPEAT(_utf8 x'e0ae85',43), 'b');
+# Insert more rows and delete one in the middle to force optimize.
+INSERT INTO t1 VALUES('b', 'b');
+INSERT INTO t1 VALUES('c', 'b');
+DELETE FROM t1 WHERE c1='b';
+SELECT COUNT(*) FROM t1;
+OPTIMIZE TABLE t1;
+SELECT COUNT(*) FROM t1;
+DROP TABLE t1;
+#
+# Test #8 - same as test #4, but UTF-8.
+# Using a CHAR column that can have > 42 UTF-8 characters.
+# Using a VARCHAR to create a table with dynamic row format.
+# Using an index which can be disabled during bulk insert.
+CREATE TABLE t1 (
+  c1 CHAR(50),
+  c2 VARCHAR(1),
+  KEY (c1)
+) ENGINE=maria DEFAULT CHARSET UTF8;
+#
+# Insert 100 rows. This turns bulk insert on during the copy phase of
+# ALTER TABLE. Bulk insert disables keys before the insert and re-enables
+# them by repair after the insert.
+--disable_query_log
+let $count= 100;
+--echo # Insert $count rows. Query log disabled.
+while ($count)
+{
+  INSERT INTO t1 VALUES ('a', 'b');
+  dec $count;
+}
+--enable_query_log
+#
+# Change most of the rows into long character values with > 42 characters.
+# Using Tamil Letter A, Unicode U+0B85
+UPDATE t1 SET c1=REPEAT(_utf8 x'e0ae85',43) LIMIT 90;
+SELECT COUNT(*) FROM t1;
+ALTER TABLE t1 ENGINE=maria;
+#
+# With bug present, this shows that all long rows are gone.
+SELECT COUNT(*) FROM t1;
+CHECK TABLE t1;
+CHECK TABLE t1 EXTENDED;
+DROP TABLE t1;
+
+#
+# Bug#29182 - MyISAMCHK reports wrong character set
+#
+CREATE TABLE t1 (
+  c1 VARCHAR(10) NOT NULL,
+  c2 CHAR(10) DEFAULT NULL,
+  c3 VARCHAR(10) NOT NULL,
+  KEY (c1),
+  KEY (c2)
+) ENGINE=maria DEFAULT CHARSET=utf8 PACK_KEYS=0;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--exec $MARIA_CHK -d $MYSQLTEST_VARDIR/master-data/test/t1
+DROP TABLE t1;
+
+# Test warnings with transactional=1 with MyISAM
+#
+create table t1 (n int not null, c char(1)) transactional=1;
+show create table t1;
+drop table t1;
+
+#
+# Test of BUG#35570 CHECKSUM TABLE unreliable if LINESTRING field
+# (same content / different checksum)
+#
+
+CREATE TABLE t1 (line LINESTRING NOT NULL) engine=maria;
+INSERT INTO t1 VALUES (GeomFromText("POINT(0 0)"));
+checksum table t1;
+CREATE TABLE t2 (line LINESTRING NOT NULL) engine=maria;
+INSERT INTO t2 VALUES (GeomFromText("POINT(0 0)"));
+checksum table t2;
+CREATE TABLE t3 select * from t1;
+checksum table t3;
+drop table t1,t2,t3;
+--echo End of 5.1 tests
+
 #
 # from bug37276_reduced_corruption.sql
 #
@@ -1188,6 +1471,31 @@ insert into t2 values (repeat('x',28)), 
 check table t2 extended;
 drop table t2;
 
+#
+# an example of a deadlock
+#
+create table t1 (a int unique) transactional=1;
+insert t1 values (1);
+lock table t1 write concurrent;
+insert t1 values (2);
+connect(con_d,localhost,root,,);
+lock table t1 write concurrent;
+insert t1 values (3);
+send insert t1 values (2);
+connection default;
+let $wait_condition=select count(*) = 1 from information_schema.processlist where state="waiting for a resource";
+--source include/wait_condition.inc
+--error ER_LOCK_DEADLOCK
+insert t1 values (3);
+unlock tables;
+connection con_d;
+--error ER_DUP_ENTRY
+reap;
+unlock tables;
+disconnect con_d;
+connection default;
+drop table t1;
+
 CREATE TABLE t1 (
 col0 float DEFAULT NULL,
 col1 date DEFAULT NULL,   
@@ -1524,14 +1832,68 @@ lock table t1 write concurrent;
 # should be fixed with fully implemented versioning
 --error ER_CHECK_NOT_IMPLEMENTED
 delete from t1;
---error 1099
-drop table t1;
 unlock tables;
 drop table t1;
 
+#
+# Bug#39243 SELECT WHERE does not find row
+# (Problem with skip_row)
+#
+
+create table t1 (p int primary key, i int, a char(10), key k1(i), key k2(a))
+engine maria;
+insert into t1 values (1, 1, 'qqqq'), (2, 1, 'pppp'),
+                      (3, 1, 'yyyy'), (4, 3, 'zzzz');
+insert into t1 values (5, 3, 'yyyy'), (6, 3, 'yyyy'), (7, 0, NULL),
+                      (8, 0, NULL);
+select * from t1 where a='zzzz';
+select * from t1 where a='yyyy';
+select * from t1 where a is NULL;
+select * from t1;
+check table t1;
+drop table t1;
+
+#
+# Bug39248 INSERT ON DUPLICATE KEY UPDATE gives error if using a view
+# Note that this only crashes when using
+# --mysqld=--binlog-format=row --ps-protocol
+#
+
+create table t1 (f1 int unique, f2 int) engine=maria;
+create table t2 (f3 int, f4 int) engine=maria;
+create view v1 as select * from t1, t2 where f1= f3;
+insert into t1 values (1,11), (2,22);
+insert into v1 (f1) values (3) on duplicate key update f1= f3 + 10;
+insert into v1 (f1) values (3) on duplicate key update f1= f3 + 10;
+drop table t1,t2;
+drop view v1;
+
+#
+# BUG#39399 ALTER TABLE renaming column: affected_rows > 0
+#
+
+CREATE TABLE t1 (id int, c varchar(10)) engine=maria;
+INSERT INTO t1 VALUES (1,"1");
+--enable_info
+ALTER TABLE t1 CHANGE c d varchar(10);
+--disable_info
+drop table t1;
+
+#
+# Bug #39226 Maria: crash with FLUSH TABLES WITH READ LOCK after LOCK TABLES
+
+create table t1 (c1 int);
+create table t2 (c1 int);
+lock table t1 read, t2 read;
+# 6.0 throws this error, unlike 5.1
+--error ER_LOCK_OR_ACTIVE_TRANSACTION
+flush tables with read lock;
+unlock tables;
+drop table t1, t2;
+
+# Set defaults back
 --disable_result_log
 --disable_query_log
 eval set global storage_engine=$default_engine, maria_page_checksum=$default_checksum;
 --enable_result_log
 --enable_query_log
-

=== renamed file 'mysql-test/t/maria2.test' => 'mysql-test/suite/maria/t/maria2.test'
=== renamed file 'mysql-test/t/maria3.test' => 'mysql-test/suite/maria/t/maria3.test'
--- a/mysql-test/t/maria3.test	2008-07-01 20:47:09 +0000
+++ b/mysql-test/suite/maria/t/maria3.test	2008-10-09 20:03:54 +0000
@@ -406,7 +406,7 @@ insert into t2 select * from t1;
 insert into t1 select NULL from t2;
 select count(*) from t1;
 select count(*) from t1 where a >= 4;
-drop table t1;
+drop table t1, t2;
 
 #
 # Test problems with small rows and row_type=page 
@@ -461,6 +461,24 @@ select CREATE_OPTIONS from information_s
 TABLE_SCHEMA='test' and TABLE_NAME='t1';
 drop table t1;
 
+#
+# BUG#39697 - Maria: hang when failing to insert due to UNIQUE
+#
+create table t1 (a int, unique(a)) engine=maria transactional=1;
+insert into t1 values(1);
+--error 1062
+insert into t1 values(2),(2);
+create table t2 (a int, unique(a)) engine=maria transactional=0 row_format=dynamic;
+insert into t2 values(1);
+--error 1062
+insert into t2 values(2),(2);
+connect (root,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
+connection root;
+insert into t1 values(3);
+insert into t2 values(3);
+connection default;
+drop table t1, t2;
+
 # End of 5.1 tests
 
 --disable_result_log

=== renamed file 'mysql-test/t/maria_notembedded.test' =>
'mysql-test/suite/maria/t/maria_notembedded.test'
=== added file 'mysql-test/suite/maria/t/maria_partition.test'
--- a/mysql-test/suite/maria/t/maria_partition.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/t/maria_partition.test	2008-10-15 12:44:31 +0000
@@ -0,0 +1,35 @@
+# Maria tests which require partitioning enabled
+
+--source include/have_partition.inc
+-- source include/have_maria.inc
+
+let $default_engine=`select @@global.storage_engine`;
+let $default_checksum=`select @@global.maria_page_checksum`;
+set global storage_engine=maria;
+set session storage_engine=maria;
+set global maria_page_checksum=0;
+set global maria_log_file_size=4294967295;
+
+# Initialise
+--disable_warnings
+drop table if exists t1,t2;
+drop view if exists v1;
+--enable_warnings
+SET SQL_WARNINGS=1;
+
+#
+# Bug #39227 Maria: crash with ALTER TABLE PARTITION
+#
+
+create table t1 (s1 int);
+insert into t1 values (1);
+--error ER_NO_PARTITION_FOR_GIVEN_VALUE
+alter table t1 partition by list (s1) (partition p1 values in (2));
+drop table t1;
+
+# Set defaults back
+--disable_result_log
+--disable_query_log
+eval set global storage_engine=$default_engine, maria_page_checksum=$default_checksum;
+--enable_result_log
+--enable_query_log

=== renamed file 'mysql-test/t/ps_maria.test' =>
'mysql-test/suite/maria/t/ps_maria.test'
=== modified file 'mysql-test/t/merge.test'
--- a/mysql-test/t/merge.test	2008-07-09 07:12:43 +0000
+++ b/mysql-test/t/merge.test	2008-10-20 09:16:47 +0000
@@ -1274,7 +1274,6 @@ DROP TABLE t1, t2, t3, t4;
 
 --echo End of 5.1 tests
 
-
 --echo #
 --echo # An additional test case for Bug#27430 Crash in subquery code
 --echo # when in PS and table DDL changed after PREPARE
@@ -1624,8 +1623,6 @@ DROP TRIGGER t2_au;
 DROP FUNCTION f1;
 DROP TABLE tm1, t1, t2, t3, t4, t5;
 
-
-
 --echo End of 6.0 tests
 
 --disable_result_log

=== modified file 'mysql-test/t/subselect_debug.test'
--- a/mysql-test/t/subselect_debug.test	2008-07-11 14:25:45 +0000
+++ b/mysql-test/t/subselect_debug.test	2008-10-20 09:16:47 +0000
@@ -9,7 +9,7 @@ CREATE TABLE t1(id INT);
 INSERT INTO t1 VALUES (1),(2),(3),(4);
 INSERT INTO t1 SELECT a.id FROM t1 a,t1 b,t1 c,t1 d;
 # Setup the mysqld to crash at certain point
-SET @orig_debug = @@debug;
+SET @orig_debug=@@debug;
 SET SESSION debug="d,subselect_exec_fail";
 SELECT SUM(EXISTS(SELECT RAND() FROM t1)) FROM t1;
 SELECT REVERSE(EXISTS(SELECT RAND() FROM t1));

=== modified file 'mysql-test/t/sync_frm_basic.test'
--- a/mysql-test/t/sync_frm_basic.test	2008-04-10 13:14:28 +0000
+++ b/mysql-test/t/sync_frm_basic.test	2008-10-10 15:28:41 +0000
@@ -34,6 +34,8 @@
 ################################################################
 
 SET @start_value = @@global.sync_frm;
+# In some cases the server may have been started with --disable-sync-frm
+--replace_column 1 VAL
 SELECT @start_value;
 
 
@@ -155,9 +157,9 @@ SELECT sync_frm = @@session.sync_frm;
 ##############################
 
 SET @@global.sync_frm = @start_value;
+--replace_column 1 VAL
 SELECT @@global.sync_frm;
 
-
 ######################################################
 #              END OF sync_frm TESTS                 #
 ######################################################

=== modified file 'mysys/CMakeLists.txt' (properties changed: +x to -x)
--- a/mysys/CMakeLists.txt	2008-07-23 08:52:08 +0000
+++ b/mysys/CMakeLists.txt	2008-10-20 19:13:22 +0000
@@ -44,7 +44,7 @@ SET(MYSYS_SOURCES  array.c charset-def.c
 				thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c my_getpagesize.c
                                 lf_alloc-pin.c lf_dynarray.c lf_hash.c
                                 my_atomic.c my_getncpus.c my_rnd.c
-                                my_uuid.c wqueue.c
+                                my_uuid.c wqueue.c waiting_threads.c
 )
 
 IF(NOT SOURCE_SUBLIBS)

=== modified file 'mysys/Makefile.am'
--- a/mysys/Makefile.am	2008-07-23 08:52:08 +0000
+++ b/mysys/Makefile.am	2008-10-20 19:13:22 +0000
@@ -57,11 +57,12 @@ libmysys_a_SOURCES =    my_init.c my_get
 			my_memmem.c stacktrace.c \
 			my_windac.c my_access.c base64.c my_libwrap.c \
 		        wqueue.c
-EXTRA_DIST =		thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
-			thr_mutex.c thr_rwlock.c \
-			CMakeLists.txt mf_soundex.c \
+if THREAD
+libmysys_a_SOURCES+=	thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
+			thr_mutex.c thr_rwlock.c waiting_threads.c
+endif
+EXTRA_DIST =		CMakeLists.txt mf_soundex.c \
 			my_conio.c my_wincond.c my_winthread.c my_winerr.c my_winfile.c
-libmysys_a_LIBADD =	@THREAD_LOBJECTS@
 # test_dir_DEPENDENCIES=	$(LIBRARIES)
 # testhash_DEPENDENCIES=	$(LIBRARIES)
 # test_charset_DEPENDENCIES=	$(LIBRARIES)
@@ -75,8 +76,6 @@ DEFS =			-DDEFAULT_BASEDIR=\"$(prefix)\"
 			-DDEFAULT_SYSCONFDIR="\"$(sysconfdir)\"" \
                         @DEFS@
 
-libmysys_a_DEPENDENCIES= @THREAD_LOBJECTS@
-
 # I hope this always does the right thing. Otherwise this is only test programs
 FLAGS=$(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) @NOINST_LDFLAGS@
 

=== modified file 'mysys/array.c'
--- a/mysys/array.c	2008-04-28 16:24:05 +0000
+++ b/mysys/array.c	2008-08-08 11:11:27 +0000
@@ -51,19 +51,14 @@ my_bool init_dynamic_array2(DYNAMIC_ARRA
     if (init_alloc > 8 && alloc_increment > init_alloc * 2)
       alloc_increment=init_alloc*2;
   }
-
-  if (!init_alloc)
-  {
-    init_alloc=alloc_increment;
-    init_buffer= 0;
-  }
   array->elements=0;
   array->max_element=init_alloc;
   array->alloc_increment=alloc_increment;
   array->size_of_element=element_size;
   if ((array->buffer= init_buffer))
     DBUG_RETURN(FALSE);
-  if (!(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc,
+  if (init_alloc &&
+      !(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc,
                                             MYF(MY_WME))))
   {
     array->max_element=0;

=== modified file 'mysys/lf_alloc-pin.c'
--- a/mysys/lf_alloc-pin.c	2008-02-21 00:51:51 +0000
+++ b/mysys/lf_alloc-pin.c	2008-10-07 16:49:01 +0000
@@ -96,11 +96,10 @@
   versioning a pointer - because we use an array, a pointer to pins is 16 bit,
   upper 16 bits are used for a version.
 
-  It is assumed that pins belong to a thread and are not transferable
-  between threads (LF_PINS::stack_ends_here being a primary reason
+  It is assumed that pins belong to a THD and are not transferable
+  between THD's (LF_PINS::stack_ends_here being a primary reason
   for this limitation).
 */
-
 #include <my_global.h>
 #include <my_sys.h>
 #include <lf.h>
@@ -137,10 +136,6 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox
 
   SYNOPSYS
     pinbox      -
-    stack_end   - a pointer to the end (top/bottom, depending on the
-                  STACK_DIRECTION) of stack. Used for safe alloca. There's
-                  no safety margin deducted, a caller should take care of it,
-                  if necessary.
 
   DESCRIPTION
     get a new LF_PINS structure from a stack of unused pins,
@@ -150,7 +145,7 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox
     It is assumed that pins belong to a thread and are not transferable
     between threads.
 */
-LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end)
+LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox)
 {
   uint32 pins, next, top_ver;
   LF_PINS *el;
@@ -194,7 +189,7 @@ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *
   el->link= pins;
   el->purgatory_count= 0;
   el->pinbox= pinbox;
-  el->stack_ends_here= stack_end;
+  el->stack_ends_here= & my_thread_var->stack_ends_here;
   return el;
 }
 
@@ -325,6 +320,9 @@ static int match_pins(LF_PINS *el, void 
 #define available_stack_size(CUR,END) (long) ((char*)(END) - (char*)(CUR))
 #endif
 
+#define next_node(P, X) (*((uchar * volatile *)(((uchar *)(X)) + (P)->free_ptr_offset)))
+#define anext_node(X) next_node(&allocator->pinbox, (X))
+
 /*
   Scan the purgatory and free everything that can be freed
 */
@@ -332,7 +330,7 @@ static void _lf_pinbox_real_free(LF_PINS
 {
   int npins, alloca_size;
   void *list, **addr;
-  struct st_lf_alloc_node *first, *last= NULL;
+  uchar *first, *last= NULL;
   LF_PINBOX *pinbox= pins->pinbox;
 
   LINT_INIT(first);
@@ -341,7 +339,7 @@ static void _lf_pinbox_real_free(LF_PINS
 #ifdef HAVE_ALLOCA
   alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins;
   /* create a sorted list of pinned addresses, to speed up searches */
-  if (available_stack_size(&pinbox, pins->stack_ends_here) > alloca_size)
+  if (available_stack_size(&pinbox, *pins->stack_ends_here) > alloca_size)
   {
     struct st_harvester hv;
     addr= (void **) alloca(alloca_size);
@@ -391,9 +389,9 @@ static void _lf_pinbox_real_free(LF_PINS
     }
     /* not pinned - freeing */
     if (last)
-      last= last->next= (struct st_lf_alloc_node *)cur;
+      last= next_node(pinbox, last)= (uchar *)cur;
     else
-      first= last= (struct st_lf_alloc_node *)cur;
+      first= last= (uchar *)cur;
     continue;
 found:
     /* pinned - keeping */
@@ -412,22 +410,22 @@ LF_REQUIRE_PINS(1)
   add it back to the allocator stack
 
   DESCRIPTION
-    'first' and 'last' are the ends of the linked list of st_lf_alloc_node's:
+    'first' and 'last' are the ends of the linked list of nodes:
     first->el->el->....->el->last. Use first==last to free only one element.
 */
-static void alloc_free(struct st_lf_alloc_node *first,
-                       struct st_lf_alloc_node volatile *last,
+static void alloc_free(uchar *first,
+                       uchar volatile *last,
                        LF_ALLOCATOR *allocator)
 {
   /*
     we need a union here to access type-punned pointer reliably.
     otherwise gcc -fstrict-aliasing will not see 'tmp' changed in the loop
   */
-  union { struct st_lf_alloc_node * node; void *ptr; } tmp;
+  union { uchar * node; void *ptr; } tmp;
   tmp.node= allocator->top;
   do
   {
-    last->next= tmp.node;
+    anext_node(last)= tmp.node;
   } while (!my_atomic_casptr((void **)(char *)&allocator->top,
                              (void **)&tmp.ptr, first) && LF_BACKOFF);
 }
@@ -452,6 +450,8 @@ void lf_alloc_init(LF_ALLOCATOR *allocat
   allocator->top= 0;
   allocator->mallocs= 0;
   allocator->element_size= size;
+  allocator->constructor= 0;
+  allocator->destructor= 0;
   DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset);
 }
 
@@ -468,10 +468,12 @@ void lf_alloc_init(LF_ALLOCATOR *allocat
 */
 void lf_alloc_destroy(LF_ALLOCATOR *allocator)
 {
-  struct st_lf_alloc_node *node= allocator->top;
+  uchar *node= allocator->top;
   while (node)
   {
-    struct st_lf_alloc_node *tmp= node->next;
+    uchar *tmp= anext_node(node);
+    if (allocator->destructor)
+      allocator->destructor(node);
     my_free((void *)node, MYF(0));
     node= tmp;
   }
@@ -489,7 +491,7 @@ void lf_alloc_destroy(LF_ALLOCATOR *allo
 void *_lf_alloc_new(LF_PINS *pins)
 {
   LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg);
-  struct st_lf_alloc_node *node;
+  uchar *node;
   for (;;)
   {
     do
@@ -500,6 +502,8 @@ void *_lf_alloc_new(LF_PINS *pins)
     if (!node)
     {
       node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
+      if (allocator->constructor)
+        allocator->constructor(node);
 #ifdef MY_LF_EXTRA_DEBUG
       if (likely(node != 0))
         my_atomic_add32(&allocator->mallocs, 1);
@@ -507,7 +511,7 @@ void *_lf_alloc_new(LF_PINS *pins)
       break;
     }
     if (my_atomic_casptr((void **)(char *)&allocator->top,
-                         (void *)&node, node->next))
+                         (void *)&node, anext_node(node)))
       break;
   }
   _lf_unpin(pins, 0);
@@ -523,8 +527,8 @@ void *_lf_alloc_new(LF_PINS *pins)
 uint lf_alloc_pool_count(LF_ALLOCATOR *allocator)
 {
   uint i;
-  struct st_lf_alloc_node *node;
-  for (node= allocator->top, i= 0; node; node= node->next, i++)
+  uchar *node;
+  for (node= allocator->top, i= 0; node; node= anext_node(node), i++)
     /* no op */;
   return i;
 }

=== modified file 'mysys/lf_hash.c'
--- a/mysys/lf_hash.c	2008-02-13 17:25:56 +0000
+++ b/mysys/lf_hash.c	2008-07-29 14:10:24 +0000
@@ -299,11 +299,22 @@ static int initialize_bucket(LF_HASH *, 
 
 /*
   Initializes lf_hash, the arguments are compatible with hash_init
+
+  @note element_size sets both the size of the memory block allocated by
+  lf_alloc and the size of the block memcpy'ed in lf_hash_insert. Typically
+  they are the same. But LF_HASH::element_size can be decreased
+  after lf_hash_init, and then lf_alloc will allocate a larger block than
+  lf_hash_insert will copy over. This is desirable if part of the element
+  is expensive to initialize - for example if there is a mutex or
+  DYNAMIC_ARRAY. In this case they should be initialized in the
+  LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them.
+  See wt_init() for example.
 */
 void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
                   uint key_offset, uint key_length, hash_get_key get_key,
                   CHARSET_INFO *charset)
 {
+  compile_time_assert(sizeof(LF_SLIST) == LF_HASH_OVERHEAD);
   lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
                 offsetof(LF_SLIST, key));
   lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
@@ -453,7 +464,7 @@ void *lf_hash_search(LF_HASH *hash, LF_P
   return found ? found+1 : 0;
 }
 
-static const uchar *dummy_key= "";
+static const uchar *dummy_key= (uchar*)"";
 
 /*
   RETURN
@@ -473,7 +484,7 @@ static int initialize_bucket(LF_HASH *ha
       unlikely(initialize_bucket(hash, el, parent, pins)))
     return -1;
   dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
-  dummy->key= (char*) dummy_key;
+  dummy->key= dummy_key;
   dummy->keylen= 0;
   if ((cur= linsert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
   {

=== modified file 'mysys/my_static.c'
--- a/mysys/my_static.c	2008-08-23 00:18:35 +0000
+++ b/mysys/my_static.c	2008-10-20 19:13:22 +0000
@@ -92,6 +92,18 @@ void (*error_handler_hook)(uint error,co
 void (*fatal_error_handler_hook)(uint error,const char *str,myf MyFlags)=
   my_message_no_curses;
 
+static const char *proc_info_dummy(void *a __attribute__((unused)),
+                                   const char *b __attribute__((unused)),
+                                   const char *c __attribute__((unused)),
+                                   const char *d __attribute__((unused)),
+                                   const unsigned int e __attribute__((unused)))
+{
+  return 0;
+}
+
+const char *(*proc_info_hook)(void *, const char *, const char *, const char *,
+                              const unsigned int)= proc_info_dummy;
+
 #ifdef __WIN__
 /* from my_getsystime.c */
 ulonglong query_performance_frequency, query_performance_offset;

=== modified file 'mysys/my_thr_init.c'
--- a/mysys/my_thr_init.c	2008-07-09 07:12:43 +0000
+++ b/mysys/my_thr_init.c	2008-10-20 09:16:47 +0000
@@ -263,7 +263,7 @@ my_bool my_thread_init(void)
 #ifdef EXTRA_DEBUG_THREADS
   fprintf(stderr,"my_thread_init(): thread_id: 0x%lx\n",
           (ulong) pthread_self());
-#endif  
+#endif
 
 #if !defined(__WIN__) || defined(USE_TLS)
   if (my_pthread_getspecific(struct st_my_thread_var *,THR_KEY_mysys))
@@ -271,7 +271,7 @@ my_bool my_thread_init(void)
 #ifdef EXTRA_DEBUG_THREADS
     fprintf(stderr,"my_thread_init() called more than once in thread 0x%lx\n",
             (long) pthread_self());
-#endif    
+#endif
     goto end;
   }
   if (!(tmp= (struct st_my_thread_var *) calloc(1, sizeof(*tmp))))
@@ -297,6 +297,8 @@ my_bool my_thread_init(void)
   pthread_mutex_init(&tmp->mutex,MY_MUTEX_INIT_FAST);
   pthread_cond_init(&tmp->suspend, NULL);
 
+  tmp->stack_ends_here= &tmp + STACK_DIRECTION * my_thread_stack_size;
+
   pthread_mutex_lock(&THR_LOCK_threads);
   tmp->id= ++thread_id;
   ++THR_thread_count;
@@ -332,7 +334,7 @@ void my_thread_end(void)
 #ifdef EXTRA_DEBUG_THREADS
   fprintf(stderr,"my_thread_end(): tmp: 0x%lx  pthread_self: 0x%lx  thread_id: %ld\n",
 	  (long) tmp, (long) pthread_self(), tmp ? (long) tmp->id : 0L);
-#endif  
+#endif
   if (tmp && tmp->init)
   {
 #if !defined(DBUG_OFF)

=== added file 'mysys/waiting_threads.c'
--- a/mysys/waiting_threads.c	1970-01-01 00:00:00 +0000
+++ b/mysys/waiting_threads.c	2008-09-01 19:43:11 +0000
@@ -0,0 +1,908 @@
+/* Copyright (C) 2008 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/*
+  "waiting threads" subsystem - a unified interface for threads to wait
+  on each other, with built-in deadlock detection.
+
+  Main concepts
+  ^^^^^^^^^^^^^
+    a thread - is represented by a WT_THD structure. One physical thread
+      can have only one WT_THD descriptor.
+
+    a resource - a thread does not wait for other threads directly,
+      instead it waits for a "resource", which is "owned" by other threads.
+      It waits, exactly, for all "owners" to "release" a resource.
+      It does not have to correspond to a physical resource. For example, it
+      may be convenient in certain cases to force resource == thread.
+      A resource is represented by a WT_RESOURCE structure. 
+
+    a resource identifier - a pair of {resource type, value}. A value is
+      an ulonglong number. Represented by a WT_RESOURCE_ID structure.
+
+    a resource type - a pointer to a statically defined instance of
+    WT_RESOURCE_TYPE structure. This structure contains a pointer to
+    a function that knows how to compare values of this resource type.
+    In the simple case it could be wt_resource_id_memcmp().
+
+   Usage
+   ^^^^^
+   to use the interface one needs to use this thread's WT_THD,
+   call wt_thd_will_wait_for() for every thread it needs to wait on,
+   then call wt_thd_cond_timedwait(). When thread releases a resource
+   it should call wt_thd_release() (or wt_thd_release_all()) - it will
+   notify (send a signal) threads waiting in wt_thd_cond_timedwait(),
+   if appropriate.
+
+   Just like with pthread's cond_wait, there could be spurious
+   wake-ups from wt_thd_cond_timedwait(). A caller is expected to
+   handle that.
+
+   wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either
+   WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return
+   WT_TIMEOUT. Out of memory and other fatal errors are reported as
+   WT_DEADLOCK - and a transaction must be aborted just the same.
+
+   Configuration
+   ^^^^^^^^^^^^^
+   There are four config variables. Two deadlock search depths - short and
+   long - and two timeouts. Deadlock search is performed with the short
+   depth on every wt_thd_will_wait_for() call. wt_thd_cond_timedwait()
+   waits with a short timeout, performs a deadlock search with the long
+   depth, and waits with a long timeout. As most deadlock cycles are supposed
+   to be short, most deadlocks will be detected at once, and waits will
+   rarely be necessary.
+
+   These config variables are thread-local. Different threads may have
+   different search depth and timeout values.
+
+   Also, deadlock detector supports different killing strategies, the victim
+   in a deadlock cycle is selected based on the "weight". See "weight"
+   description in waiting_threads.h for details. It's up to the caller to
+   set weights accordingly.
+
+   Status
+   ^^^^^^
+   We calculate the number of successful waits (WT_OK returned from
+   wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle
+   length distribution - number of deadlocks with every length from
+   1 to WT_CYCLE_STATS, and a wait time distribution - number
+   of waits with a time from 1 us to 1 min in WT_WAIT_STATS
+   intervals on a log scale.
+*/
+
+/*
+  Note that if your lock system satisfies the following condition:
+
+    there exist four lock levels A, B, C, D, such that
+      A is compatible with B
+      A is not compatible with C
+      D is not compatible with B
+
+      (example A=IX, B=IS, C=S, D=X)
+
+   you need to include lock level in the resource identifier - thread 1
+   waiting for lock A on resource R and thread 2 waiting for lock B
+   on resource R should wait on different WT_RESOURCE structures, on different
+   {lock, resource} pairs. Otherwise the following is possible:
+
+      thread1> take S-lock on R
+      thread2> take IS-lock on R
+      thread3> wants X-lock on R, starts waiting for threads 1 and 2 on R.
+      thread3 is killed (or timeout or whatever)
+      WT_RESOURCE structure for R is still in the hash, as it has two owners
+      thread4> wants an IX-lock on R
+      WT_RESOURCE for R is found in the hash, thread4 starts waiting on it.
+      !! now thread4 is waiting for both thread1 and thread2
+      !! while, in fact, IX-lock and IS-lock are compatible and
+      !! thread4 should not wait for thread2.
+*/
+
+#include <waiting_threads.h>
+#include <m_string.h>
+
+/*
+  status variables:
+    distribution of cycle lengths
+    wait time log distribution
+
+  Note:
+
+    we call deadlock() twice per wait (with different search lengths).
+    it means a deadlock will be counted twice. It's difficult to avoid,
+    as on the second search we could find a *different* deadlock and we
+    *want* to count it too. So we just count all deadlocks - two searches
+    mean two increments on the wt_cycle_stats.
+*/
+
+ulonglong wt_wait_table[WT_WAIT_STATS];
+uint32    wt_wait_stats[WT_WAIT_STATS+1];
+uint32    wt_cycle_stats[2][WT_CYCLE_STATS+1], wt_success_stats;
+
+static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock;
+
+#define increment_success_stats()                                       \
+  do {                                                                  \
+    my_atomic_rwlock_wrlock(&success_stats_lock);                       \
+    my_atomic_add32(&wt_success_stats, 1);                              \
+    my_atomic_rwlock_wrunlock(&success_stats_lock);                     \
+  } while (0)
+
+#define increment_cycle_stats(X,SLOT)                                   \
+  do {                                                                  \
+    uint i= (X);                                                        \
+    if (i >= WT_CYCLE_STATS)                                            \
+      i= WT_CYCLE_STATS;                                                \
+    my_atomic_rwlock_wrlock(&cycle_stats_lock);                         \
+    my_atomic_add32(&wt_cycle_stats[SLOT][i], 1);                       \
+    my_atomic_rwlock_wrunlock(&cycle_stats_lock);                       \
+  } while (0)
+
+#define increment_wait_stats(X,RET)                                     \
+  do {                                                                  \
+    uint i;                                                             \
+    if ((RET) == ETIMEDOUT)                                             \
+      i= WT_WAIT_STATS;                                                 \
+    else                                                                \
+    {                                                                   \
+      ulonglong w=(X)/10;                                               \
+      for (i=0; i < WT_WAIT_STATS && w > wt_wait_table[i]; i++) ;       \
+    }                                                                   \
+    my_atomic_rwlock_wrlock(&wait_stats_lock);                          \
+    my_atomic_add32(wt_wait_stats+i, 1);                                \
+    my_atomic_rwlock_wrunlock(&wait_stats_lock);                        \
+  } while (0)
+
+#define rc_rdlock(X)                                                    \
+  do {                                                                  \
+    WT_RESOURCE *R=(X);                                                 \
+    DBUG_PRINT("wt", ("LOCK resid=%lld for READ", R->id.value));        \
+    rw_rdlock(&R->lock);                                                \
+  } while (0)
+#define rc_wrlock(X)                                                    \
+  do {                                                                  \
+    WT_RESOURCE *R=(X);                                                 \
+    DBUG_PRINT("wt", ("LOCK resid=%lld for WRITE", R->id.value));       \
+    rw_wrlock(&R->lock);                                                \
+  } while (0)
+#define rc_unlock(X)                                                    \
+  do {                                                                  \
+    WT_RESOURCE *R=(X);                                                 \
+    DBUG_PRINT("wt", ("UNLOCK resid=%lld", R->id.value));               \
+    rw_unlock(&R->lock);                                                \
+  } while (0)
+
+/*
+  All resources are stored in a lock-free hash. Different threads
+  may add new resources and perform deadlock detection concurrently.
+*/
+static LF_HASH      reshash;
+
+/**
+  WT_RESOURCE constructor
+
+  It's called from lf_hash and takes a pointer to the LF_SLIST instance.
+  WT_RESOURCE is located at arg+sizeof(LF_SLIST)
+*/
+static void wt_resource_init(uchar *arg)
+{
+  WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
+  DBUG_ENTER("wt_resource_init");
+
+  bzero(rc, sizeof(*rc));
+  my_rwlock_init(&rc->lock, 0);
+  pthread_cond_init(&rc->cond, 0);
+  my_init_dynamic_array(&rc->owners, sizeof(WT_THD *), 0, 5);
+  DBUG_VOID_RETURN;
+}
+
+/**
+  WT_RESOURCE destructor
+
+  It's called from lf_hash and takes a pointer to the LF_SLIST instance.
+  WT_RESOURCE is located at arg+sizeof(LF_SLIST)
+*/
+static void wt_resource_destroy(uchar *arg)
+{
+  WT_RESOURCE *rc=(WT_RESOURCE*)(arg+LF_HASH_OVERHEAD);
+  DBUG_ENTER("wt_resource_destroy");
+
+  DBUG_ASSERT(rc->owners.elements == 0);
+  rwlock_destroy(&rc->lock);
+  pthread_cond_destroy(&rc->cond);
+  delete_dynamic(&rc->owners);
+  DBUG_VOID_RETURN;
+}
+
+void wt_init()
+{
+  DBUG_ENTER("wt_init");
+
+  lf_hash_init(&reshash, sizeof(WT_RESOURCE), LF_HASH_UNIQUE, 0,
+               sizeof(struct st_wt_resource_id), 0, 0);
+  reshash.alloc.constructor= wt_resource_init;
+  reshash.alloc.destructor= wt_resource_destroy;
+  /*
+    Note a trick: we initialize the hash with the real element size,
+    but fix it later to a shortened element size. This way
+    the allocator will allocate elements correctly, but
+    lf_hash_insert() will only overwrite part of the element with memcpy().
+    lock, condition, and dynamic array will be intact.
+  */
+  reshash.element_size= offsetof(WT_RESOURCE, lock);
+  bzero(wt_wait_stats, sizeof(wt_wait_stats));
+  bzero(wt_cycle_stats, sizeof(wt_cycle_stats));
+  wt_success_stats=0;
+  { /* initialize wt_wait_table[]. from 1 us to 1 min, log scale */
+    int i;
+    double from=log(1);   /* 1 us */
+    double to=log(60e6);  /* 1 min */
+    for (i=0; i < WT_WAIT_STATS; i++)
+    {
+      wt_wait_table[i]=(ulonglong)exp((to-from)/(WT_WAIT_STATS-1)*i+from);
+      DBUG_ASSERT(i==0 || wt_wait_table[i-1] != wt_wait_table[i]);
+    }
+  }
+  my_atomic_rwlock_init(&cycle_stats_lock);
+  my_atomic_rwlock_init(&success_stats_lock);
+  my_atomic_rwlock_init(&wait_stats_lock);
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Shut down the waiting-threads subsystem.
+
+  Counterpart of wt_init(): destroys the three statistics rwlocks and the
+  resource hash. The hash is expected to be empty by the time this runs.
+*/
+void wt_end()
+{
+  DBUG_ENTER("wt_end");
+
+  /* every resource must have been released before shutdown */
+  DBUG_ASSERT(reshash.count == 0);
+
+  my_atomic_rwlock_destroy(&wait_stats_lock);
+  my_atomic_rwlock_destroy(&success_stats_lock);
+  my_atomic_rwlock_destroy(&cycle_stats_lock);
+  lf_hash_destroy(&reshash);
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Lazy WT_THD initialization
+
+  Cheap initialization of WT_THD. Only initializes fields that don't require
+  memory allocations - basically, it only does assignments. The rest of the
+  WT_THD structure will be initialized on demand, on the first use.
+  This allows one to initialize lazily all WT_THD structures, even if some
+  (or even most) of them will never be used for deadlock detection.
+
+  The pointers are stored, not dereferenced, so the values they point to
+  are read each time they are needed.
+
+  @param thd    the structure to initialize
+  @param ds     a pointer to deadlock search depth short value
+  @param ts     a pointer to deadlock timeout short value
+  @param dl     a pointer to deadlock search depth long value
+  @param tl     a pointer to deadlock timeout long value
+*/
+void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl)
+{
+  DBUG_ENTER("wt_thd_lazy_init");
+  thd->waiting_for=0;
+  /*
+    pins == 0 is what fix_thd_pins() and wt_thd_destroy() test to tell
+    whether pins were ever acquired, so it must not be left to whatever
+    the caller's memory happened to contain.
+  */
+  thd->pins= 0;
+  thd->killed= 0;
+  thd->my_resources.buffer= 0;
+  thd->my_resources.elements= 0;
+  thd->weight=0;
+  thd->deadlock_search_depth_short= ds;
+  thd->timeout_short= ts;
+  thd->deadlock_search_depth_long= dl;
+  thd->timeout_long= tl;
+  /* dynamic array is also initialized lazily - without memory allocations */
+  my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5);
+#ifndef DBUG_OFF
+  thd->name=my_thread_name();
+#endif
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Complete the initialization of a lazily initialized WT_THD
+
+  wt_thd_lazy_init() leaves the pins unallocated. This helper obtains
+  them from the resource hash the first time they are needed; on later
+  calls it does nothing. In debug builds the thread name is refreshed at
+  the same time.
+
+  @return 0 if thd has pins, 1 if they could not be obtained
+*/
+static int fix_thd_pins(WT_THD *thd)
+{
+  if (unlikely(!thd->pins))
+  {
+    thd->pins= lf_hash_get_pins(&reshash);
+#ifndef DBUG_OFF
+    thd->name= my_thread_name();
+#endif
+    if (!thd->pins)
+      return 1;
+  }
+  return 0;
+}
+
+/**
+  Release everything a WT_THD holds
+
+  Returns the pins (if they were ever acquired) to the resource hash and
+  frees the my_resources array. The thread must not own any resources
+  at this point.
+
+  'pins' is reset after being returned, so that a stale pointer can neither
+  be mistaken by fix_thd_pins() for valid pins nor be returned a second
+  time if this function is called again on the same structure.
+*/
+void wt_thd_destroy(WT_THD *thd)
+{
+  DBUG_ENTER("wt_thd_destroy");
+
+  DBUG_ASSERT(thd->my_resources.elements == 0);
+
+  if (thd->pins != 0)
+  {
+    lf_hash_put_pins(thd->pins);
+    thd->pins= 0;
+  }
+
+  delete_dynamic(&thd->my_resources);
+  thd->waiting_for=0;
+  DBUG_VOID_RETURN;
+}
+/**
+  Bytewise comparison of two resource ids.
+
+  Suitable as the compare function of a WT_RESOURCE_TYPE whenever two ids
+  are equal exactly when their sizeof(WT_RESOURCE_ID) bytes are equal.
+
+  @return the result of memcmp() over the two ids (0 if they are equal)
+*/
+int wt_resource_id_memcmp(void *a, void *b)
+{
+  const size_t id_size= sizeof(WT_RESOURCE_ID);
+
+  return memcmp(a, b, id_size);
+}
+
+/**
+  arguments for the recursive deadlock_search function
+
+  One instance is created by deadlock() and shared by every level of the
+  recursion; 'victim' and 'rc' are updated as the search proceeds.
+*/
+struct deadlock_arg {
+  WT_THD *thd;          /**< starting point of a search */
+  uint    max_depth;    /**< search depth limit */
+  WT_THD *victim;       /**< a thread to be killed to resolve a deadlock */
+  WT_RESOURCE *rc;      /**< see comment at the end of deadlock_search() */
+};
+
+/**
+  Make 'found' the deadlock victim if it weighs less than the current one
+
+  When the victim changes and the previous victim was not the searching
+  thread itself, the resource the previous victim waits for is unlocked.
+  arg->rc is then cleared, so that the caller keeps the lock on the
+  resource the new victim is waiting for.
+
+  @param found  candidate victim
+  @param arg    search state; arg->victim and arg->rc may be modified
+*/
+static void change_victim(WT_THD* found, struct deadlock_arg *arg)
+{
+  WT_THD *previous= arg->victim;
+
+  if (!(found->weight < previous->weight))
+    return;                             /* the current victim stays */
+
+  if (previous != arg->thd)
+  {
+    rc_unlock(previous->waiting_for);   /* release the previous victim */
+    DBUG_ASSERT(arg->rc == found->waiting_for);
+  }
+  arg->rc= 0;
+  arg->victim= found;
+}
+
+/**
+  recursive loop detection in a wait-for graph with a limited search depth
+
+  Locks the resource that 'blocker' is waiting for, checks whether
+  arg->thd is among its owners, and otherwise recurses into every owner.
+  On return the resource locked by this call (if any) is stored in
+  arg->rc and is still locked; the caller is expected to unlock it.
+  On the two early exits nothing is locked and arg->rc is 0.
+
+  @param arg      search state shared by all recursion levels
+  @param blocker  thread whose waiting_for resource is examined
+  @param depth    current depth, compared against arg->max_depth
+
+  @return WT_OK, WT_DEPTH_EXCEEDED or WT_DEADLOCK
+*/
+static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker,
+                           uint depth)
+{
+  WT_RESOURCE *rc, *volatile *shared_ptr= &blocker->waiting_for;
+  WT_THD *cursor;
+  uint i;
+  int ret= WT_OK;
+  DBUG_ENTER("deadlock_search");
+  DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, depth=%u",
+                    arg->thd->name, blocker->name, depth));
+
+  LF_REQUIRE_PINS(1);
+
+  arg->rc= 0;
+
+  if (depth > arg->max_depth)
+  {
+    DBUG_PRINT("wt", ("exit: WT_DEPTH_EXCEEDED (early)"));
+    DBUG_RETURN(WT_DEPTH_EXCEEDED);
+  }
+
+retry:
+  /* safe dereference as explained in lf_alloc-pin.c */
+  do
+  {
+    rc= *shared_ptr;
+    lf_pin(arg->thd->pins, 0, rc);
+  } while (rc != *shared_ptr && LF_BACKOFF);
+
+  if (rc == 0)                  /* blocker is not waiting for anything */
+  {
+    DBUG_PRINT("wt", ("exit: OK (early)"));
+    DBUG_RETURN(0);
+  }
+
+  rc_rdlock(rc);
+  /* re-validate under the lock: rc may have been freed or replaced */
+  if (rc->state != ACTIVE || *shared_ptr != rc)
+  {
+    rc_unlock(rc);
+    lf_unpin(arg->thd->pins, 0);
+    goto retry;
+  }
+  /* as the state is locked, we can unpin now */
+  lf_unpin(arg->thd->pins, 0);
+
+  /*
+    Below is not a pure depth-first search. It's a depth-first with a
+    slightest hint of breadth-first. Depth-first is:
+
+      check(element):
+        foreach current in element->nodes[] do:
+          if current == element return error;
+          check(current);
+
+    while we do
+
+      check(element):
+        foreach current in element->nodes[] do:
+          if current == element return error;
+        foreach current in element->nodes[] do:
+          check(current);
+  */
+  for (i=0; i < rc->owners.elements; i++)
+  {
+    cursor= *dynamic_element(&rc->owners, i, WT_THD**);
+    /*
+      We're only looking for (and detecting) cycles that include 'arg->thd'.
+      That is, only deadlocks that *we* have created. For example,
+        thd->A->B->thd
+      (thd waits for A, A waits for B, while B is waiting for thd).
+      While walking the graph we can encounter other cycles, e.g.
+        thd->A->B->C->A
+      This will not be detected. Instead we will walk it in circles until
+      the search depth limit is reached (the latter guarantees that an
+      infinite loop is impossible). We expect the thread that has created
+      the cycle (one of A, B, and C) to detect its deadlock.
+    */
+    if (cursor == arg->thd)
+    {
+      ret= WT_DEADLOCK;
+      increment_cycle_stats(depth, arg->max_depth ==
+                                   *arg->thd->deadlock_search_depth_long);
+      arg->victim= cursor;
+      goto end;
+    }
+  }
+  for (i=0; i < rc->owners.elements; i++)
+  {
+    cursor= *dynamic_element(&rc->owners, i, WT_THD**);
+    switch (deadlock_search(arg, cursor, depth+1)) {
+    case WT_OK:
+      break;
+    case WT_DEPTH_EXCEEDED:
+      ret= WT_DEPTH_EXCEEDED;
+      break;
+    case WT_DEADLOCK:
+      ret= WT_DEADLOCK;
+      change_victim(cursor, arg);       /* also sets arg->rc to 0 */
+      i= rc->owners.elements;           /* jump out of the loop */
+      break;
+    default:
+      DBUG_ASSERT(0);
+    }
+    if (arg->rc)                /* lock left by the nested call, not kept */
+      rc_unlock(arg->rc);
+  }
+end:
+  /*
+    Note that 'rc' is locked in this function, but it's never unlocked here.
+    Instead it's saved in arg->rc and the *caller* is expected to unlock it.
+    It's done to support different killing strategies. This is how it works:
+    Assuming a graph
+
+      thd->A->B->C->thd
+
+    deadlock_search() function starts from thd, locks it (in fact it locks not
+    a thd, but a resource it is waiting on, but below, for simplicity, I'll
+    talk about "locking a thd"). Then it goes down recursively, locks A, and so
+    on. Goes down recursively, locks B. Goes down recursively, locks C.
+    Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd.
+    Returns from the last deadlock_search() call. C stays locked!
+    Now it checks whether C is a more appropriate victim than 'thd'.
+    If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked.
+    Now it checks whether B is a more appropriate victim than arg->victim.
+    If yes - old arg->victim is unlocked and arg->victim=B,
+    otherwise B is unlocked. Return.
+    And so on.
+
+    In short, a resource is locked in a frame. But it's not unlocked in the
+    same frame, it's unlocked by the caller, and only after the caller checks
+    that it doesn't need to use current WT_THD as a victim. If it does - the
+    lock is kept and the old victim's resource is unlocked. When the recursion
+    is unrolled and we are back to deadlock() function, there are only two
+    locks left - on thd and on the victim.
+  */
+  arg->rc= rc;
+  DBUG_PRINT("wt", ("exit: %s",
+                    ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" :
+                    ret ? "WT_DEADLOCK" : "OK"));
+  DBUG_RETURN(ret);
+}
+
+/**
+  Deadlock detection in a wait-for graph
+
+  A wrapper for recursive deadlock_search() - prepares deadlock_arg structure,
+  invokes deadlock_search(), increments statistics, notifies the victim.
+
+  @param thd            thread that is going to wait. Deadlock is detected
+                        if, while walking the graph, we reach a thread that
+                        is waiting on thd
+  @param blocker        starting point of a search. In wt_thd_cond_timedwait()
+                        it's thd, in wt_thd_will_wait_for() it's a thread that
+                        thd is going to wait for
+  @param depth          starting search depth. In general it's the number of
+                        edges in the wait-for graph between thd and the
+                        blocker. Practically only two values are used (and
+                        supported) - when thd == blocker it's 0, when thd
+                        waits directly for blocker, it's 1
+  @param max_depth      search depth limit
+
+  @return WT_DEADLOCK if a cycle was found and thd itself is the victim;
+          WT_OK otherwise (no cycle found, the depth limit was exceeded,
+          or another thread was chosen as the victim and notified)
+*/
+static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth,
+                            uint max_depth)
+{
+  struct deadlock_arg arg= {thd, max_depth, 0, 0};
+  int ret;
+  DBUG_ENTER("deadlock");
+  ret= deadlock_search(&arg, blocker, depth);
+  if (ret == WT_DEPTH_EXCEEDED)
+  {
+    /* counted in the last statistics slot; not reported to the caller */
+    increment_cycle_stats(WT_CYCLE_STATS, max_depth ==
+                                          *thd->deadlock_search_depth_long);
+    ret= WT_OK;
+  }
+  /*
+    if we started with depth==1, blocker was never considered for a victim
+    in deadlock_search(). Do it here.
+  */
+  if (ret == WT_DEADLOCK && depth)
+    change_victim(blocker, &arg);
+  if (arg.rc)                   /* lock left by the outermost search frame */
+    rc_unlock(arg.rc);
+  /* notify the victim, if appropriate */
+  if (ret == WT_DEADLOCK && arg.victim != thd)
+  {
+    DBUG_PRINT("wt", ("killing %s", arg.victim->name));
+    arg.victim->killed=1;
+    pthread_cond_broadcast(&arg.victim->waiting_for->cond);
+    rc_unlock(arg.victim->waiting_for);
+    ret= WT_OK;                 /* somebody else was killed, thd may wait */
+  }
+  DBUG_RETURN(ret);
+}
+
+
+/**
+  Delete an element from reshash if it has no waiters or owners
+
+  rc->lock must be locked by the caller and it's unlocked on return.
+
+  @param thd  thread on whose behalf the call is made; its pins are used
+              for the hash delete (and obtained here if not yet present)
+  @param rc   the resource, locked by the caller
+
+  @return 0 if the resource is still in use, or was deleted from the hash;
+          1 if pins could not be obtained or lf_hash_delete() returned -1
+*/
+static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc)
+{
+  uint keylen;
+  const void *key;
+  DBUG_ENTER("unlock_lock_and_free_resource");
+
+  DBUG_ASSERT(rc->state == ACTIVE);
+
+  if (rc->owners.elements || rc->waiter_count)
+  {
+    DBUG_PRINT("wt", ("nothing to do, %d owners, %d waiters",
+                      rc->owners.elements, rc->waiter_count));
+    rc_unlock(rc);
+    DBUG_RETURN(0);
+  }
+
+  if (fix_thd_pins(thd))
+  {
+    rc_unlock(rc);
+    DBUG_RETURN(1);
+  }
+
+  /*
+    XXX if (rc->id.type->make_key)
+          key= rc->id.type->make_key(&rc->id, &keylen);
+        else
+  */
+  {
+    key= &rc->id;
+    keylen= sizeof(rc->id);
+  }
+
+  /*
+    To free the element correctly we need to:
+     1. take its lock (already done).
+     2. set the state to FREE
+     3. release the lock
+     4. remove from the hash
+
+     I *think* it's safe to release the lock while the element is still
+     in the hash. If not, the corrected procedure should be
+     3. pin; 4. remove; 5. release; 6. unpin and it'll need pin[3].
+  */
+  rc->state=FREE;
+  rc_unlock(rc);
+  DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1);
+}
+
+
+/**
+  Record that thd no longer waits for its resource (lock already held)
+
+  Decrements the waiter count of thd->waiting_for, clears
+  thd->waiting_for and lets unlock_lock_and_free_resource() unlock the
+  resource and free it if nobody uses it anymore.
+  The caller must hold the lock of thd->waiting_for; it is released here.
+
+  @return WT_DEADLOCK if thd was marked as killed or the resource could
+          not be freed, WT_OK otherwise
+*/
+static int stop_waiting_locked(WT_THD *thd)
+{
+  WT_RESOURCE *resource= thd->waiting_for;
+  int free_failed;
+  DBUG_ENTER("stop_waiting_locked");
+
+  DBUG_ASSERT(resource->waiter_count);
+  DBUG_ASSERT(resource->state == ACTIVE);
+
+  resource->waiter_count--;
+  thd->waiting_for= 0;
+  free_failed= unlock_lock_and_free_resource(thd, resource);
+
+  if (thd->killed)
+    DBUG_RETURN(WT_DEADLOCK);
+  if (free_failed)
+    DBUG_RETURN(WT_DEADLOCK);
+  DBUG_RETURN(WT_OK);
+}
+
+/**
+  Record that thd no longer waits for its resource
+
+  Does nothing if thd is not waiting. Otherwise write-locks
+  thd->waiting_for and delegates to stop_waiting_locked(), which also
+  releases that lock.
+
+  @return WT_OK if thd was not waiting, otherwise whatever
+          stop_waiting_locked() returns
+*/
+static int stop_waiting(WT_THD *thd)
+{
+  WT_RESOURCE *resource= thd->waiting_for;
+  int result= WT_OK;
+  DBUG_ENTER("stop_waiting");
+
+  if (resource)
+  {
+    /*
+      the resource cannot be freed under us: thd is still counted
+      in its waiter_count, which is therefore non-zero
+    */
+    rc_wrlock(resource);
+    result= stop_waiting_locked(thd);
+  }
+  DBUG_RETURN(result);
+}
+
+/**
+  notify the system that a thread needs to wait for another thread
+
+  called by a *waiter* to declare what resource it will wait for.
+  can be called many times, if many blockers own a blocking resource.
+  but must always be called with the same resource id - a thread cannot
+  wait for more than one resource at a time.
+
+  As a new edge is added to the wait-for graph, a deadlock detection is
+  performed for this new edge.
+
+  @param thd      the thread that is going to wait
+  @param blocker  a thread that owns the resource
+  @param resid    id of the resource thd is going to wait for
+
+  @return 0 if the edge was registered and thd may wait; WT_DEADLOCK if
+          thd must not wait - a deadlock was found with thd as the victim,
+          thd was already marked as killed, or memory/pins could not be
+          allocated (deadlock and OOM use the same error code)
+*/
+int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid)
+{
+  uint i;
+  WT_RESOURCE *rc;
+  DBUG_ENTER("wt_thd_will_wait_for");
+
+  LF_REQUIRE_PINS(3);
+
+  DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu",
+                    thd->name, blocker->name, resid->value));
+
+  if (fix_thd_pins(thd))
+    DBUG_RETURN(WT_DEADLOCK);
+
+  if (thd->waiting_for == 0)
+  {
+    uint keylen;
+    const void *key;
+    /*
+      XXX if (restype->make_key) key= restype->make_key(resid, &keylen);
+      else
+    */
+    {
+      key= resid;
+      keylen= sizeof(*resid);
+    }
+
+    DBUG_PRINT("wt", ("first blocker"));
+
+retry:
+    while ((rc= lf_hash_search(&reshash, thd->pins, key, keylen)) == 0)
+    {
+      WT_RESOURCE tmp;
+
+      DBUG_PRINT("wt", ("failed to find rc in hash, inserting"));
+      bzero(&tmp, sizeof(tmp));
+      tmp.id= *resid;
+      tmp.state= ACTIVE;
+
+      if (lf_hash_insert(&reshash, thd->pins, &tmp) == -1) /* if OOM */
+        DBUG_RETURN(WT_DEADLOCK);
+      /*
+        Two cases: either lf_hash_insert() failed - because another thread
+        has just inserted a resource with the same id - and we need to retry.
+        Or lf_hash_insert() succeeded, and then we need to repeat
+        lf_hash_search() to find a real address of the newly inserted element.
+        That is, we don't care what lf_hash_insert() has returned.
+        And we need to repeat the loop anyway.
+      */
+    }
+    if (rc == MY_ERRPTR)
+      DBUG_RETURN(WT_DEADLOCK);
+
+    DBUG_PRINT("wt", ("found in hash rc=%p", rc));
+
+    rc_wrlock(rc);
+    if (rc->state != ACTIVE)
+    {
+      DBUG_PRINT("wt", ("but it's not active, retrying"));
+      /* Somebody has freed the element while we weren't looking */
+      rc_unlock(rc);
+      lf_hash_search_unpin(thd->pins);
+      goto retry;
+    }
+
+    lf_hash_search_unpin(thd->pins); /* the element cannot go away anymore */
+    thd->waiting_for= rc;
+    rc->waiter_count++;
+    thd->killed= 0;
+  }
+  else
+  {
+    DBUG_ASSERT(thd->waiting_for->id.type == resid->type);
+    DBUG_ASSERT(resid->type->compare(&thd->waiting_for->id, resid) == 0);
+    DBUG_PRINT("wt", ("adding another blocker"));
+
+    /*
+      we can safely access the resource here, it's in the hash as it has
+      at least one owner, and non-zero waiter_count
+    */
+    rc= thd->waiting_for;
+    rc_wrlock(rc);
+    DBUG_ASSERT(rc->waiter_count);
+    DBUG_ASSERT(rc->state == ACTIVE);
+
+    if (thd->killed)
+    {
+      stop_waiting_locked(thd);
+      DBUG_RETURN(WT_DEADLOCK);
+    }
+  }
+  /* rc is write-locked here; register blocker as an owner unless present */
+  for (i=0; i < rc->owners.elements; i++)
+    if (*dynamic_element(&rc->owners, i, WT_THD**) == blocker)
+      break;
+  if (i >= rc->owners.elements)
+  {
+    if (push_dynamic(&blocker->my_resources, (void*)&rc))
+    {
+      stop_waiting_locked(thd);
+      DBUG_RETURN(WT_DEADLOCK); /* deadlock and OOM use the same error code */
+    }
+    if (push_dynamic(&rc->owners, (void*)&blocker))
+    {
+      pop_dynamic(&blocker->my_resources); /* undo the push above */
+      stop_waiting_locked(thd);
+      DBUG_RETURN(WT_DEADLOCK);
+    }
+  }
+  rc_unlock(rc);
+
+  if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short))
+  {
+    stop_waiting(thd);
+    DBUG_RETURN(WT_DEADLOCK);
+  }
+  DBUG_RETURN(0);
+}
+
+/**
+  called by a *waiter* to start waiting
+
+  It's supposed to be a drop-in replacement for
+  pthread_cond_timedwait(), and it takes mutex as an argument.
+
+  Waits in up to two steps: first for *thd->timeout_short; if that times
+  out, a deadlock search with the long depth limit is run and, when no
+  deadlock is found and *thd->timeout_long is larger, the wait continues
+  until timeout_long has elapsed since the start. thd->waiting_for is
+  dereferenced unconditionally, so it must be set when this is called.
+
+  @param thd    the waiting thread
+  @param mutex  the mutex passed on to pthread_cond_timedwait()
+
+  @return WT_OK, WT_TIMEOUT or WT_DEADLOCK. NOTE(review): the result of
+          pthread_cond_timedwait() is compared with WT_TIMEOUT and returned
+          as is - presumably WT_TIMEOUT equals its timeout code; confirm
+          in waiting_threads.h
+*/
+int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex)
+{
+  int ret= WT_TIMEOUT;
+  struct timespec timeout;
+  ulonglong before, after, starttime;
+  WT_RESOURCE *rc= thd->waiting_for;
+  DBUG_ENTER("wt_thd_cond_timedwait");
+  DBUG_PRINT("wt", ("enter: thd=%s, rc=%p", thd->name, rc));
+
+#ifndef DBUG_OFF
+  /* debug builds check that a resource is always used with one mutex */
+  if (rc->mutex)
+    DBUG_ASSERT(rc->mutex == mutex);
+  else
+    rc->mutex= mutex;
+  safe_mutex_assert_owner(mutex);
+#endif
+
+  before= starttime= my_getsystime();
+
+#ifdef __WIN__
+  /*
+    only for the sake of Windows we distinguish between
+    'before' and 'starttime'
+  */
+  GetSystemTimeAsFileTime((PFILETIME)&starttime);
+#endif
+
+  rc_wrlock(rc);
+  if (rc->owners.elements == 0 || thd->killed) /* no need to wait at all */
+    ret= WT_OK;
+  rc_unlock(rc);
+
+  set_timespec_time_nsec(timeout, starttime, (*thd->timeout_short)*ULL(1000));
+  if (ret == WT_TIMEOUT)
+    ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout);
+  if (ret == WT_TIMEOUT)
+  {
+    if (deadlock(thd, thd, 0, *thd->deadlock_search_depth_long))
+      ret= WT_DEADLOCK;
+    else if (*thd->timeout_long > *thd->timeout_short)
+    {
+      set_timespec_time_nsec(timeout, starttime, (*thd->timeout_long)*ULL(1000));
+      if (!thd->killed)
+        ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout);
+    }
+  }
+  after= my_getsystime();
+  if (stop_waiting(thd) == WT_DEADLOCK) /* if we're killed */
+    ret= WT_DEADLOCK;
+  increment_wait_stats(after-before, ret);
+  if (ret == WT_OK)
+    increment_success_stats();
+  DBUG_RETURN(ret);
+}
+
+/**
+  called by a *blocker* when it releases a resource
+
+  it's conceptually similar to pthread_cond_broadcast, and must be done
+  under the same mutex as wt_thd_cond_timedwait().
+
+  For every matching resource in thd->my_resources, thd is removed from
+  the resource's owners; if no owners remain, the waiters are woken up
+  with a broadcast, and the resource is freed if nobody uses it anymore.
+
+  @param thd     the blocker that releases the resource(s)
+  @param resid   a resource to release. 0 to release all resources
+*/
+
+void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid)
+{
+  uint i;
+  DBUG_ENTER("wt_thd_release");
+
+  for (i=0; i < thd->my_resources.elements; i++)
+  {
+    uint j;
+    WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**);
+    if (!resid || (resid->type->compare(&rc->id, resid) == 0))
+    {
+      rc_wrlock(rc);
+      /*
+        nobody's trying to free the resource now,
+        as its owners[] array is not empty (at least thd must be there)
+      */
+      DBUG_ASSERT(rc->state == ACTIVE);
+      for (j=0; j < rc->owners.elements; j++)
+        if (*dynamic_element(&rc->owners, j, WT_THD**) == thd)
+          break;
+      DBUG_ASSERT(j < rc->owners.elements);
+      delete_dynamic_element(&rc->owners, j);
+      if (rc->owners.elements == 0)
+      {
+        pthread_cond_broadcast(&rc->cond);
+#ifndef DBUG_OFF
+        if (rc->mutex)
+          safe_mutex_assert_owner(rc->mutex);
+#endif
+      }
+      unlock_lock_and_free_resource(thd, rc); /* also releases rc's lock */
+      if (resid)
+      {
+        /* a single resource was requested: drop it from the list and stop */
+        delete_dynamic_element(&thd->my_resources, i);
+        DBUG_VOID_RETURN;
+      }
+    }
+  }
+  if (!resid)
+    reset_dynamic(&thd->my_resources);
+  DBUG_VOID_RETURN;
+}
+

=== modified file 'sql-common/client.c'
--- a/sql-common/client.c	2008-05-08 16:01:15 +0000
+++ b/sql-common/client.c	2008-10-20 09:16:47 +0000
@@ -999,7 +999,7 @@ static int add_init_command(struct st_my
   {
     options->init_commands= (DYNAMIC_ARRAY*)my_malloc(sizeof(DYNAMIC_ARRAY),
 						      MYF(MY_WME));
-    init_dynamic_array(options->init_commands,sizeof(char*),0,5 CALLER_INFO);
+    init_dynamic_array(options->init_commands,sizeof(char*),5,5 CALLER_INFO);
   }
 
   if (!(tmp= my_strdup(cmd,MYF(MY_WME))) ||

=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc	2008-10-13 19:26:56 +0000
+++ b/sql/ha_partition.cc	2008-10-20 19:13:22 +0000
@@ -1020,7 +1020,7 @@ static bool print_admin_msg(THD* thd, co
   va_list args;
   Protocol *protocol= thd->protocol;
   uint length, msg_length;
-  char msgbuf[PARTITION_MAX_MSG_BUF];
+  char msgbuf[HA_MAX_MSG_BUF];
   char name[NAME_LEN*2+2];
 
   va_start(args, fmt);
@@ -5226,6 +5226,7 @@ int ha_partition::extra(enum ha_extra_fu
   case HA_EXTRA_KEYREAD:
   case HA_EXTRA_NO_KEYREAD:
   case HA_EXTRA_FLUSH:
+  case HA_EXTRA_PREPARE_FOR_FORCED_CLOSE:
     DBUG_RETURN(loop_extra(operation));
 
     /* Category 2), used by non-MyISAM handlers */
@@ -5249,8 +5250,7 @@ int ha_partition::extra(enum ha_extra_fu
   case HA_EXTRA_PREPARE_FOR_DROP:
   case HA_EXTRA_FLUSH_CACHE:
   {
-    if (m_myisam)
-      DBUG_RETURN(loop_extra(operation));
+    DBUG_RETURN(loop_extra(operation));
     break;
   }
   case HA_EXTRA_CACHE:
@@ -5459,8 +5459,8 @@ int ha_partition::loop_extra(enum ha_ext
   DBUG_ENTER("ha_partition::loop_extra()");
   
   /* 
-    TODO, 5.2: this is where you could possibly add optimisations to add the bitmap
-    _if_ a SELECT.
+    TODO, 5.2: this is where you could possibly add optimisations to add the
+    bitmap _if_ a SELECT.
   */
   for (file= m_file; *file; file++)
   {

=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h	2008-10-17 17:47:16 +0000
+++ b/sql/mysql_priv.h	2008-10-20 19:13:22 +0000
@@ -1318,6 +1318,9 @@ bool mysql_insert(THD *thd,TABLE_LIST *t
                   List<List_item> &values, List<Item> &update_fields,
                   List<Item> &update_values, enum_duplicates flag,
                   bool ignore);
+void upgrade_lock_type_for_insert(THD *thd, thr_lock_type *lock_type,
+                                  enum_duplicates duplic,
+                                  bool is_multi_insert);
 int check_that_all_fields_are_given_values(THD *thd, TABLE *entry,
                                            TABLE_LIST *table_list);
 void prepare_triggers_for_insert_stmt(TABLE *table);

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2008-10-17 17:47:16 +0000
+++ b/sql/mysqld.cc	2008-10-20 19:13:22 +0000
@@ -28,6 +28,7 @@
 #include "events.h"
 #include "ddl_blocker.h"
 #include "sql_audit.h"
+#include <waiting_threads.h>
 
 #include "../storage/myisam/ha_myisam.h"
 
@@ -1357,6 +1358,7 @@ void clean_up(bool print_message)
     tc_log->close();
   delegates_destroy();
   xid_cache_free();
+  wt_end();
   delete_elements(&key_caches, (void (*)(const char*, uchar*)) free_key_cache);
   multi_keycache_free();
   free_status_vars();
@@ -3904,6 +3906,8 @@ static int init_server_components()
   if (table_def_init() | hostname_cache_init())
     unireg_abort(1);
 
+  wt_init();
+
   query_cache_result_size_limit(query_cache_limit);
   query_cache_set_min_res_unit(query_cache_min_res_unit);
   query_cache_init();
@@ -3944,6 +3948,9 @@ static int init_server_components()
 
   /* set up the hook before initializing plugins which may use it */
   error_handler_hook= my_message_sql;
+  proc_info_hook= (const char *(*)(void *, const char *, const char *,
+                                   const char *, const unsigned int))
+                  set_thd_proc_info;
 
   if (xid_cache_init())
   {
@@ -5848,7 +5855,11 @@ enum options_mysqld
   OPT_DEBUG_CRC, OPT_DEBUG_ON,
   OPT_SLAVE_EXEC_MODE,
   OPT_GENERAL_LOG_FILE,
-  OPT_SLOW_QUERY_LOG_FILE
+  OPT_SLOW_QUERY_LOG_FILE,
+  OPT_DEADLOCK_SEARCH_DEPTH_SHORT,
+  OPT_DEADLOCK_SEARCH_DEPTH_LONG,
+  OPT_DEADLOCK_TIMEOUT_SHORT,
+  OPT_DEADLOCK_TIMEOUT_LONG
 };
 
 
@@ -5979,6 +5990,26 @@ struct my_option my_long_options[] =
    NO_ARG, 0, 0, 0, 0, 0, 0},
   {"datadir", 'h', "Path to the database root.", (uchar**) &mysql_data_home,
    (uchar**) &mysql_data_home, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+  {"deadlock-search-depth-short", OPT_DEADLOCK_SEARCH_DEPTH_SHORT,
+   "Short search depth for the two-step deadlock detection",
+   (uchar**) &global_system_variables.wt_deadlock_search_depth_short,
+   (uchar**) &max_system_variables.wt_deadlock_search_depth_short,
+   0, GET_ULONG, REQUIRED_ARG, 4, 0, 32, 0, 0, 0},
+  {"deadlock-search-depth-long", OPT_DEADLOCK_SEARCH_DEPTH_LONG,
+   "Long search depth for the two-step deadlock detection",
+   (uchar**) &global_system_variables.wt_deadlock_search_depth_long,
+   (uchar**) &max_system_variables.wt_deadlock_search_depth_long,
+   0, GET_ULONG, REQUIRED_ARG, 15, 0, 33, 0, 0, 0},
+  {"deadlock-timeout-short", OPT_DEADLOCK_TIMEOUT_SHORT,
+   "Short timeout for the two-step deadlock detection (in microseconds)",
+   (uchar**) &global_system_variables.wt_timeout_short,
+   (uchar**) &max_system_variables.wt_timeout_short,
+   0, GET_ULONG, REQUIRED_ARG, 10000, 0, ULONG_MAX, 0, 0, 0},
+  {"deadlock-timeout-long", OPT_DEADLOCK_TIMEOUT_LONG,
+   "Long timeout for the two-step deadlock detection (in microseconds)",
+   (uchar**) &global_system_variables.wt_timeout_long,
+   (uchar**) &max_system_variables.wt_timeout_long,
+   0, GET_ULONG, REQUIRED_ARG, 50000000, 0, ULONG_MAX, 0, 0, 0},
 #ifndef DBUG_OFF
   {"debug", '#', "Debug log.", (uchar**) &default_dbug_option,
    (uchar**) &default_dbug_option, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
@@ -7777,7 +7808,10 @@ static void mysql_init_variables(void)
   /* Things reset to zero */
   opt_skip_slave_start= opt_reckless_slave = 0;
   mysql_home[0]= pidfile_name[0]= log_error_file[0]= 0;
+#if defined(HAVE_REALPATH) && !defined(HAVE_purify) && !defined(HAVE_BROKEN_REALPATH)
+  /*  We can only test for sub paths if my_symlink.c is using realpath */
   myisam_test_invalid_symlink= test_if_data_home_dir;
+#endif
   opt_log= opt_slow_log= 0;
   opt_update_log= 0;
   log_output_options= find_bit_type(log_output_str, &log_output_typelib);

=== modified file 'sql/set_var.cc'
--- a/sql/set_var.cc	2008-10-07 02:06:44 +0000
+++ b/sql/set_var.cc	2008-10-20 19:13:22 +0000
@@ -59,7 +59,7 @@
 #include <thr_alarm.h>
 #include <myisam.h>
 #include <my_dir.h>
-
+#include <waiting_threads.h>
 #include "events.h"
 #include "transaction.h"
 
@@ -239,6 +239,18 @@ static sys_var_long_ptr	sys_connect_time
 					    &connect_timeout);
 static sys_var_const_str       sys_datadir(&vars, "datadir", mysql_real_data_home);
 static sys_var_backup_wait_timeout sys_backup_wait_timeout(&vars, "backup_wait_timeout");
+static sys_var_thd_ulong sys_deadlock_search_depth_short(&vars,
+                                "deadlock_search_depth_short",
+                                 &SV::wt_deadlock_search_depth_short);
+static sys_var_thd_ulong sys_deadlock_search_depth_long(&vars,
+                                "deadlock_search_depth_long",
+                                 &SV::wt_deadlock_search_depth_long);
+static sys_var_thd_ulong sys_deadlock_timeout_short(&vars,
+                                "deadlock_timeout_short",
+                                 &SV::wt_timeout_short);
+static sys_var_thd_ulong sys_deadlock_timeout_long(&vars,
+                                "deadlock_timeout_long",
+                                 &SV::wt_timeout_long);
 #ifndef DBUG_OFF
 static sys_var_thd_dbug        sys_dbug(&vars, "debug");
 #endif

=== modified file 'sql/sql_base.cc'
--- a/sql/sql_base.cc	2008-10-08 09:27:11 +0000
+++ b/sql/sql_base.cc	2008-10-20 19:13:22 +0000
@@ -3252,7 +3252,6 @@ check_and_update_table_version(THD *thd,
   }
 
   DBUG_EXECUTE_IF("reprepare_each_statement", return inject_reprepare(thd););
-
   return FALSE;
 }
 
@@ -3570,8 +3569,8 @@ int open_tables(THD *thd, TABLE_LIST **s
   /* Also used for indicating that prelocking is need */
   TABLE_LIST **query_tables_last_own;
   bool safe_to_ignore_table;
-
   DBUG_ENTER("open_tables");
+
   /*
     temporary mem_root for new .frm parsing.
     TODO: variables for size

=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc	2008-10-08 15:29:55 +0000
+++ b/sql/sql_class.cc	2008-10-20 19:13:22 +0000
@@ -259,13 +259,16 @@ int thd_tablespace_op(const THD *thd)
 
 
 extern "C"
-const char *set_thd_proc_info(THD *thd, const char *info, 
-                              const char *calling_function, 
-                              const char *calling_file, 
+const char *set_thd_proc_info(THD *thd, const char *info,
+                              const char *calling_function,
+                              const char *calling_file,
                               const unsigned int calling_line)
 {
+  if (!thd)
+    thd= current_thd;
+
   const char *old_info= thd->proc_info;
-  DBUG_PRINT("proc_info", ("%s:%d  %s", calling_file, calling_line, 
+  DBUG_PRINT("proc_info", ("%s:%d  %s", calling_file, calling_line,
                            (info != NULL) ? info : "(null)"));
 #if defined(ENABLED_PROFILING)
   thd->profiling.status_change(info, calling_function, calling_file, calling_line);
@@ -605,6 +608,10 @@ THD::THD()
   peer_port= 0;					// For SHOW PROCESSLIST
   transaction.m_pending_rows_event= 0;
   transaction.on= 1;
+  wt_thd_lazy_init(&transaction.wt, &variables.wt_deadlock_search_depth_short,
+                                    &variables.wt_timeout_short,
+                                    &variables.wt_deadlock_search_depth_long,
+                                    &variables.wt_timeout_long);
 #ifdef SIGNAL_WITH_VIO_CLOSE
   active_vio = 0;
 #endif
@@ -860,6 +867,7 @@ void THD::cleanup(void)
   debug_sync_end_thread(this);
 #endif /* defined(ENABLED_DEBUG_SYNC) */
 
+  wt_thd_destroy(&transaction.wt);
   mysql_ha_cleanup(this);
   delete_dynamic(&user_var_events);
   hash_free(&user_vars);
@@ -867,7 +875,7 @@ void THD::cleanup(void)
   my_free((char*) variables.time_format, MYF(MY_ALLOW_ZERO_PTR));
   my_free((char*) variables.date_format, MYF(MY_ALLOW_ZERO_PTR));
   my_free((char*) variables.datetime_format, MYF(MY_ALLOW_ZERO_PTR));
-  
+
   sp_cache_clear(&sp_proc_cache);
   sp_cache_clear(&sp_func_cache);
 
@@ -927,13 +935,15 @@ THD::~THD()
 #endif
   stmt_map.reset();                     /* close all prepared statements */
   DBUG_ASSERT(lock_info.n_cursors == 0);
+  ha_close_connection(this);
+  plugin_thdvar_cleanup(this);
+
   if (!cleanup_done)
     cleanup();
 
   mdl_context_destroy(&mdl_context);
   mdl_context_destroy(&handler_mdl_context);
 
-  ha_close_connection(this);
   mysql_audit_release(this);
   plugin_thdvar_cleanup(this);
 

=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h	2008-10-09 12:48:49 +0000
+++ b/sql/sql_class.h	2008-10-20 19:13:22 +0000
@@ -69,6 +69,7 @@ private:
   bool m_invalidated;
 };
 
+#include <waiting_threads.h>
 
 class Relay_log_info;
 
@@ -77,6 +78,7 @@ class Load_log_event;
 class Slave_log_event;
 class sp_rcontext;
 class sp_cache;
+class Lex_input_stream;
 class Parser_state;
 class Rows_log_event;
 
@@ -442,6 +444,10 @@ struct system_variables
   DATE_TIME_FORMAT *datetime_format;
   DATE_TIME_FORMAT *time_format;
   my_bool sysdate_is_now;
+
+  /* deadlock detection */
+  ulong wt_timeout_short, wt_deadlock_search_depth_short;
+  ulong wt_timeout_long, wt_deadlock_search_depth_long;
 };
 
 
@@ -1557,6 +1563,7 @@ public:
     THD_TRANS stmt;			// Trans for current statement
     bool on;                            // see ha_enable_transaction()
     XID_STATE xid_state;
+    WT_THD wt;
     Rows_log_event *m_pending_rows_event;
 
     /*

=== modified file 'sql/sql_insert.cc'
--- a/sql/sql_insert.cc	2008-10-17 17:47:16 +0000
+++ b/sql/sql_insert.cc	2008-10-20 19:13:22 +0000
@@ -389,10 +389,9 @@ void prepare_triggers_for_insert_stmt(TA
   downgrade the lock in handler::store_lock() method.
 */
 
-static
-void upgrade_lock_type(THD *thd, thr_lock_type *lock_type,
-                       enum_duplicates duplic,
-                       bool is_multi_insert)
+void upgrade_lock_type_for_insert(THD *thd, thr_lock_type *lock_type,
+                                  enum_duplicates duplic,
+                                  bool is_multi_insert)
 {
   if (duplic == DUP_UPDATE ||
       duplic == DUP_REPLACE && *lock_type == TL_WRITE_CONCURRENT_INSERT)
@@ -589,8 +588,8 @@ bool mysql_insert(THD *thd,TABLE_LIST *t
     Upgrade lock type if the requested lock is incompatible with
     the current connection mode or table operation.
   */
-  upgrade_lock_type(thd, &table_list->lock_type, duplic,
-                    values_list.elements > 1);
+  upgrade_lock_type_for_insert(thd, &table_list->lock_type, duplic,
+                               values_list.elements > 1);
 
   /*
     We can't write-delayed into a table locked with LOCK TABLES:

=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc	2008-10-17 17:47:16 +0000
+++ b/sql/sql_parse.cc	2008-10-20 19:13:22 +0000
@@ -2006,14 +2006,16 @@ mysql_execute_command(THD *thd)
 #endif
   case SQLCOM_SHOW_STATUS_PROC:
   case SQLCOM_SHOW_STATUS_FUNC:
-    if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE, FALSE, UINT_MAX)))
+    if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE, FALSE,
+                                  UINT_MAX)))
       res= execute_sqlcom_select(thd, all_tables);
     break;
   case SQLCOM_SHOW_STATUS:
   {
     system_status_var old_status_var= thd->status_var;
     thd->initial_status_var= &old_status_var;
-    if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE, FALSE, UINT_MAX)))
+    if (!(res= check_table_access(thd, SELECT_ACL, all_tables, FALSE, FALSE,
+                                  UINT_MAX)))
       res= execute_sqlcom_select(thd, all_tables);
     /* Don't log SHOW STATUS commands to slow query log */
     thd->server_status&= ~(SERVER_QUERY_NO_INDEX_USED |
@@ -5088,15 +5090,19 @@ check_table_access(THD *thd, ulong requi
       continue;
     }
 
+    DBUG_PRINT("info", ("derived: %d  view: %d", tables->derived != 0,
+                        tables->view != 0));
     if (tables->is_anonymous_derived_table() ||
-        (tables->table && tables->table->s && (int)tables->table->s->tmp_table))
+        (tables->table && tables->table->s &&
+         (int)tables->table->s->tmp_table))
       continue;
     thd->security_ctx= sctx;
-    if ((sctx->master_access & want_access) ==
-        want_access && thd->db)
+    if ((sctx->master_access & want_access) == want_access &&
+        thd->db)
       tables->grant.privilege= want_access;
-    else if (check_access(thd, want_access, tables->get_db_name(),
-                          &tables->grant.privilege, 0, no_errors, 0))
+    else if (check_access(thd,want_access,tables->get_db_name(),
+                          &tables->grant.privilege,
+                          0, no_errors, 0))
       goto deny;
   }
   thd->security_ctx= backup_ctx;
@@ -7456,15 +7462,14 @@ bool check_identifier_name(LEX_STRING *s
 
 /*
   Check if path does not contain mysql data home directory
+
   SYNOPSIS
     test_if_data_home_dir()
     dir                     directory
-    conv_home_dir           converted data home directory
-    home_dir_len            converted data home directory length
 
   RETURN VALUES
     0	ok
-    1	error  
+    1	error ;  Given path contains data directory
 */
 C_MODE_START
 
@@ -7492,11 +7497,17 @@ int test_if_data_home_dir(const char *di
                         mysql_unpacked_real_data_home_len,
                         (const uchar*) mysql_unpacked_real_data_home,
                         mysql_unpacked_real_data_home_len))
+      {
+        DBUG_PRINT("error", ("Path is part of mysql_real_data_home"));
         DBUG_RETURN(1);
+      }
     }
     else if (!memcmp(path, mysql_unpacked_real_data_home,
                      mysql_unpacked_real_data_home_len))
+    {
+      DBUG_PRINT("error", ("Path is part of mysql_real_data_home"));
       DBUG_RETURN(1);
+    }
   }
   DBUG_RETURN(0);
 }

=== modified file 'sql/sql_prepare.cc'
--- a/sql/sql_prepare.cc	2008-10-10 16:23:30 +0000
+++ b/sql/sql_prepare.cc	2008-10-20 19:13:22 +0000
@@ -1122,6 +1122,8 @@ static bool mysql_test_insert(Prepared_s
   if (insert_precheck(thd, table_list))
     goto error;
 
+  upgrade_lock_type_for_insert(thd, &table_list->lock_type, duplic,
+                               values_list.elements > 1);
   /*
     open temporary memory pool for temporary data allocated by derived
     tables & preparation procedure
@@ -2465,7 +2467,6 @@ void mysql_stmt_execute(THD *thd, char *
   stmt->execute_loop(&expanded_query, open_cursor, packet, packet_end);
 
   DBUG_VOID_RETURN;
-
 }
 
 

=== modified file 'sql/sql_table.cc'
--- a/sql/sql_table.cc	2008-10-08 11:46:49 +0000
+++ b/sql/sql_table.cc	2008-10-20 19:13:22 +0000
@@ -5347,8 +5347,7 @@ compare_tables(THD *thd,
   new_field_it.init(alter_info->create_list);
   tmp_new_field_it.init(tmp_alter_info.create_list);
 
-  /*
-    Go through fields and check if the original ones are compatible
+  /*   Go through fields and check if the original ones are compatible
     with new table.
   */
   for (f_ptr= table->field, new_field= new_field_it++,
@@ -5362,10 +5361,11 @@ compare_tables(THD *thd,
       new_field->charset= create_info->default_table_charset;
 
     /* Don't pack rows in old tables if the user has requested this. */
-    if ((tmp_new_field->flags & BLOB_FLAG) ||
-        tmp_new_field->sql_type == MYSQL_TYPE_VARCHAR &&
-        create_info->row_type != ROW_TYPE_FIXED)
-      create_info->table_options|= HA_OPTION_PACK_RECORD;
+      if (create_info->row_type == ROW_TYPE_DYNAMIC ||
+	(tmp_new_field->flags & BLOB_FLAG) ||
+	tmp_new_field->sql_type == MYSQL_TYPE_VARCHAR &&
+  	create_info->row_type != ROW_TYPE_FIXED)
+        create_info->table_options|= HA_OPTION_PACK_RECORD;
 
     /* Check how fields have been modified */
     if (alter_info->flags & ALTER_CHANGE_COLUMN)

=== modified file 'sql/sql_view.cc'
--- a/sql/sql_view.cc	2008-09-30 13:31:41 +0000
+++ b/sql/sql_view.cc	2008-10-20 19:13:22 +0000
@@ -1029,7 +1029,6 @@ bool mysql_make_view(THD *thd, File_pars
   bool parse_status;
   bool result, view_is_mergeable;
   TABLE_LIST *view_main_select_tables;
-
   DBUG_ENTER("mysql_make_view");
   DBUG_PRINT("info", ("table: %p (%s)", table, table->table_name));
 

=== modified file 'storage/falcon/Makefile.am'
--- a/storage/falcon/Makefile.am	2008-09-03 21:49:18 +0000
+++ b/storage/falcon/Makefile.am	2008-10-20 09:16:47 +0000
@@ -438,5 +438,7 @@ falcon_probes.h: falcon_probes.d
 falcon_probes.o:
 	$(DTRACE) $(DTRACEFLAGS) -G -s falcon_probes.d $(DTRACEFILES)
 
+CLEANFILES=	libhafalcon.a
+
 # Don't update the files from bitkeeper
 %::SCCS/s.%

=== modified file 'storage/maria/ha_maria.cc'
--- a/storage/maria/ha_maria.cc	2008-09-04 18:30:34 +0000
+++ b/storage/maria/ha_maria.cc	2008-10-20 13:03:34 +0000
@@ -241,7 +241,7 @@ static void _ma_check_print_msg(HA_CHECK
   THD *thd= (THD *) param->thd;
   Protocol *protocol= thd->protocol;
   uint length, msg_length;
-  char msgbuf[MARIA_MAX_MSG_BUF];
+  char msgbuf[HA_MAX_MSG_BUF];
   char name[NAME_LEN * 2 + 2];
 
   msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
@@ -1681,6 +1681,7 @@ void ha_maria::start_bulk_insert(ha_rows
   THD *thd= current_thd;
   ulong size= min(thd->variables.read_buff_size,
                   (ulong) (table->s->avg_row_length * rows));
+  MARIA_SHARE *share= file->s;
   DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu",
                       (ulong) rows, size));
 
@@ -1688,8 +1689,8 @@ void ha_maria::start_bulk_insert(ha_rows
   if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
     maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size);
 
-  can_enable_indexes= (maria_is_all_keys_active(file->s->state.key_map,
-                                                file->s->base.keys));
+  can_enable_indexes= (maria_is_all_keys_active(share->state.key_map,
+                                                share->base.keys));
   bulk_insert_single_undo= BULK_INSERT_NONE;
 
   if (!(specialflag & SPECIAL_SAFE_MODE))
@@ -1701,8 +1702,17 @@ void ha_maria::start_bulk_insert(ha_rows
        we don't want to update the key statistics based of only a few rows.
        Index file rebuild requires an exclusive lock, so if versioning is on
        don't do it (see how ha_maria::store_lock() tries to predict repair).
+       We can repair index only if we have an exclusive (TL_WRITE) lock. To
+       see if table is empty, we shouldn't rely on the old records' count from
+       our transaction's start (if that old count is 0 but now there are
+       records in the table, we would wrongly destroy them).
+       So we need to look at share->state.state.records.
+       As a safety net for now, we don't remove the test of
+       file->state->records, because there is uncertainty on what will happen
+       during repair if the two states disagree.
     */
-    if (file->state->records == 0 && can_enable_indexes &&
+    if ((file->state->records == 0) &&
+        (share->state.state.records == 0) && can_enable_indexes &&
         (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) &&
         (file->lock.type == TL_WRITE))
     {
@@ -1711,7 +1721,7 @@ void ha_maria::start_bulk_insert(ha_rows
          is more costly (flushes, syncs) than a row write.
       */
       maria_disable_non_unique_index(file, rows);
-      if (file->s->now_transactional)
+      if (share->now_transactional)
       {
         bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
         write_log_record_for_bulk_insert(file);
@@ -2109,7 +2119,8 @@ int ha_maria::extra(enum ha_extra_functi
        operation == HA_EXTRA_PREPARE_FOR_RENAME))
   {
     THD *thd= table->in_use;
-    file->trn= THD_TRN;
+    TRN *trn= THD_TRN;
+    _ma_set_trn_for_table(file, trn);
   }
   return maria_extra(file, operation, 0);
 }
@@ -2175,17 +2186,14 @@ int ha_maria::external_lock(THD *thd, in
       /* Start of new statement */
       if (!trn)  /* no transaction yet - open it now */
       {
-        trn= trnman_new_trn(& thd->mysys_var->mutex,
-                            & thd->mysys_var->suspend,
-                            thd->thread_stack + STACK_DIRECTION *
-                            (my_thread_stack_size - STACK_MIN_SIZE));
+        trn= trnman_new_trn(& thd->transaction.wt);
         if (unlikely(!trn))
           DBUG_RETURN(HA_ERR_OUT_OF_MEM);
         THD_TRN= trn;
         if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
           trans_register_ha(thd, TRUE, maria_hton);
       }
-      file->trn= trn;
+      _ma_set_trn_for_table(file, trn);
       if (!trnman_increment_locked_tables(trn))
       {
         trans_register_ha(thd, FALSE, maria_hton);
@@ -2241,7 +2249,7 @@ int ha_maria::external_lock(THD *thd, in
       if (_ma_reenable_logging_for_table(file, TRUE))
         DBUG_RETURN(1);
       /** @todo zero file->trn also in commit and rollback */
-      file->trn= 0;                             // Safety
+      _ma_set_trn_for_table(file, NULL);        // Safety
       /*
         Ensure that file->state points to the current number of rows. This
         is needed if someone calls maria_info() without first doing an
@@ -2299,7 +2307,7 @@ int ha_maria::start_stmt(THD *thd, thr_l
       different ha_maria than 'this' then this->file->trn is a stale
       pointer. We fix it:
     */
-    file->trn= trn;
+    _ma_set_trn_for_table(file, trn);
     /*
       As external_lock() was already called, don't increment locked_tables.
       Note that we call the function below possibly several times when
@@ -2367,10 +2375,7 @@ int ha_maria::implicit_commit(THD *thd, 
       tables may be under LOCK TABLES, and so they will start the next
       statement assuming they have a trn (see ha_maria::start_stmt()).
     */
-    trn= trnman_new_trn(& thd->mysys_var->mutex,
-                        & thd->mysys_var->suspend,
-                        thd->thread_stack + STACK_DIRECTION *
-                        (my_thread_stack_size - STACK_MIN_SIZE));
+    trn= trnman_new_trn(& thd->transaction.wt);
     /* This is just a commit, tables stay locked if they were: */
     trnman_reset_locked_tables(trn, locked_tables);
     THD_TRN= trn;
@@ -2391,7 +2396,7 @@ int ha_maria::implicit_commit(THD *thd, 
         MARIA_HA *handler= ((ha_maria*) table->file)->file;
         if (handler->s->base.born_transactional)
         {
-          handler->trn= trn;
+          _ma_set_trn_for_table(handler, trn);
           if (handler->s->lock.get_status)
           {
             if (_ma_setup_live_state(handler))
@@ -2495,10 +2500,9 @@ enum row_type ha_maria::get_row_type() c
 }
 
 
-static enum data_file_type maria_row_type(HA_CREATE_INFO *info,
-                                          my_bool ignore_transactional)
+static enum data_file_type maria_row_type(HA_CREATE_INFO *info)
 {
-  if (info->transactional == HA_CHOICE_YES && ! ignore_transactional)
+  if (info->transactional == HA_CHOICE_YES)
     return BLOCK_RECORD;
   switch (info->row_type) {
   case ROW_TYPE_FIXED:   return STATIC_RECORD;
@@ -2531,7 +2535,7 @@ int ha_maria::create(const char *name, r
     }
   }
   /* Note: BLOCK_RECORD is used if table is transactional */
-  row_type= maria_row_type(ha_create_info, 0);
+  row_type= maria_row_type(ha_create_info);
   if (ha_create_info->transactional == HA_CHOICE_YES &&
       ha_create_info->row_type != ROW_TYPE_PAGE &&
       ha_create_info->row_type != ROW_TYPE_NOT_USED &&
@@ -2715,15 +2719,15 @@ bool ha_maria::check_if_incompatible_dat
   if (create_info->auto_increment_value != stats.auto_increment_value ||
       create_info->data_file_name != data_file_name ||
       create_info->index_file_name != index_file_name ||
-      (maria_row_type(create_info,  1) != data_file_type &&
+      (maria_row_type(create_info) != data_file_type &&
        create_info->row_type != ROW_TYPE_DEFAULT) ||
       table_changes == IS_EQUAL_NO ||
       table_changes & IS_EQUAL_PACK_LENGTH) // Not implemented yet
     return COMPATIBLE_DATA_NO;
 
-  if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
+  if ((options & (HA_OPTION_CHECKSUM |
                   HA_OPTION_DELAY_KEY_WRITE)) !=
-      (create_info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
+      (create_info->table_options & (HA_OPTION_CHECKSUM |
                               HA_OPTION_DELAY_KEY_WRITE)))
     return COMPATIBLE_DATA_NO;
   return COMPATIBLE_DATA_YES;
@@ -2987,6 +2991,11 @@ static int ha_maria_init(void *p)
     ((force_start_after_recovery_failures != 0) && mark_recovery_success()) ||
     ma_checkpoint_init(checkpoint_interval);
   maria_multi_threaded= maria_in_ha_maria= TRUE;
+
+#if defined(HAVE_REALPATH) && !defined(HAVE_purify) && !defined(HAVE_BROKEN_REALPATH)
+  /*  We can only test for sub paths if my_symlink.c is using realpath */
+  maria_test_invalid_symlink= test_if_data_home_dir;
+#endif
   return res ? HA_ERR_INITIALIZATION : 0;
 }
 

=== modified file 'storage/maria/ma_bitmap.c'
--- a/storage/maria/ma_bitmap.c	2008-04-03 13:40:25 +0000
+++ b/storage/maria/ma_bitmap.c	2008-10-17 13:37:07 +0000
@@ -167,10 +167,10 @@ static inline my_bool write_changed_bitm
     int res= pagecache_write(share->pagecache,
                              &bitmap->file, bitmap->page, 0,
                              (uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
-                             PAGECACHE_LOCK_WRITE, PAGECACHE_PIN,
+                             PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN,
                              PAGECACHE_WRITE_DELAY, &page_link.link,
                              LSN_IMPOSSIBLE);
-    page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+    page_link.unlock= PAGECACHE_LOCK_LEFT_UNLOCKED;
     page_link.changed= 1;
     push_dynamic(&bitmap->pinned_pages, (void*) &page_link);
     DBUG_RETURN(res);
@@ -281,6 +281,14 @@ my_bool _ma_bitmap_end(MARIA_SHARE *shar
     by this thread (ie, checking the changed flag is ok). The reason we
     check it again in the mutex is that if someone else did a flush at the
     same time, we don't have to do the write.
+    This is also ok for _ma_scan_init_block_record() which does not want to
+    miss rows: it cares only for committed rows, that is, rows for which there
+    was a commit before our transaction started; as commit and transaction's
+    start are protected by the same LOCK_trn_list mutex, we see memory at
+    least as new as at other transaction's commit time, so if the committed
+    rows caused bitmap->changed to be true, we see it; if we see 0 it really
+    means a flush happened since then. So, it's ok to read without bitmap's
+    mutex.
 
   RETURN
     0    ok
@@ -305,38 +313,6 @@ my_bool _ma_bitmap_flush(MARIA_SHARE *sh
 }
 
 
-/*
-  @brief Send updated bitmap to the page cache if bitmap is free
-
-  @note
-  This is used by reader threads which don't unpin things
-*/
-
-my_bool _ma_bitmap_wait_or_flush(MARIA_SHARE *share)
-{
-  my_bool res= 0;
-  MARIA_FILE_BITMAP *bitmap= &share->bitmap;
-  DBUG_ENTER("_ma_bitmap_flush");
-  if (bitmap->changed)
-  {
-    pthread_mutex_lock(&bitmap->bitmap_lock);
-    while (bitmap->non_flushable && bitmap->changed)
-    {
-      DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
-      pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
-    }
-    if (bitmap->changed)
-    {
-      bitmap->changed= 0;
-      res= write_changed_bitmap(share, bitmap);
-    }
-    pthread_mutex_unlock(&bitmap->bitmap_lock);
-  }
-  DBUG_RETURN(res);
-}
-
-
-
 /**
    Dirty-page filtering criteria for bitmap pages
 
@@ -386,8 +362,11 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE
       unpinned. We keep the mutex to preserve this situation, and flush to the
       file.
     */
-    res= write_changed_bitmap(share, bitmap);
-    bitmap->changed= FALSE;
+    if (bitmap->changed)
+    {
+      res= write_changed_bitmap(share, bitmap);
+      bitmap->changed= FALSE;
+    }
     /*
       We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap
       pages have been flushed. That's a condition of correctness of
@@ -424,6 +403,8 @@ my_bool _ma_bitmap_flush_all(MARIA_SHARE
 
   @return Operation status
     @retval   0   ok
+
+  @note This unpins pages pinned by other threads.
 */
 
 static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
@@ -437,7 +418,7 @@ static void _ma_bitmap_unpin_all(MARIA_S
   while (pinned_page-- != page_link)
     pagecache_unlock_by_link(share->pagecache, pinned_page->link,
                              pinned_page->unlock, PAGECACHE_UNPIN,
-                             LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE);
+                             LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE, TRUE);
   bitmap->pinned_pages.elements= 0;
   DBUG_VOID_RETURN;
 }
@@ -2139,8 +2120,8 @@ my_bool _ma_bitmap_set_full_page_bits(MA
    function first waits for the flush to be done.
 
    @note
-   info->non_flushable_state is set to 1 if we have incremented
-   bitmap->info->non_flushable and not yet decremented it.
+   this sets info->non_flushable_state to 1 if we have incremented
+   bitmap->non_flushable and not yet decremented it.
 
    @param  share               Table's share
    @param  non_flushable_inc   Increment of MARIA_FILE_BITMAP::non_flushable
@@ -2151,20 +2132,21 @@ void _ma_bitmap_flushable(MARIA_HA *info
 {
   MARIA_SHARE *share= info->s;
   MARIA_FILE_BITMAP *bitmap;
+  DBUG_ENTER("_ma_bitmap_flushable");
 
   /*
     Not transactional tables are never automaticly flushed and needs no
     protection
   */
   if (!share->now_transactional)
-    return;
+    DBUG_VOID_RETURN;
 
   bitmap= &share->bitmap;
   if (non_flushable_inc == -1)
   {
     pthread_mutex_lock(&bitmap->bitmap_lock);
-    DBUG_ASSERT((int) bitmap->non_flushable > 0 &&
-                info->non_flushable_state == 1);
+    DBUG_ASSERT((int) bitmap->non_flushable > 0);
+    DBUG_ASSERT(info->non_flushable_state == 1);
     info->non_flushable_state= 0;
     if (--bitmap->non_flushable == 0)
     {
@@ -2182,14 +2164,15 @@ void _ma_bitmap_flushable(MARIA_HA *info
     }
     DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
     pthread_mutex_unlock(&bitmap->bitmap_lock);
-    return;
+    DBUG_VOID_RETURN;
   }
-  DBUG_ASSERT(non_flushable_inc == 1 && info->non_flushable_state == 0);
-  /* It is a read without mutex because only an optimization */
-  if (unlikely(bitmap->flush_all_requested))
+  DBUG_ASSERT(non_flushable_inc == 1);
+  DBUG_ASSERT(info->non_flushable_state == 0);
+  pthread_mutex_lock(&bitmap->bitmap_lock);
+  while (unlikely(bitmap->flush_all_requested))
   {
     /*
-      _ma_bitmap_flush_all() is waiting for the bitmap to become
+      Some other thread is waiting for the bitmap to become
       flushable. Not the moment to make the bitmap unflushable or more
       unflushable; let's rather back off and wait. If we didn't do this, with
       multiple writers, there may always be one thread causing the bitmap to
@@ -2199,21 +2182,14 @@ void _ma_bitmap_flushable(MARIA_HA *info
       our thread), it is not going to increase it more so is not going to come
       here.
     */
-    pthread_mutex_lock(&bitmap->bitmap_lock);
-    while (bitmap->flush_all_requested)
-    {
-      DBUG_PRINT("info", ("waiting for bitmap flusher"));
-      pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
-    }
-    pthread_mutex_unlock(&bitmap->bitmap_lock);
+    DBUG_PRINT("info", ("waiting for bitmap flusher"));
+    pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
   }
-  /*
-    Ok to set without mutex: we didn't touch the bitmap's content yet; when we
-    touch it we will take the mutex.
-  */
   bitmap->non_flushable++;
   info->non_flushable_state= 1;
   DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
+  pthread_mutex_unlock(&bitmap->bitmap_lock);
+  DBUG_VOID_RETURN;
 }
 
 
@@ -2321,10 +2297,10 @@ my_bool _ma_bitmap_release_unused(MARIA_
       goto err;
   }
 
-  if (info->s->now_transactional)
+  /* This duplicates ma_bitmap_flushable(-1) except it already has mutex */
+  if (info->non_flushable_state)
   {
-    DBUG_ASSERT((int) bitmap->non_flushable >= 0 &&
-                info->non_flushable_state);
+    DBUG_ASSERT(((int) (bitmap->non_flushable)) > 0);
     info->non_flushable_state= 0;
     if (--bitmap->non_flushable == 0)
     {

=== modified file 'storage/maria/ma_blockrec.c'
--- a/storage/maria/ma_blockrec.c	2008-08-11 14:40:32 +0000
+++ b/storage/maria/ma_blockrec.c	2008-10-14 15:18:14 +0000
@@ -1964,7 +1964,7 @@ static my_bool write_tail(MARIA_HA *info
     pagecache_unlock_by_link(share->pagecache, page_link->link,
                              PAGECACHE_LOCK_WRITE_TO_READ,
                              PAGECACHE_PIN_LEFT_PINNED, LSN_IMPOSSIBLE,
-                             LSN_IMPOSSIBLE, 1);
+                             LSN_IMPOSSIBLE, 1, FALSE);
     DBUG_ASSERT(page_link->changed);
     page_link->unlock= PAGECACHE_LOCK_READ_UNLOCK;
     res= 0;
@@ -3026,7 +3026,7 @@ static my_bool write_block_record(MARIA_
     pagecache_unlock_by_link(share->pagecache, page_link->link,
                              PAGECACHE_LOCK_WRITE_TO_READ,
                              PAGECACHE_PIN_LEFT_PINNED, LSN_IMPOSSIBLE,
-                             LSN_IMPOSSIBLE, 1);
+                             LSN_IMPOSSIBLE, 1, FALSE);
     page_link->unlock= PAGECACHE_LOCK_READ_UNLOCK;
     page_link->changed= 1;
   }
@@ -3094,7 +3094,7 @@ static my_bool write_block_record(MARIA_
                                                        info->rec_buff);
       log_pos= store_page_range(log_pos, head_block+1, block_size,
                                 (ulong) block_length, &extents);
-      log_array_pos->str=    (char*) info->rec_buff;
+      log_array_pos->str= info->rec_buff;
       log_array_pos->length= block_length;
       log_entry_length+= block_length;
       log_array_pos++;
@@ -4025,7 +4025,7 @@ static my_bool delete_head_or_tail(MARIA
   pagecache_unlock_by_link(share->pagecache, page_link.link,
                            lock_at_write,
                            PAGECACHE_PIN_LEFT_PINNED, LSN_IMPOSSIBLE,
-                           LSN_IMPOSSIBLE, 1);
+                           LSN_IMPOSSIBLE, 1, FALSE);
   page_link.unlock= lock_at_unpin;
   set_dynamic(&info->pinned_pages, (void*) &page_link,
               info->pinned_pages.elements-1);
@@ -4982,10 +4982,12 @@ my_bool _ma_scan_init_block_record(MARIA
   info->scan.bitmap_pos= info->scan.bitmap_end;
   info->scan.bitmap_page= (pgcache_page_no_t) 0 - share->bitmap.pages_covered;
   /*
-    We have to flush bitmap as we will read the bitmap from the page cache
-    while scanning rows
+    We need to flush what's in memory (bitmap.map) to page cache otherwise, as
+    we are going to read bitmaps from page cache in table scan (see
+    _ma_scan_block_record()), we may miss recently inserted rows (bitmap page
+    in page cache would be too old).
   */
-  DBUG_RETURN(_ma_bitmap_wait_or_flush(info->s));
+  DBUG_RETURN(_ma_bitmap_flush(info->s));
 }
 
 
@@ -5141,7 +5143,9 @@ restart_record_read:
     if (end_of_data > info->scan.dir_end ||
         offset < PAGE_HEADER_SIZE || length < share->base.min_block_length)
     {
-      DBUG_ASSERT(0);
+      DBUG_ASSERT(!(end_of_data > info->scan.dir_end));
+      DBUG_ASSERT(!(offset < PAGE_HEADER_SIZE));
+      DBUG_ASSERT(!(length < share->base.min_block_length));
       goto err;
     }
 #endif
@@ -5408,7 +5412,7 @@ static size_t fill_insert_undo_parts(MAR
   log_parts++;
 
   /* Stored bitmap over packed (zero length or all-zero fields) */
-  log_parts->str=    (char *) info->cur_row.empty_bits;
+  log_parts->str= info->cur_row.empty_bits;
   log_parts->length= share->base.pack_bytes;
   row_length+=       log_parts->length;
   log_parts++;
@@ -5416,7 +5420,7 @@ static size_t fill_insert_undo_parts(MAR
   if (share->base.max_field_lengths)
   {
     /* Store length of all not empty char, varchar and blob fields */
-    log_parts->str=      (char *) field_lengths - 2;
+    log_parts->str= field_lengths - 2;
     log_parts->length=   info->cur_row.field_lengths_length+2;
     int2store(log_parts->str, info->cur_row.field_lengths_length);
     row_length+= log_parts->length;
@@ -5428,8 +5432,8 @@ static size_t fill_insert_undo_parts(MAR
     /*
       Store total blob length to make buffer allocation easier during UNDO
      */
-    log_parts->str=      (char *) info->length_buff;
-    log_parts->length=   (uint) (ma_store_length((uchar *) log_parts->str,
+    log_parts->str=  info->length_buff;
+    log_parts->length= (uint) (ma_store_length((uchar *) log_parts->str,
                                                  info->cur_row.blob_length) -
                                  (uchar*) log_parts->str);
     row_length+=          log_parts->length;
@@ -5442,7 +5446,7 @@ static size_t fill_insert_undo_parts(MAR
        column < end_column;
        column++)
   {
-    log_parts->str= (char*) record + column->offset;
+    log_parts->str= record + column->offset;
     log_parts->length= column->length;
     row_length+= log_parts->length;
     log_parts++;
@@ -5493,7 +5497,7 @@ static size_t fill_insert_undo_parts(MAR
     default:
       DBUG_ASSERT(0);
     }
-    log_parts->str= (char*) column_pos;
+    log_parts->str= column_pos;
     log_parts->length= column_length;
     row_length+= log_parts->length;
     log_parts++;
@@ -5512,8 +5516,8 @@ static size_t fill_insert_undo_parts(MAR
     */
     if (blob_length)
     {
-      char *blob_pos;
-      memcpy_fixed((uchar*) &blob_pos, record + column->offset + size_length,
+      uchar *blob_pos;
+      memcpy_fixed(&blob_pos, record + column->offset + size_length,
                    sizeof(blob_pos));
       log_parts->str= blob_pos;
       log_parts->length= blob_length;
@@ -5612,7 +5616,7 @@ static size_t fill_update_undo_parts(MAR
     {
       field_data= ma_store_length(field_data,
                                   (uint) (column - share->columndef));
-      log_parts->str= (char*) oldrec + column->offset;
+      log_parts->str= oldrec + column->offset;
       log_parts->length= column->length;
       row_length+=       column->length;
       log_parts++;
@@ -5719,7 +5723,7 @@ static size_t fill_update_undo_parts(MAR
                                   (ulong) (column - share->columndef));
       field_data= ma_store_length(field_data, (ulong) old_column_length);
 
-      log_parts->str=     (char*) old_column_pos;
+      log_parts->str=     old_column_pos;
       log_parts->length=  old_column_length;
       row_length+=        old_column_length;
       log_parts++;
@@ -5730,10 +5734,9 @@ static size_t fill_update_undo_parts(MAR
 
   /* Store length of field length data before the field/field_lengths */
   field_lengths= (uint) (field_data - start_field_data);
-  start_log_parts->str=  ((char*)
-                          (start_field_data -
+  start_log_parts->str=  ((start_field_data -
                            ma_calc_length_for_store_length(field_lengths)));
-  ma_store_length((uchar *) start_log_parts->str, field_lengths);
+  ma_store_length((uchar*)start_log_parts->str, field_lengths);
   start_log_parts->length= (size_t) (field_data - start_log_parts->str);
   row_length+= start_log_parts->length;
   DBUG_RETURN(row_length);
@@ -6034,7 +6037,7 @@ uint _ma_apply_redo_insert_row_head_or_t
       pagecache_unlock_by_link(share->pagecache, page_link.link,
                                PAGECACHE_LOCK_WRITE_UNLOCK,
                                PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                               LSN_IMPOSSIBLE, 0);
+                               LSN_IMPOSSIBLE, 0, FALSE);
       DBUG_RETURN(0);
     }
 
@@ -6124,7 +6127,7 @@ err:
     pagecache_unlock_by_link(share->pagecache, page_link.link,
                              PAGECACHE_LOCK_WRITE_UNLOCK,
                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                             LSN_IMPOSSIBLE, 0);
+                             LSN_IMPOSSIBLE, 0, FALSE);
   _ma_mark_file_crashed(share);
   DBUG_RETURN((my_errno= error));
 }
@@ -6194,7 +6197,7 @@ uint _ma_apply_redo_purge_row_head_or_ta
     pagecache_unlock_by_link(share->pagecache, page_link.link,
                              PAGECACHE_LOCK_WRITE_UNLOCK,
                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                             LSN_IMPOSSIBLE, 0);
+                             LSN_IMPOSSIBLE, 0, FALSE);
     DBUG_RETURN(0);
   }
 
@@ -6222,7 +6225,7 @@ err:
   pagecache_unlock_by_link(share->pagecache, page_link.link,
                            PAGECACHE_LOCK_WRITE_UNLOCK,
                            PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                           LSN_IMPOSSIBLE, 0);
+                           LSN_IMPOSSIBLE, 0, FALSE);
   _ma_mark_file_crashed(share);
   DBUG_RETURN((my_errno= error));
 
@@ -6326,7 +6329,7 @@ uint _ma_apply_redo_free_head_or_tail(MA
     pagecache_unlock_by_link(share->pagecache, page_link.link,
                              PAGECACHE_LOCK_WRITE_UNLOCK,
                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                             LSN_IMPOSSIBLE, 0);
+                             LSN_IMPOSSIBLE, 0, FALSE);
     goto err;
   }
   if (lsn_korr(buff) >= lsn)
@@ -6335,7 +6338,7 @@ uint _ma_apply_redo_free_head_or_tail(MA
     pagecache_unlock_by_link(share->pagecache, page_link.link,
                              PAGECACHE_LOCK_WRITE_UNLOCK,
                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                             LSN_IMPOSSIBLE, 0);
+                             LSN_IMPOSSIBLE, 0, FALSE);
   }
   else
   {
@@ -6475,7 +6478,7 @@ uint _ma_apply_redo_insert_row_blobs(MAR
               pagecache_unlock_by_link(share->pagecache, page_link.link,
                                        PAGECACHE_LOCK_WRITE_UNLOCK,
                                        PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                                       LSN_IMPOSSIBLE, 0);
+                                       LSN_IMPOSSIBLE, 0, FALSE);
               goto err;
             }
             /*
@@ -6495,7 +6498,7 @@ uint _ma_apply_redo_insert_row_blobs(MAR
               pagecache_unlock_by_link(share->pagecache, page_link.link,
                                        PAGECACHE_LOCK_WRITE_UNLOCK,
                                        PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                                       LSN_IMPOSSIBLE, 0);
+                                       LSN_IMPOSSIBLE, 0, FALSE);
               continue;
             }
           }
@@ -7082,7 +7085,7 @@ void maria_ignore_trids(MARIA_HA *info)
   if (info->s->base.born_transactional)
   {
     if (!info->trn)
-      info->trn= &dummy_transaction_object;
+      _ma_set_trn_for_table(info, &dummy_transaction_object);
     /* Ignore transaction id when row is read */
     info->trn->min_read_from= ~(TrID) 0;
   }

=== modified file 'storage/maria/ma_blockrec.h'
--- a/storage/maria/ma_blockrec.h	2008-04-10 02:26:36 +0000
+++ b/storage/maria/ma_blockrec.h	2008-10-14 09:38:07 +0000
@@ -182,7 +182,6 @@ maria_page_get_lsn(uchar *page, pgcache_
 my_bool _ma_bitmap_init(MARIA_SHARE *share, File file);
 my_bool _ma_bitmap_end(MARIA_SHARE *share);
 my_bool _ma_bitmap_flush(MARIA_SHARE *share);
-my_bool _ma_bitmap_wait_or_flush(MARIA_SHARE *share);
 my_bool _ma_bitmap_flush_all(MARIA_SHARE *share);
 void _ma_bitmap_reset_cache(MARIA_SHARE *share);
 my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,

=== modified file 'storage/maria/ma_check.c'
--- a/storage/maria/ma_check.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_check.c	2008-10-20 09:16:47 +0000
@@ -2097,14 +2097,14 @@ int maria_chk_data_link(HA_CHECK *param,
     if (param->del_blocks != share->state.state.del)
     {
       _ma_check_print_warning(param,
-                              "Found %10s deleted blocks       Should be: %s",
+                              "Found %10s deleted blocks.  Should be: %s",
                               llstr(param->del_blocks,llbuff),
                               llstr(share->state.state.del,llbuff2));
     }
     if (param->splits != share->state.split)
     {
       _ma_check_print_warning(param,
-                              "Found %10s parts                Should be: %s parts",
+                              "Found %10s key parts.  Should be: %s",
                               llstr(param->splits, llbuff),
                               llstr(share->state.split,llbuff2));
     }
@@ -2685,7 +2685,7 @@ int maria_repair(HA_CHECK *param, regist
                                 (param->testflag & T_BACKUP_DATA ?
                                  MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
                                 sync_dir) ||
-        _ma_open_datafile(info, share, -1))
+        _ma_open_datafile(info, share, NullS, -1))
     {
       goto err;
     }
@@ -3184,7 +3184,7 @@ static my_bool maria_zerofill_index(HA_C
       pagecache_unlock_by_link(share->pagecache, page_link.link,
                                PAGECACHE_LOCK_WRITE_UNLOCK,
                                PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                               LSN_IMPOSSIBLE, 0);
+                               LSN_IMPOSSIBLE, 0, FALSE);
       _ma_check_print_error(param,
                             "Page %9s: Got error %d when reading index file",
                             llstr(pos, llbuff), my_errno);
@@ -3218,7 +3218,7 @@ static my_bool maria_zerofill_index(HA_C
     pagecache_unlock_by_link(share->pagecache, page_link.link,
                              PAGECACHE_LOCK_WRITE_UNLOCK,
                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                             LSN_IMPOSSIBLE, 1);
+                             LSN_IMPOSSIBLE, 1, FALSE);
   }
   if (flush_pagecache_blocks(share->pagecache, &share->kfile,
                              FLUSH_FORCE_WRITE))
@@ -3342,7 +3342,7 @@ static my_bool maria_zerofill_data(HA_CH
     pagecache_unlock_by_link(share->pagecache, page_link.link,
                              PAGECACHE_LOCK_WRITE_UNLOCK,
                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                             LSN_IMPOSSIBLE, 1);
+                             LSN_IMPOSSIBLE, 1, FALSE);
   }
   DBUG_RETURN(_ma_bitmap_flush(share) ||
               flush_pagecache_blocks(share->pagecache, &info->dfile,
@@ -3352,7 +3352,7 @@ err:
   pagecache_unlock_by_link(share->pagecache, page_link.link,
                            PAGECACHE_LOCK_WRITE_UNLOCK,
                            PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                           LSN_IMPOSSIBLE, 0);
+                           LSN_IMPOSSIBLE, 0, FALSE);
   DBUG_RETURN(1);
 }
 
@@ -3794,7 +3794,7 @@ int maria_repair_by_sort(HA_CHECK *param
                                   (param->testflag & T_BACKUP_DATA ?
                                    MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
                                   sync_dir) ||
-          _ma_open_datafile(info, share, -1))
+          _ma_open_datafile(info, share, NullS, -1))
       {
         _ma_check_print_error(param, "Couldn't change to new data file");
         goto err;
@@ -4402,7 +4402,7 @@ err:
                                   MYF((param->testflag & T_BACKUP_DATA ?
                                        MY_REDEL_MAKE_BACKUP : 0) |
                                       sync_dir)) ||
-	  _ma_open_datafile(info,share,-1))
+	  _ma_open_datafile(info,share, NullS, -1))
 	got_error=1;
     }
   }

=== modified file 'storage/maria/ma_checkpoint.c'
--- a/storage/maria/ma_checkpoint.c	2008-08-25 11:49:47 +0000
+++ b/storage/maria/ma_checkpoint.c	2008-08-28 18:52:23 +0000
@@ -222,7 +222,7 @@ static int really_execute_checkpoint(voi
     */
     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 5];
     log_array[TRANSLOG_INTERNAL_PARTS + 0].str=
-      checkpoint_start_log_horizon_char;
+      (uchar*) checkpoint_start_log_horizon_char;
     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= total_rec_length=
       sizeof(checkpoint_start_log_horizon_char);
     for (i= 0; i < (sizeof(record_pieces)/sizeof(record_pieces[0])); i++)

=== modified file 'storage/maria/ma_close.c'
--- a/storage/maria/ma_close.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_close.c	2008-10-20 13:03:34 +0000
@@ -33,7 +33,7 @@ int maria_close(register MARIA_HA *info)
                       (uint) share->tot_locks));
 
   /* Check that we have unlocked key delete-links properly */
-  DBUG_ASSERT(info->used_key_del == 0);
+  DBUG_ASSERT(info->key_del_used == 0);
 
   pthread_mutex_lock(&THR_LOCK_maria);
   if (info->lock_type == F_EXTRA_LCK)
@@ -107,7 +107,7 @@ int maria_close(register MARIA_HA *info)
         File must be synced as it is going out of the maria_open_list and so
         becoming unknown to future Checkpoints.
       */
-      if (!share->temporary && my_sync(share->kfile.file, MYF(MY_WME)))
+      if (share->now_transactional && my_sync(share->kfile.file, MYF(MY_WME)))
         error= my_errno;
       if (my_close(share->kfile.file, MYF(0)))
         error= my_errno;

=== modified file 'storage/maria/ma_commit.c'
--- a/storage/maria/ma_commit.c	2008-05-29 15:33:33 +0000
+++ b/storage/maria/ma_commit.c	2008-10-09 20:03:54 +0000
@@ -106,16 +106,12 @@ int maria_begin(MARIA_HA *info)
 
   if (info->s->now_transactional)
   {
-    TRN *trn;
-    struct st_my_thread_var *mysys_var= my_thread_var;
-    trn= trnman_new_trn(&mysys_var->mutex,
-                        &mysys_var->suspend,
-                        (char*) &mysys_var + STACK_DIRECTION *1024*128);
+    TRN *trn= trnman_new_trn(0);
     if (unlikely(!trn))
       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
 
     DBUG_PRINT("info", ("TRN set to 0x%lx", (ulong) trn));
-    info->trn= trn;
+    _ma_set_trn_for_table(info, trn);
   }
   DBUG_RETURN(0);
 }

=== modified file 'storage/maria/ma_create.c'
--- a/storage/maria/ma_create.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_create.c	2008-10-20 13:03:34 +0000
@@ -836,6 +836,7 @@ int maria_create(const char *name, enum 
     my_printf_error(0, "MARIA table '%s' is in use "
                     "(most likely by a MERGE table). Try FLUSH TABLES.",
                     MYF(0), name + dirname_length(name));
+    my_errno= HA_ERR_TABLE_EXIST;
     goto err;
   }
 
@@ -1008,20 +1009,19 @@ int maria_create(const char *name, enum 
     log_data[0]= test(flags & HA_DONT_TOUCH_DATA);
     int2store(log_data + 1, kfile_size_before_extension);
     int2store(log_data + 1 + 2, share.base.keystart);
-    log_array[TRANSLOG_INTERNAL_PARTS + 0].str= name;
+    log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (uchar *)name;
     /* we store the end-zero, for Recovery to just pass it to my_create() */
-    log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
-      strlen(log_array[TRANSLOG_INTERNAL_PARTS + 0].str) + 1;
+    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= strlen(name) + 1;
     log_array[TRANSLOG_INTERNAL_PARTS + 1].str= log_data;
     /* symlink description is also needed for re-creation by Recovery: */
-    log_array[TRANSLOG_INTERNAL_PARTS + 2].str=
-      (ci->data_file_name ? ci->data_file_name : empty_string);
-    log_array[TRANSLOG_INTERNAL_PARTS + 2].length=
-      strlen(log_array[TRANSLOG_INTERNAL_PARTS + 2].str) + 1;
-    log_array[TRANSLOG_INTERNAL_PARTS + 3].str=
-      (ci->index_file_name ? ci->index_file_name : empty_string);
-    log_array[TRANSLOG_INTERNAL_PARTS + 3].length=
-      strlen(log_array[TRANSLOG_INTERNAL_PARTS + 3].str) + 1;
+    {
+      const char *s= ci->data_file_name ? ci->data_file_name : empty_string;
+      log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (uchar*)s;
+      log_array[TRANSLOG_INTERNAL_PARTS + 2].length= strlen(s) + 1;
+      s= ci->index_file_name ? ci->index_file_name : empty_string;
+      log_array[TRANSLOG_INTERNAL_PARTS + 3].str= (uchar*)s;
+      log_array[TRANSLOG_INTERNAL_PARTS + 3].length= strlen(s) + 1;
+    }
     for (k= TRANSLOG_INTERNAL_PARTS;
          k < (sizeof(log_array)/sizeof(log_array[0])); k++)
       total_rec_length+= (translog_size_t) log_array[k].length;
@@ -1349,7 +1349,8 @@ int _ma_update_state_lsns_sub(MARIA_SHAR
     int res;
     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
     /* table name is logged only for information */
-    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    share->open_file_name.str;
+    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=
+      (uchar *)(share->open_file_name.str);
     log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
       share->open_file_name.length + 1;
     if ((res= translog_write_record(&lsn, LOGREC_IMPORTED_TABLE,

=== modified file 'storage/maria/ma_delete.c'
--- a/storage/maria/ma_delete.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_delete.c	2008-10-20 09:16:47 +0000
@@ -1112,7 +1112,7 @@ static int underflow(register MARIA_HA *
     MARIA_KEY_PARAM anc_key_inserted;
     size_t tmp_length;
 
-    if (first_key)
+    if (keypos == anc_buff + share->keypage_header + key_reflength)
       anc_pos= 0;				/* First key */
     else
     {

=== modified file 'storage/maria/ma_delete_table.c'
--- a/storage/maria/ma_delete_table.c	2008-04-03 13:40:25 +0000
+++ b/storage/maria/ma_delete_table.c	2008-08-06 14:03:27 +0000
@@ -85,7 +85,7 @@ int maria_delete_table(const char *name)
     */
     LSN lsn;
     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
-    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    name;
+    log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (uchar*)name;
     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= strlen(name) + 1;
     if (unlikely(translog_write_record(&lsn, LOGREC_REDO_DROP_TABLE,
                                        &dummy_transaction_object, NULL,

=== modified file 'storage/maria/ma_extra.c'
--- a/storage/maria/ma_extra.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_extra.c	2008-10-20 09:16:47 +0000
@@ -371,6 +371,16 @@ int maria_extra(MARIA_HA *info, enum ha_
     pthread_mutex_unlock(&THR_LOCK_maria);
     break;
   }
+  case HA_EXTRA_PREPARE_FOR_FORCED_CLOSE:
+    if (info->trn)
+    {
+      pthread_mutex_lock(&share->intern_lock);
+      _ma_remove_table_from_trnman(share, info->trn);
+      /* Ensure we don't point to the deleted data in trn */
+      info->state= info->state_start= &share->state.state;
+      pthread_mutex_unlock(&share->intern_lock);    
+    }
+    break;
   case HA_EXTRA_FLUSH:
     if (!share->temporary)
       error= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,

=== modified file 'storage/maria/ma_init.c'
--- a/storage/maria/ma_init.c	2008-06-26 05:18:28 +0000
+++ b/storage/maria/ma_init.c	2008-10-09 20:03:54 +0000
@@ -68,6 +68,8 @@ int maria_init(void)
   }
   hash_init(&maria_stored_state, &my_charset_bin, 32,
             0, sizeof(LSN), 0, (hash_free_key) history_state_free, 0);
+  DBUG_PRINT("info",("dummy_transaction_object: %p",
+                     &dummy_transaction_object));
   return 0;
 }
 

=== modified file 'storage/maria/ma_key_recover.c'
--- a/storage/maria/ma_key_recover.c	2008-07-09 09:02:27 +0000
+++ b/storage/maria/ma_key_recover.c	2008-10-14 15:18:14 +0000
@@ -70,7 +70,7 @@ void _ma_unpin_all_pages(MARIA_HA *info,
     pagecache_unlock_by_link(info->s->pagecache, pinned_page->link,
                              pinned_page->unlock, PAGECACHE_UNPIN,
                              info->trn->rec_lsn, undo_lsn,
-                             pinned_page->changed);
+                             pinned_page->changed, FALSE);
   }
 
   info->pinned_pages.elements= 0;
@@ -203,16 +203,6 @@ my_bool write_hook_for_undo_key(enum tra
     (struct st_msg_to_write_hook_for_undo_key *) hook_arg;
 
   *msg->root= msg->value;
-  /**
-    @todo BUG
-    so we have log mutex and then intern_lock.
-    While in checkpoint we have intern_lock and then log mutex, like when we
-    flush bitmap (flushing bitmap pages can call hook which takes log mutex);
-    and in _ma_update_state_lsns_sub() this is the same.
-    So we can deadlock.
-    Another one is that in translog_assign_id_to_share() we have intern_lock
-    and then log mutex.
-  */
   _ma_fast_unlock_key_del(tbl_info);
   return write_hook_for_undo(type, trn, tbl_info, lsn, 0);
 }
@@ -710,7 +700,7 @@ err:
   pagecache_unlock_by_link(share->pagecache, page_link.link,
                            PAGECACHE_LOCK_WRITE_UNLOCK,
                            PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                           LSN_IMPOSSIBLE, 0);
+                           LSN_IMPOSSIBLE, 0, FALSE);
   DBUG_RETURN(result);
 }
 
@@ -789,7 +779,7 @@ err:
   pagecache_unlock_by_link(share->pagecache, page_link.link,
                            PAGECACHE_LOCK_WRITE_UNLOCK,
                            PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                           LSN_IMPOSSIBLE, 0);
+                           LSN_IMPOSSIBLE, 0, FALSE);
   DBUG_RETURN(result);
 }
 
@@ -1057,7 +1047,7 @@ err:
   pagecache_unlock_by_link(share->pagecache, page_link.link,
                            PAGECACHE_LOCK_WRITE_UNLOCK,
                            PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
-                           LSN_IMPOSSIBLE, 0);
+                           LSN_IMPOSSIBLE, 0, FALSE);
   if (result)
     _ma_mark_file_crashed(share);
   DBUG_RETURN(result);
@@ -1209,14 +1199,14 @@ my_bool _ma_apply_undo_key_delete(MARIA_
   @note
     To allow higher concurrency in the common case where we do inserts
     and we don't have any linked blocks we do the following:
-    - Mark in info->used_key_del that we are not using key_del
+    - Mark in info->key_del_used that we are not using key_del
     - Return at once (without marking key_del as used)
 
-    This is safe as we in this case don't write current_key_del into
+    This is safe as we in this case don't write key_del_current into
     the redo log and during recover we are not updating key_del.
 
   @retval 1  Use page at end of file
-  @retval 0  Use page at share->current_key_del
+  @retval 0  Use page at share->key_del_current
 */
 
 my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end)
@@ -1224,68 +1214,72 @@ my_bool _ma_lock_key_del(MARIA_HA *info,
   MARIA_SHARE *share= info->s;
 
   /*
-    info->used_key_del is 0 initially.
+    info->key_del_used is 0 initially.
     If the caller needs a block (_ma_new()), we look at the free list:
     - looks empty? then caller will create a new block at end of file and
-    remember (through info->used_key_del==2) that it will not change
+    remember (through info->key_del_used==2) that it will not change
     state.key_del and does not need to wake up waiters as nobody will wait for
     it.
     - non-empty? then we wait for other users of the state.key_del list to
-    have finished, then we lock this list (through share->used_key_del==1)
+    have finished, then we lock this list (through share->key_del_used==1)
     because we need to prevent some other thread to also read state.key_del
-    and use the same page as ours. We remember through info->used_key_del==1
+    and use the same page as ours. We remember through info->key_del_used==1
     that we will have to set state.key_del at unlock time and wake up
     waiters.
     If the caller wants to free a block (_ma_dispose()), "empty" and
     "non-empty" are treated as "non-empty" is treated above.
-    When we are ready to unlock, we copy share->current_key_del into
+    When we are ready to unlock, we copy share->key_del_current into
     state.key_del. Unlocking happens when writing the UNDO log record, that
     can make a long lock time.
     Why we wrote "*looks* empty": because we are looking at state.key_del
-    which may be slightly old (share->current_key_del may be more recent and
+    which may be slightly old (share->key_del_current may be more recent and
     exact): when we want a new page, we tolerate to treat "there was no free
     page 1 millisecond ago"  as "there is no free page". It's ok to non-pop
     (_ma_new(), page will be found later anyway) but it's not ok to non-push
     (_ma_dispose(), page would be lost).
-    When we leave this function, info->used_key_del is always 1 or 2.
+    When we leave this function, info->key_del_used is always 1 or 2.
   */
-  if (info->used_key_del != 1)
+  if (info->key_del_used != 1)
   {
-    pthread_mutex_lock(&share->intern_lock);
+    pthread_mutex_lock(&share->key_del_lock);
     if (share->state.key_del == HA_OFFSET_ERROR && insert_at_end)
     {
-      pthread_mutex_unlock(&share->intern_lock);
-      info->used_key_del= 2;                  /* insert-with-append */
+      pthread_mutex_unlock(&share->key_del_lock);
+      info->key_del_used= 2;                  /* insert-with-append */
       return 1;
     }
 #ifdef THREAD
-    while (share->used_key_del)
-      pthread_cond_wait(&share->intern_cond, &share->intern_lock);
+    while (share->key_del_used)
+      pthread_cond_wait(&share->key_del_cond, &share->key_del_lock);
 #endif
-    info->used_key_del= 1;
-    share->used_key_del= 1;
-    share->current_key_del= share->state.key_del;
-    pthread_mutex_unlock(&share->intern_lock);
+    info->key_del_used= 1;
+    share->key_del_used= 1;
+    share->key_del_current= share->state.key_del;
+    pthread_mutex_unlock(&share->key_del_lock);
   }
-  return share->current_key_del == HA_OFFSET_ERROR;
+  return share->key_del_current == HA_OFFSET_ERROR;
 }
 
 
 /**
   @brief copy changes to key_del and unlock it
+
+  @note
+  In case of many threads using the maria table, we always have a lock
+  on the translog when coming here.
 */
 
 void _ma_unlock_key_del(MARIA_HA *info)
 {
-  DBUG_ASSERT(info->used_key_del);
-  if (info->used_key_del == 1)                  /* Ignore insert-with-append */
+  DBUG_ASSERT(info->key_del_used);
+  if (info->key_del_used == 1)                  /* Ignore insert-with-append */
   {
     MARIA_SHARE *share= info->s;
-    pthread_mutex_lock(&share->intern_lock);
-    share->used_key_del= 0;
-    share->state.key_del= share->current_key_del;
-    pthread_mutex_unlock(&share->intern_lock);
-    pthread_cond_signal(&share->intern_cond);
+    pthread_mutex_lock(&share->key_del_lock);
+    share->key_del_used= 0;
+    share->state.key_del= share->key_del_current;
+    pthread_mutex_unlock(&share->key_del_lock);
+    pthread_cond_signal(&share->key_del_cond);
   }
-  info->used_key_del= 0;
+  info->key_del_used= 0;
 }

=== modified file 'storage/maria/ma_key_recover.h'
--- a/storage/maria/ma_key_recover.h	2008-06-26 05:18:28 +0000
+++ b/storage/maria/ma_key_recover.h	2008-08-26 12:34:57 +0000
@@ -114,6 +114,6 @@ extern my_bool _ma_lock_key_del(MARIA_HA
 extern void _ma_unlock_key_del(MARIA_HA *info);
 static inline void _ma_fast_unlock_key_del(MARIA_HA *info)
 {
-  if (info->used_key_del)
+  if (info->key_del_used)
     _ma_unlock_key_del(info);
 }

=== modified file 'storage/maria/ma_loghandler.c'
--- a/storage/maria/ma_loghandler.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_loghandler.c	2008-10-20 13:03:34 +0000
@@ -3028,7 +3028,7 @@ static void translog_free_link(PAGECACHE
   if (direct_link)
     pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
                              PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
-                             LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0);
+                             LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE);
   DBUG_VOID_RETURN;
 }
 
@@ -5172,9 +5172,9 @@ static void  translog_relative_LSN_encod
 {
   LEX_CUSTRING *part;
   uint lsns_len= lsns * LSN_STORE_SIZE;
-  char buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
-  char *buffer= buffer_src;
-  const char *cbuffer;
+  uchar buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
+  uchar *buffer= buffer_src;
+  const uchar *cbuffer;
 
   DBUG_ENTER("translog_relative_LSN_encode");
 
@@ -5249,7 +5249,7 @@ static void  translog_relative_LSN_encod
     DBUG_PRINT("info", ("new length of LSNs: %lu  economy: %d",
                         (ulong)part->length, economy));
     parts->total_record_length-= economy;
-    part->str= (char*)dst_ptr;
+    part->str= dst_ptr;
   }
   DBUG_VOID_RETURN;
 }
@@ -5959,7 +5959,7 @@ static my_bool translog_write_fixed_reco
   DBUG_ASSERT(parts->current != 0);       /* first part is left for header */
   part= parts->parts + (--parts->current);
   parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
-  part->str= (char*)chunk1_header;
+  part->str= chunk1_header;
   *chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
   int2store(chunk1_header + 1, short_trid);
 
@@ -7717,7 +7717,8 @@ int translog_assign_id_to_share(MARIA_HA
       is not realpath-ed, etc) which is good: the log can be moved to another
       directory and continue working.
     */
-    log_array[TRANSLOG_INTERNAL_PARTS + 1].str= share->open_file_name.str;
+    log_array[TRANSLOG_INTERNAL_PARTS + 1].str=
+      (uchar *)share->open_file_name.str;
     log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
       share->open_file_name.length + 1;
     /*

=== modified file 'storage/maria/ma_open.c'
--- a/storage/maria/ma_open.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_open.c	2008-10-20 09:16:47 +0000
@@ -85,8 +85,8 @@ MARIA_HA *_ma_test_if_reopen(const char 
 */
 
 
-static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, int mode,
-                                      File data_file)
+static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
+                                      int mode, File data_file)
 {
   int save_errno;
   uint errpos;
@@ -104,7 +104,7 @@ static MARIA_HA *maria_clone_internal(MA
   }
   if (data_file >= 0)
     info.dfile.file= data_file;
-  else if (_ma_open_datafile(&info, share, -1))
+  else if (_ma_open_datafile(&info, share, name, -1))
     goto err;
   errpos= 5;
 
@@ -178,7 +178,8 @@ static MARIA_HA *maria_clone_internal(MA
 
   if (!share->base.born_transactional)   /* For transactional ones ... */
   {
-    info.trn= &dummy_transaction_object; /* ... force crash if no trn given */
+    /* ... force crash if no trn given */
+    _ma_set_trn_for_table(&info, &dummy_transaction_object);
     info.state= &share->state.state;	/* Change global values by default */
   }
   else
@@ -235,7 +236,7 @@ MARIA_HA *maria_clone(MARIA_SHARE *share
 {
   MARIA_HA *new_info;
   pthread_mutex_lock(&THR_LOCK_maria);
-  new_info= maria_clone_internal(share, mode,
+  new_info= maria_clone_internal(share, NullS, mode,
                                  share->data_file_type == BLOCK_RECORD ?
                                  share->bitmap.file.file : -1);
   pthread_mutex_unlock(&THR_LOCK_maria);
@@ -255,7 +256,7 @@ MARIA_HA *maria_clone(MARIA_SHARE *share
 MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
 {
   int kfile,open_mode,save_errno;
-  uint i,j,len,errpos,head_length,base_pos,info_length,keys,
+  uint i,j,len,errpos,head_length,base_pos,info_length,keys, realpath_err,
     key_parts,unique_key_parts,fulltext_keys,uniques;
   char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
        data_name[FN_REFLEN];
@@ -276,8 +277,16 @@ MARIA_HA *maria_open(const char *name, i
   head_length=sizeof(share_buff.state.header);
   bzero((uchar*) &info,sizeof(info));
 
-  my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT,
-                                   MY_UNPACK_FILENAME),MYF(0));
+  realpath_err= my_realpath(name_buff, fn_format(org_name, name, "",
+                                                 MARIA_NAME_IEXT,
+                                                 MY_UNPACK_FILENAME),MYF(0));
+  if (my_is_symlink(org_name) &&
+      (realpath_err || (*maria_test_invalid_symlink)(name_buff)))
+  {
+    my_errno= HA_WRONG_CREATE_OPTION;
+    DBUG_RETURN(0);
+  }
+
   pthread_mutex_lock(&THR_LOCK_maria);
   old_info= 0;
   if ((open_flags & HA_OPEN_COPY) ||
@@ -346,7 +355,7 @@ MARIA_HA *maria_open(const char *name, i
     if (!strcmp(name_buff, org_name) ||
         my_readlink(index_name, org_name, MYF(0)) == -1)
       (void) strmov(index_name, org_name);
-    *strrchr(org_name, '.')= '\0';
+    *strrchr(org_name, FN_EXTCHAR)= '\0';
     (void) fn_format(data_name,org_name,"",MARIA_NAME_DEXT,
                      MY_APPEND_EXT|MY_UNPACK_FILENAME|MY_RESOLVE_SYMLINKS);
 
@@ -738,7 +747,7 @@ MARIA_HA *maria_open(const char *name, i
     if ((share->data_file_type == BLOCK_RECORD ||
          share->data_file_type == COMPRESSED_RECORD))
     {
-      if (_ma_open_datafile(&info, share, -1))
+      if (_ma_open_datafile(&info, share, name, -1))
         goto err;
       data_file= info.dfile.file;
     }
@@ -807,8 +816,9 @@ MARIA_HA *maria_open(const char *name, i
     }
 #ifdef THREAD
     thr_lock_init(&share->lock);
-    (void)(pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST));
-    (void)(pthread_cond_init(&share->intern_cond, 0));
+    pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST);
+    pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST);
+    pthread_cond_init(&share->key_del_cond, 0);
     for (i=0; i<keys; i++)
       (void)(my_rwlock_init(&share->keyinfo[i].root_lock, NULL));
     (void)(my_rwlock_init(&share->mmap_lock, NULL));
@@ -882,7 +892,7 @@ MARIA_HA *maria_open(const char *name, i
       data_file= share->bitmap.file.file;       /* Only opened once */
   }
 
-  if (!(m_info= maria_clone_internal(share, mode, data_file)))
+  if (!(m_info= maria_clone_internal(share, name, mode, data_file)))
     goto err;
 
   pthread_mutex_unlock(&THR_LOCK_maria);
@@ -1215,6 +1225,13 @@ uint _ma_state_info_write(MARIA_SHARE *s
                            (should only be needed after ALTER TABLE
                            ENABLE/DISABLE KEYS, and REPAIR/OPTIMIZE).
 
+   @note
+     For transactional multiuser tables, this function is called
+     with intern_lock & translog_lock or when the last thread who
+     is using the table is closing it.
+     Because of the translog_lock we don't need to have a lock on
+     key_del_lock.
+
    @return Operation status
      @retval 0      OK
      @retval 1      Error
@@ -1721,9 +1738,27 @@ void _ma_set_index_pagecache_callbacks(P
   exist a dup()-like call that would give us two different file descriptors.
 *************************************************************************/
 
-int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share,
+int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, const char *org_name,
                       File file_to_dup __attribute__((unused)))
 {
+  char *data_name= share->data_file_name.str;
+  char real_data_name[FN_REFLEN];
+
+  if (org_name)
+  {
+    fn_format(real_data_name, org_name, "", MARIA_NAME_DEXT, 4);
+    if (my_is_symlink(real_data_name))
+    {
+      if (my_realpath(real_data_name, real_data_name, MYF(0)) ||
+          (*maria_test_invalid_symlink)(real_data_name))
+      {
+        my_errno= HA_WRONG_CREATE_OPTION;
+        return 1;
+      }
+      data_name= real_data_name;
+    }
+  }
+
   info->dfile.file= share->bitmap.file.file=
     my_open(share->data_file_name.str, share->mode | O_SHARE,
             MYF(MY_WME));

=== modified file 'storage/maria/ma_page.c'
--- a/storage/maria/ma_page.c	2008-06-26 05:18:28 +0000
+++ b/storage/maria/ma_page.c	2008-08-26 12:34:57 +0000
@@ -223,8 +223,8 @@ int _ma_dispose(register MARIA_HA *info,
 
   (void) _ma_lock_key_del(info, 0);
 
-  old_link= share->current_key_del;
-  share->current_key_del= pos;
+  old_link= share->key_del_current;
+  share->key_del_current= pos;
   page_no= pos / block_size;
   bzero(buff, share->keypage_header);
   _ma_store_keynr(share, buff, (uchar) MARIA_DELETE_KEY_NR);
@@ -347,7 +347,7 @@ my_off_t _ma_new(register MARIA_HA *info
   else
   {
     uchar *buff;
-    pos= share->current_key_del;                /* Protected */
+    pos= share->key_del_current;                /* Protected */
     DBUG_ASSERT(share->pagecache->block_size == block_size);
     if (!(buff= pagecache_read(share->pagecache,
                                &share->kfile,
@@ -362,15 +362,15 @@ my_off_t _ma_new(register MARIA_HA *info
         (single linked list):
       */
 #ifndef DBUG_OFF
-      my_off_t current_key_del;
+      my_off_t key_del_current;
 #endif
-      share->current_key_del= mi_sizekorr(buff+share->keypage_header);
+      share->key_del_current= mi_sizekorr(buff+share->keypage_header);
 #ifndef DBUG_OFF
-      current_key_del= share->current_key_del;
-      DBUG_ASSERT(current_key_del != share->state.key_del &&
-                  (current_key_del != 0) &&
-                  ((current_key_del == HA_OFFSET_ERROR) ||
-                   (current_key_del <=
+      key_del_current= share->key_del_current;
+      DBUG_ASSERT(key_del_current != share->state.key_del &&
+                  (key_del_current != 0) &&
+                  ((key_del_current == HA_OFFSET_ERROR) ||
+                   (key_del_current <=
                     (share->state.state.key_file_length - block_size))));
 #endif
     }

=== modified file 'storage/maria/ma_pagecache.c'
--- a/storage/maria/ma_pagecache.c	2008-08-25 18:29:05 +0000
+++ b/storage/maria/ma_pagecache.c	2008-10-20 13:03:34 +0000
@@ -267,16 +267,21 @@ static void info_unlink(PAGECACHE_PIN_IN
     list                 the list where to find the thread
     thread               thread ID (reference to the st_my_thread_var
                          of the thread)
+    any                  return any thread of the list
 
   RETURN
     0 - the thread was not found
-    pointer to the information node of the thread in the list
+    pointer to the information node of the thread in the list, or, if 'any',
+    to any thread of the list.
 */
 
 static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list,
-                                     struct st_my_thread_var *thread)
+                                     struct st_my_thread_var *thread,
+                                     my_bool any)
 {
   register PAGECACHE_PIN_INFO *i= list;
+  if (any)
+    return i;
   for(; i != 0; i= i->next)
     if (i->thread == thread)
       return i;
@@ -2150,18 +2155,22 @@ static void add_pin(PAGECACHE_BLOCK_LINK
   DBUG_VOID_RETURN;
 }
 
-static void remove_pin(PAGECACHE_BLOCK_LINK *block)
+static void remove_pin(PAGECACHE_BLOCK_LINK *block, my_bool any
+#ifdef DBUG_OFF
+                       __attribute__((unused))
+#endif
+                       )
 {
   DBUG_ENTER("remove_pin");
-  DBUG_PRINT("enter", ("block: 0x%lx  pins: %u",
+  DBUG_PRINT("enter", ("block: 0x%lx  pins: %u  any: %d",
                        (ulong) block,
-                       block->pins));
+                       block->pins, (int)any));
   PCBLOCK_INFO(block);
   DBUG_ASSERT(block->pins > 0);
   block->pins--;
 #ifndef DBUG_OFF
   {
-    PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var);
+    PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var, any);
     DBUG_ASSERT(info != 0);
     info_unlink(info);
     my_free((uchar*) info, MYF(0));
@@ -2183,7 +2192,7 @@ static void info_remove_lock(PAGECACHE_B
 {
   PAGECACHE_LOCK_INFO *info=
     (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
-                                     my_thread_var);
+                                     my_thread_var, FALSE);
   DBUG_ASSERT(info != 0);
   info_unlink((PAGECACHE_PIN_INFO *)info);
   my_free((uchar*)info, MYF(0));
@@ -2192,7 +2201,7 @@ static void info_change_lock(PAGECACHE_B
 {
   PAGECACHE_LOCK_INFO *info=
     (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
-                                     my_thread_var);
+                                     my_thread_var, FALSE);
   DBUG_ASSERT(info != 0);
   DBUG_ASSERT(info->write_lock != wl);
   info->write_lock= wl;
@@ -2448,6 +2457,8 @@ static void release_rdlock(PAGECACHE_BLO
   @param lock            lock change mode
   @param pin             pinchange mode
   @param file            File handler requesting pin
+  @param any             allow unpinning block pinned by any thread; possible
+                         only if not locked, see pagecache_unlock_by_link()
 
   @retval 0 OK
   @retval 1 Try to lock the block failed
@@ -2456,7 +2467,8 @@ static void release_rdlock(PAGECACHE_BLO
 static my_bool make_lock_and_pin(PAGECACHE *pagecache,
                                  PAGECACHE_BLOCK_LINK *block,
                                  enum pagecache_page_lock lock,
-                                 enum pagecache_page_pin pin)
+                                 enum pagecache_page_pin pin,
+                                 my_bool any)
 {
   DBUG_ENTER("make_lock_and_pin");
 
@@ -2465,16 +2477,20 @@ static my_bool make_lock_and_pin(PAGECAC
   if (block)
   {
     DBUG_PRINT("enter", ("block: 0x%lx (%u)  wrlocks: %u  rdlocks: %u  "
-                         "rdlocks_q: %u  pins: %u  lock: %s  pin: %s",
+                         "rdlocks_q: %u  pins: %u  lock: %s  pin: %s any %d",
                          (ulong)block, PCBLOCK_NUMBER(pagecache, block),
                          block->wlocks, block->rlocks, block->rlocks_queue,
                          block->pins,
                          page_cache_page_lock_str[lock],
-                         page_cache_page_pin_str[pin]));
+                         page_cache_page_pin_str[pin], (int)any));
     PCBLOCK_INFO(block);
   }
 #endif
 
+  DBUG_ASSERT(!any ||
+              ((lock == PAGECACHE_LOCK_LEFT_UNLOCKED) &&
+               (pin == PAGECACHE_UNPIN)));
+
   switch (lock) {
   case PAGECACHE_LOCK_WRITE:               /* free  -> write */
     /* Writelock and pin the buffer */
@@ -2500,7 +2516,7 @@ static my_bool make_lock_and_pin(PAGECAC
   case PAGECACHE_LOCK_LEFT_READLOCKED:     /* read  -> read  */
     if (pin == PAGECACHE_UNPIN)
     {
-      remove_pin(block);
+      remove_pin(block, FALSE);
     }
     if (lock == PAGECACHE_LOCK_WRITE_TO_READ)
     {
@@ -2529,7 +2545,7 @@ static my_bool make_lock_and_pin(PAGECAC
   case PAGECACHE_LOCK_LEFT_UNLOCKED:       /* free  -> free  */
     if (pin == PAGECACHE_UNPIN)
     {
-      remove_pin(block);
+      remove_pin(block, any);
     }
     /* fall through */
   case PAGECACHE_LOCK_LEFT_WRITELOCKED:    /* write -> write */
@@ -2759,7 +2775,7 @@ void pagecache_unlock(PAGECACHE *pagecac
   inc_counter_for_resize_op(pagecache);
   /* See NOTE for pagecache_unlock about registering requests */
   block= find_block(pagecache, file, pageno, 0, 0,
-                    test(pin == PAGECACHE_PIN_LEFT_UNPINNED), &page_st);
+                    pin == PAGECACHE_PIN_LEFT_UNPINNED, &page_st);
   PCBLOCK_INFO(block);
   DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
   if (first_REDO_LSN_for_page)
@@ -2793,7 +2809,7 @@ void pagecache_unlock(PAGECACHE *pagecac
                         (ulong) block));
   }
 
-  if (make_lock_and_pin(pagecache, block, lock, pin))
+  if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
   {
     DBUG_ASSERT(0); /* should not happend */
   }
@@ -2863,7 +2879,7 @@ void pagecache_unpin(PAGECACHE *pagecach
   */
   if (make_lock_and_pin(pagecache, block,
                         PAGECACHE_LOCK_LEFT_READLOCKED,
-                        PAGECACHE_UNPIN))
+                        PAGECACHE_UNPIN, FALSE))
     DBUG_ASSERT(0);                           /* should not happend */
 
   remove_reader(block);
@@ -2886,15 +2902,22 @@ void pagecache_unpin(PAGECACHE *pagecach
   @brief Unlock/unpin page and put LSN stamp if it need
   (uses direct block/page pointer)
 
-  @param pagecache      pointer to a page cache data structure
-  @param link           direct link to page (returned by read or write)
-  @param lock           lock change
-  @param pin            pin page
+  @param pagecache       pointer to a page cache data structure
+  @param link            direct link to page (returned by read or write)
+  @param lock            lock change
+  @param pin             pin page
   @param first_REDO_LSN_for_page do not set it if it is LSN_IMPOSSIBLE (0)
-  @param lsn            if it is not LSN_IMPOSSIBLE and it is bigger then
-                        LSN on the page it will be written on the page
-  @param was_changed    should be true if the page was write locked with
-                        direct link giving and the page was changed
+  @param lsn             if it is not LSN_IMPOSSIBLE and it is bigger then
+                         LSN on the page it will be written on the page
+  @param was_changed     should be true if the page was write locked with
+                         direct link giving and the page was changed
+  @param any             allow unpinning block pinned by any thread; possible
+                         only if not locked
+
+  @note 'any' is a hack so that _ma_bitmap_unpin_all() is allowed to unpin
+  non-locked bitmap pages pinned by other threads. Because it always uses
+  PAGECACHE_LOCK_LEFT_UNLOCKED and PAGECACHE_UNPIN
+  (see write_changed_bitmap()), the hack is limited to these conditions.
 */
 
 void pagecache_unlock_by_link(PAGECACHE *pagecache,
@@ -2902,7 +2925,8 @@ void pagecache_unlock_by_link(PAGECACHE 
                               enum pagecache_page_lock lock,
                               enum pagecache_page_pin pin,
                               LSN first_REDO_LSN_for_page,
-                              LSN lsn, my_bool was_changed)
+                              LSN lsn, my_bool was_changed,
+                              my_bool any)
 {
   DBUG_ENTER("pagecache_unlock_by_link");
   DBUG_PRINT("enter", ("block: 0x%lx  fd: %u  page: %lu  changed: %d  %s  %s",
@@ -2919,15 +2943,16 @@ void pagecache_unlock_by_link(PAGECACHE 
   DBUG_ASSERT(pin != PAGECACHE_PIN_LEFT_UNPINNED);
   DBUG_ASSERT(lock != PAGECACHE_LOCK_READ);
   DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE);
+  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
   if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
       lock == PAGECACHE_LOCK_READ_UNLOCK)
   {
-    if (make_lock_and_pin(pagecache, block, lock, pin))
+    if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
       DBUG_ASSERT(0);                         /* should not happend */
+    pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
     DBUG_VOID_RETURN;
   }
 
-  pagecache_pthread_mutex_lock(&pagecache->cache_lock);
   /*
     As soon as we keep lock cache can be used, and we have lock because want
     unlock.
@@ -2976,7 +3001,7 @@ void pagecache_unlock_by_link(PAGECACHE 
                         (ulong) block));
   }
 
-  if (make_lock_and_pin(pagecache, block, lock, pin))
+  if (make_lock_and_pin(pagecache, block, lock, pin, any))
     DBUG_ASSERT(0);                           /* should not happend */
 
   /*
@@ -3039,7 +3064,7 @@ void pagecache_unpin_by_link(PAGECACHE *
   */
   if (make_lock_and_pin(pagecache, block,
                         PAGECACHE_LOCK_LEFT_READLOCKED,
-                        PAGECACHE_UNPIN))
+                        PAGECACHE_UNPIN, FALSE))
     DBUG_ASSERT(0); /* should not happend */
 
   /*
@@ -3056,6 +3081,146 @@ void pagecache_unpin_by_link(PAGECACHE *
   DBUG_VOID_RETURN;
 }
 
+/* description of how to change lock before and after read/write */
+struct rw_lock_change
+{
+  my_bool need_lock_change; /* need changing of lock at the end */
+  enum pagecache_page_lock new_lock; /* lock at the beginning */
+  enum pagecache_page_lock unlock_lock; /* lock at the end */
+};
+
+/* description of how to change pin before and after read/write */
+struct rw_pin_change
+{
+  enum pagecache_page_pin new_pin; /* pin status at the beginning */
+  enum pagecache_page_pin unlock_pin; /* pin status at the end */
+};
+
+/**
+  Depending on the lock which the user wants in pagecache_read(), we
+  need to acquire a first type of lock at start of pagecache_read(), and
+  downgrade it to a second type of lock at end. For example, if user
+  asked for no lock (PAGECACHE_LOCK_LEFT_UNLOCKED) this translates into
+  taking first a read lock PAGECACHE_LOCK_READ (to rightfully block on
+  existing write locks) then read then unlock the lock i.e. change lock
+  to PAGECACHE_LOCK_READ_UNLOCK (the "1" below tells that a change is
+  needed).
+*/ 
+
+static struct rw_lock_change lock_to_read[8]=
+{
+  { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
+    1,
+    PAGECACHE_LOCK_READ, PAGECACHE_LOCK_READ_UNLOCK
+  },
+  { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
+    0,
+    PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_LEFT_READLOCKED
+  },
+  { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
+    0,
+    PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_LEFT_WRITELOCKED
+  },
+  { /*PAGECACHE_LOCK_READ*/
+    1,
+    PAGECACHE_LOCK_READ, PAGECACHE_LOCK_LEFT_READLOCKED
+  },
+  { /*PAGECACHE_LOCK_WRITE*/
+    1,
+    PAGECACHE_LOCK_WRITE, PAGECACHE_LOCK_LEFT_WRITELOCKED
+  },
+  { /*PAGECACHE_LOCK_READ_UNLOCK*/
+    1,
+    PAGECACHE_LOCK_LEFT_READLOCKED, PAGECACHE_LOCK_READ_UNLOCK
+  },
+  { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
+    1,
+    PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_UNLOCK
+  },
+  { /*PAGECACHE_LOCK_WRITE_TO_READ*/
+    1,
+    PAGECACHE_LOCK_LEFT_WRITELOCKED, PAGECACHE_LOCK_WRITE_TO_READ
+  }
+};
+
+/**
+  Two sets of pin modes (same idea as the lock table above, but for pinning).
+  The difference between the sets is whether we are going to provide the
+  caller with a reference to the block or not.
+*/
+
+static struct rw_pin_change lock_to_pin[2][8]=
+{
+  {
+    { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED,
+    },
+    { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_READ*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_WRITE*/
+      PAGECACHE_PIN,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_READ_UNLOCK*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_UNPIN
+    },
+    { /*PAGECACHE_LOCK_WRITE_TO_READ*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_UNPIN
+    }
+  },
+  {
+    { /*PAGECACHE_LOCK_LEFT_UNLOCKED*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_LEFT_READLOCKED*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED,
+    },
+    { /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_READ*/
+      PAGECACHE_PIN,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_WRITE*/
+      PAGECACHE_PIN,
+      PAGECACHE_PIN_LEFT_PINNED
+    },
+    { /*PAGECACHE_LOCK_READ_UNLOCK*/
+      PAGECACHE_PIN_LEFT_UNPINNED,
+      PAGECACHE_PIN_LEFT_UNPINNED
+    },
+    { /*PAGECACHE_LOCK_WRITE_UNLOCK*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_UNPIN
+    },
+    { /*PAGECACHE_LOCK_WRITE_TO_READ*/
+      PAGECACHE_PIN_LEFT_PINNED,
+      PAGECACHE_PIN_LEFT_PINNED,
+    }
+  }
+};
+
 
 /*
   @brief Read a block of data from a cached file into a buffer;
@@ -3072,34 +3237,11 @@ void pagecache_unpin_by_link(PAGECACHE *
   @return address from where the data is placed if successful, 0 - otherwise.
 
   @note Pin will be chosen according to lock parameter (see lock_to_pin)
-*/
-static enum pagecache_page_pin lock_to_pin[2][8]=
-{
-  {
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
-    PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/,
-    PAGECACHE_PIN               /*PAGECACHE_LOCK_WRITE*/,
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
-    PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
-    PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_TO_READ*/
-  },
-  {
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
-    PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
-    PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
-    PAGECACHE_PIN               /*PAGECACHE_LOCK_READ*/,
-    PAGECACHE_PIN               /*PAGECACHE_LOCK_WRITE*/,
-    PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
-    PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
-    PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_WRITE_TO_READ*/
-  }
-};
-
 
-/**
   @note 'buff', if not NULL, must be long-aligned.
+
+  @note  If buff==0 then we provide reference on the page so should keep the
+  page pinned.
 */
 
 uchar *pagecache_read(PAGECACHE *pagecache,
@@ -3112,21 +3254,26 @@ uchar *pagecache_read(PAGECACHE *pagecac
                       PAGECACHE_BLOCK_LINK **page_link)
 {
   my_bool error= 0;
-  enum pagecache_page_pin pin= lock_to_pin[test(buff==0)][lock];
+  enum pagecache_page_pin
+    new_pin= lock_to_pin[buff==0][lock].new_pin,
+    unlock_pin= lock_to_pin[buff==0][lock].unlock_pin;
   PAGECACHE_BLOCK_LINK *fake_link;
   my_bool reg_request;
 #ifndef DBUG_OFF
   char llbuf[22];
   DBUG_ENTER("pagecache_read");
   DBUG_PRINT("enter", ("fd: %u  page: %s  buffer: 0x%lx level: %u  "
-                       "t:%s  %s  %s",
+                       "t:%s  (%d)%s->%s  %s->%s",
                        (uint) file->file, ullstr(pageno, llbuf),
                        (ulong) buff, level,
                        page_cache_page_type_str[type],
-                       page_cache_page_lock_str[lock],
-                       page_cache_page_pin_str[pin]));
-  DBUG_ASSERT(buff != 0 || (buff == 0 && (pin == PAGECACHE_PIN ||
-                                          pin == PAGECACHE_PIN_LEFT_PINNED)));
+                       lock_to_read[lock].need_lock_change,
+                       page_cache_page_lock_str[lock_to_read[lock].new_lock],
+                       page_cache_page_lock_str[lock_to_read[lock].unlock_lock],
+                       page_cache_page_pin_str[new_pin],
+                       page_cache_page_pin_str[unlock_pin]));
+  DBUG_ASSERT(buff != 0 || (buff == 0 && (unlock_pin == PAGECACHE_PIN ||
+                                          unlock_pin == PAGECACHE_PIN_LEFT_PINNED)));
   DBUG_ASSERT(pageno < ((ULL(1)) << 40));
 #endif
 
@@ -3153,10 +3300,10 @@ restart:
     inc_counter_for_resize_op(pagecache);
     pagecache->global_cache_r_requests++;
     /* See NOTE for pagecache_unlock about registering requests. */
-    reg_request= ((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
-                  (pin == PAGECACHE_PIN));
+    reg_request= ((new_pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
+                  (new_pin == PAGECACHE_PIN));
     block= find_block(pagecache, file, pageno, level,
-                      test(lock == PAGECACHE_LOCK_WRITE),
+                      lock == PAGECACHE_LOCK_WRITE,
                       reg_request, &page_st);
     DBUG_PRINT("info", ("Block type: %s current type %s",
                         page_cache_page_type_str[block->type],
@@ -3190,7 +3337,8 @@ restart:
         block->type == PAGECACHE_EMPTY_PAGE)
       block->type= type;
 
-    if (make_lock_and_pin(pagecache, block, lock, pin))
+    if (make_lock_and_pin(pagecache, block, lock_to_read[lock].new_lock,
+                          new_pin, FALSE))
     {
       /*
         We failed to write lock the block, cache is unlocked,
@@ -3238,12 +3386,20 @@ restart:
     }
 
     remove_reader(block);
+    if (lock_to_read[lock].need_lock_change)
+    {
+      if (make_lock_and_pin(pagecache, block,
+                            lock_to_read[lock].unlock_lock,
+                            unlock_pin, FALSE))
+        DBUG_ASSERT(0);
+    }
     /*
       Link the block into the LRU chain if it's the last submitted request
       for the block and block will not be pinned.
       See NOTE for pagecache_unlock about registering requests.
     */
-    if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
+    if (unlock_pin == PAGECACHE_PIN_LEFT_UNPINNED ||
+        unlock_pin == PAGECACHE_UNPIN)
       unreg_request(pagecache, block, 1);
     else
       *page_link= block;
@@ -3334,7 +3490,7 @@ static my_bool pagecache_delete_internal
   /* Cache is locked, so we can relese page before freeing it */
   if (make_lock_and_pin(pagecache, block,
                         PAGECACHE_LOCK_WRITE_UNLOCK,
-                        PAGECACHE_UNPIN))
+                        PAGECACHE_UNPIN, FALSE))
     DBUG_ASSERT(0);
   DBUG_ASSERT(block->hash_link->requests > 0);
   page_link->requests--;
@@ -3396,7 +3552,7 @@ my_bool pagecache_delete_by_link(PAGECAC
       make_lock_and_pin() can't fail here, because we are keeping pin on the
       block and it can't be evicted (which is cause of lock fail and retry)
     */
-    if (make_lock_and_pin(pagecache, block, lock, pin))
+    if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
       DBUG_ASSERT(0);
 
     /*
@@ -3461,6 +3617,18 @@ void pagecache_add_level_by_link(PAGECAC
   write locked before) or PAGECACHE_LOCK_WRITE (delete will write
   lock page before delete)
 */
+static enum pagecache_page_pin lock_to_pin_one_phase[8]=
+{
+  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/,
+  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/,
+  PAGECACHE_PIN_LEFT_PINNED   /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/,
+  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/,
+  PAGECACHE_PIN               /*PAGECACHE_LOCK_WRITE*/,
+  PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/,
+  PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_UNLOCK*/,
+  PAGECACHE_UNPIN             /*PAGECACHE_LOCK_WRITE_TO_READ*/
+};
+
 my_bool pagecache_delete(PAGECACHE *pagecache,
                          PAGECACHE_FILE *file,
                          pgcache_page_no_t pageno,
@@ -3468,7 +3636,7 @@ my_bool pagecache_delete(PAGECACHE *page
                          my_bool flush)
 {
   my_bool error= 0;
-  enum pagecache_page_pin pin= lock_to_pin[0][lock];
+  enum pagecache_page_pin pin= lock_to_pin_one_phase[lock];
   DBUG_ENTER("pagecache_delete");
   DBUG_PRINT("enter", ("fd: %u  page: %lu  %s  %s",
                        (uint) file->file, (ulong) pageno,
@@ -3514,7 +3682,7 @@ restart:
     if (pin == PAGECACHE_PIN)
       reg_requests(pagecache, block, 1);
     DBUG_ASSERT(block != 0);
-    if (make_lock_and_pin(pagecache, block, lock, pin))
+    if (make_lock_and_pin(pagecache, block, lock, pin, FALSE))
     {
       /*
         We failed to writelock the block, cache is unlocked, and last write
@@ -3584,15 +3752,7 @@ my_bool pagecache_delete_pages(PAGECACHE
   @retval 1 Error.
 */
 
-/* description of how to change lock before and after write */
-struct write_lock_change
-{
-  int need_lock_change; /* need changing of lock at the end of write */
-  enum pagecache_page_lock new_lock; /* lock at the beginning */
-  enum pagecache_page_lock unlock_lock; /* lock at the end */
-};
-
-static struct write_lock_change write_lock_change_table[]=
+static struct rw_lock_change write_lock_change_table[]=
 {
   {1,
    PAGECACHE_LOCK_WRITE,
@@ -3616,14 +3776,8 @@ static struct write_lock_change write_lo
    PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_WRITE_TO_READ*/
 };
 
-/* description of how to change pin before and after write */
-struct write_pin_change
-{
-  enum pagecache_page_pin new_pin; /* pin status at the beginning */
-  enum pagecache_page_pin unlock_pin; /* pin status at the end */
-};
 
-static struct write_pin_change write_pin_change_table[]=
+static struct rw_pin_change write_pin_change_table[]=
 {
   {PAGECACHE_PIN_LEFT_PINNED,
    PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/,
@@ -3705,10 +3859,10 @@ restart:
     reg_request= ((pin == PAGECACHE_PIN_LEFT_UNPINNED) ||
                   (pin == PAGECACHE_PIN));
     block= find_block(pagecache, file, pageno, level,
-                      test(write_mode != PAGECACHE_WRITE_DONE &&
-                           lock != PAGECACHE_LOCK_LEFT_WRITELOCKED &&
-                           lock != PAGECACHE_LOCK_WRITE_UNLOCK &&
-                           lock != PAGECACHE_LOCK_WRITE_TO_READ),
+                      (write_mode != PAGECACHE_WRITE_DONE &&
+                       lock != PAGECACHE_LOCK_LEFT_WRITELOCKED &&
+                       lock != PAGECACHE_LOCK_WRITE_UNLOCK &&
+                       lock != PAGECACHE_LOCK_WRITE_TO_READ),
                       reg_request, &page_st);
     if (!block)
     {
@@ -3736,7 +3890,7 @@ restart:
                           write_lock_change_table[lock].new_lock,
                           (need_lock_change ?
                            write_pin_change_table[pin].new_pin :
-                           pin)))
+                           pin), FALSE))
     {
       /*
         We failed to writelock the block, cache is unlocked, and last write
@@ -3823,7 +3977,7 @@ restart:
       */
       if (make_lock_and_pin(pagecache, block,
                             write_lock_change_table[lock].unlock_lock,
-                            write_pin_change_table[pin].unlock_pin))
+                            write_pin_change_table[pin].unlock_pin, FALSE))
         DBUG_ASSERT(0);
     }
 
@@ -4034,7 +4188,7 @@ static int flush_cached_blocks(PAGECACHE
     DBUG_ASSERT(block->wlocks == 0);
     DBUG_ASSERT(block->pins == 0);
     if (make_lock_and_pin(pagecache, block,
-                          PAGECACHE_LOCK_WRITE, PAGECACHE_PIN))
+                          PAGECACHE_LOCK_WRITE, PAGECACHE_PIN, FALSE))
       DBUG_ASSERT(0);
     DBUG_ASSERT(block->pins == 1);
 
@@ -4068,7 +4222,7 @@ static int flush_cached_blocks(PAGECACHE
 
     if (make_lock_and_pin(pagecache, block,
                           PAGECACHE_LOCK_WRITE_UNLOCK,
-                          PAGECACHE_UNPIN))
+                          PAGECACHE_UNPIN, FALSE))
       DBUG_ASSERT(0);
 
     pagecache->global_cache_write++;

=== modified file 'storage/maria/ma_pagecache.h'
--- a/storage/maria/ma_pagecache.h	2008-03-04 11:58:21 +0000
+++ b/storage/maria/ma_pagecache.h	2008-10-14 15:18:14 +0000
@@ -242,7 +242,8 @@ extern void pagecache_unlock_by_link(PAG
                                      enum pagecache_page_lock lock,
                                      enum pagecache_page_pin pin,
                                      LSN first_REDO_LSN_for_page,
-                                     LSN lsn, my_bool was_changed);
+                                     LSN lsn, my_bool was_changed,
+                                     my_bool any);
 extern void pagecache_unpin(PAGECACHE *pagecache,
                             PAGECACHE_FILE *file,
                             pgcache_page_no_t pageno,

=== modified file 'storage/maria/ma_preload.c'
--- a/storage/maria/ma_preload.c	2008-03-04 11:47:02 +0000
+++ b/storage/maria/ma_preload.c	2008-10-14 15:18:14 +0000
@@ -104,7 +104,7 @@ int maria_preload(MARIA_HA *info, ulongl
     else /* otherwise it stays in cache: */
       pagecache_unlock_by_link(share->pagecache, page_link,
                                PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN,
-                               LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, FALSE);
+                               LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, FALSE, FALSE);
   }
 
   my_free(buff, MYF(0));

=== modified file 'storage/maria/ma_recovery.c'
--- a/storage/maria/ma_recovery.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_recovery.c	2008-10-20 13:03:34 +0000
@@ -3292,13 +3292,13 @@ my_bool _ma_reenable_logging_for_table(M
       /*
         We are going to change callbacks; if a page is flushed at this moment
         this can cause race conditions, that's one reason to flush pages
-        now. Other reasons: a checkpoint could be running and miss pages. As
+        now. Other reasons: a checkpoint could be running and miss pages; the
+        pages have type PAGECACHE_PLAIN_PAGE which should not remain. As
         there are no REDOs for pages, them, bitmaps and the state also have to
-        be flushed and synced. Leaving non-dirty pages in cache is ok, when
-        they become dirty again they will have their type corrected.
+        be flushed and synced.
       */
       if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
-                                FLUSH_KEEP, FLUSH_KEEP) ||
+                                FLUSH_RELEASE, FLUSH_RELEASE) ||
           _ma_state_info_write(share, 1|4) ||
           _ma_sync_table_files(info))
         DBUG_RETURN(1);

=== modified file 'storage/maria/ma_rename.c'
--- a/storage/maria/ma_rename.c	2008-06-26 05:18:28 +0000
+++ b/storage/maria/ma_rename.c	2008-08-06 14:03:27 +0000
@@ -69,9 +69,9 @@ int maria_rename(const char *old_name, c
     LSN lsn;
     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
     uint old_name_len= strlen(old_name)+1, new_name_len= strlen(new_name)+1;
-    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    old_name;
+    log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (uchar*)old_name;
     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= old_name_len;
-    log_array[TRANSLOG_INTERNAL_PARTS + 1].str=    new_name;
+    log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (uchar*)new_name;
     log_array[TRANSLOG_INTERNAL_PARTS + 1].length= new_name_len;
     /*
       For this record to be of any use for Recovery, we need the upper

=== modified file 'storage/maria/ma_search.c'
--- a/storage/maria/ma_search.c	2008-08-24 13:29:34 +0000
+++ b/storage/maria/ma_search.c	2008-09-26 08:16:35 +0000
@@ -931,7 +931,7 @@ uint _ma_get_static_key(MARIA_KEY *key, 
 /**
    Skip over static length key from key-block
 
-  @fn _ma_skip_pack_key()
+  @fn _ma_skip_static_key()
   @param key       Keyinfo and buffer that can be used
   @param nod_flag  If nod: Length of node pointer, else zero.
   @param key       Points at key
@@ -1049,6 +1049,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key
       }
       else
       {
+        /* Key that is not packed against previous key */
         if (keyseg->flag & HA_NULL_PART)
         {
           if (!length--)                        /* Null part */
@@ -1121,6 +1122,9 @@ uint _ma_get_pack_key(MARIA_KEY *int_key
   @param nod_flag  If nod: Length of node pointer, else zero.
   @param key       Points at key
 
+  @note
+  This is in principle a simpler version of _ma_get_pack_key()
+
   @retval pointer to next key
 */
 
@@ -1150,6 +1154,14 @@ uchar *_ma_skip_pack_key(MARIA_KEY *key,
 	page+= length;
 	continue;
       }
+      if ((keyseg->flag & HA_NULL_PART) && length)
+      {
+        /*
+          Keys that can have null use length+1 as the length for data as the
+          number 0 is reserved for keys that have a NULL value
+        */
+        length--;
+      }
       page+= length;
     }
     else
@@ -1846,11 +1858,14 @@ _ma_calc_var_key_length(const MARIA_KEY 
 
     prefix byte(s) The high bit is set if this is a prefix for the prev key
     length         Packed length if the previous was a prefix byte
-    [length]       data bytes ('length' bytes)
+    [data_length]  data bytes ('length' bytes)
     next-key-seg   Next key segments
 
     If the first segment can have NULL:
-    The length is 0 for NULLS and 1+length for not null columns.
+       If key was packed
+         data_length is length of rest of key
+       If key was not packed
+         The length is 0 for NULLS and 1+data_length for not null columns
 */
 
 int

=== modified file 'storage/maria/ma_state.c'
--- a/storage/maria/ma_state.c	2008-08-18 22:21:22 +0000
+++ b/storage/maria/ma_state.c	2008-08-28 18:52:23 +0000
@@ -91,7 +91,7 @@ my_bool _ma_setup_live_state(MARIA_HA *i
     It's enough to compare trids here (instead of calling
     tranman_can_read_from) as history->trid is a commit_trid
   */
-  while (trn->trid < history->trid)
+  while (trn->trid < history->trid && history->trid != ~(TrID)0)
     history= history->next;
   pthread_mutex_unlock(&share->intern_lock);
   /* The current item can't be deleted as it's the first one visible for us */

=== modified file 'storage/maria/ma_static.c'
--- a/storage/maria/ma_static.c	2008-06-26 05:18:28 +0000
+++ b/storage/maria/ma_static.c	2008-10-16 08:54:53 +0000
@@ -13,6 +13,7 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
+
 /*
   Static variables for MARIA library. All definied here for easy making of
   a shared library
@@ -64,6 +65,9 @@ HASH maria_stored_state;
 */
 TRN dummy_transaction_object;
 
+/* a WT_RESOURCE_TYPE for transactions waiting on a unique key conflict */
+WT_RESOURCE_TYPE ma_rc_dup_unique={ wt_resource_id_memcmp, 0};
+
 /* Enough for comparing if number is zero */
 uchar maria_zero_string[]= {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
@@ -94,3 +98,10 @@ uint32 maria_readnext_vec[]=
   SEARCH_BIGGER, SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_BIGGER, SEARCH_SMALLER,
   SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_SMALLER
 };
+
+static int always_valid(const char *filename __attribute__((unused)))
+{
+  return 0;
+}
+
+int (*maria_test_invalid_symlink)(const char *filename)= always_valid;

=== modified file 'storage/maria/ma_write.c'
--- a/storage/maria/ma_write.c	2008-08-25 18:23:18 +0000
+++ b/storage/maria/ma_write.c	2008-10-20 09:16:47 +0000
@@ -180,15 +180,83 @@ int maria_write(MARIA_HA *info, uchar *r
       }
       else
       {
-        if (keyinfo->ck_insert(info,
-                               (*keyinfo->make_key)(info, &int_key, i,
-                                                    buff, record, filepos,
-                                                    info->trn->trid)))
+        while (keyinfo->ck_insert(info,
+                                  (*keyinfo->make_key)(info, &int_key, i,
+                                                       buff, record, filepos,
+                                                       info->trn->trid)))
         {
-          if (local_lock_tree)
-            rw_unlock(&keyinfo->root_lock);
+          TRN *blocker;
           DBUG_PRINT("error",("Got error: %d on write",my_errno));
-          goto err;
+          /*
+            explicit check to filter out temp tables, they aren't
+            transactional and don't have a proper TRN so the code
+            below doesn't work for them.
+            Also, filter out non-thread maria use, and table modified in
+            the same transaction.
+          */
+          if (!local_lock_tree)
+            goto err;
+          if (info->dup_key_trid == info->trn->trid)
+          {
+	    rw_unlock(&keyinfo->root_lock);
+            goto err;
+          }
+          /* Different TrIDs: table must be transactional */
+          DBUG_ASSERT(share->base.born_transactional);
+          /*
+            If transactions are disabled, and dup_key_trid is different from
+            our TrID, it must be ALTER TABLE with dup_key_trid==0 (no
+            transaction). ALTER TABLE does have MARIA_HA::TRN not dummy but
+            puts TrID=0 in rows/keys.
+          */
+          DBUG_ASSERT(share->now_transactional ||
+                      (info->dup_key_trid == 0));
+          blocker= trnman_trid_to_trn(info->trn, info->dup_key_trid);
+          /*
+            if blocker TRN was not found, it means that the conflicting
+            transaction was committed long time ago. It could not be
+            aborted, as it would have to wait on the key tree lock
+            to remove the conflicting key it has inserted.
+          */
+          if (!blocker || blocker->commit_trid != ~(TrID)0)
+          { /* committed */
+            if (blocker)
+              pthread_mutex_unlock(& blocker->state_lock);
+            rw_unlock(&keyinfo->root_lock);
+            goto err;
+          }
+          rw_unlock(&keyinfo->root_lock);
+          {
+            /* running. now we wait */
+            WT_RESOURCE_ID rc;
+            int res;
+
+            rc.type= &ma_rc_dup_unique;
+            rc.value= (intptr)blocker; /* TODO savepoint id when we'll have them */
+            res= wt_thd_will_wait_for(info->trn->wt, blocker->wt, & rc);
+            if (res != WT_OK)
+            {
+              pthread_mutex_unlock(& blocker->state_lock);
+              my_errno= HA_ERR_LOCK_DEADLOCK;
+              goto err;
+            }
+            {
+              const char *old_proc_info= proc_info_hook(0,
+                    "waiting for a resource", __func__, __FILE__, __LINE__);
+
+              res= wt_thd_cond_timedwait(info->trn->wt, & blocker->state_lock);
+
+              proc_info_hook(0, old_proc_info, __func__, __FILE__, __LINE__);
+            }
+            pthread_mutex_unlock(& blocker->state_lock);
+            if (res != WT_OK)
+            {
+              my_errno= res == WT_TIMEOUT ? HA_ERR_LOCK_WAIT_TIMEOUT
+                                          : HA_ERR_LOCK_DEADLOCK;
+              goto err;
+            }
+          }
+          rw_wrlock(&keyinfo->root_lock);
         }
       }
 
@@ -597,9 +665,22 @@ static int w_search(register MARIA_HA *i
     else /* not HA_FULLTEXT, normal HA_NOSAME key */
     {
       DBUG_PRINT("warning", ("Duplicate key"));
+      /*
+        TODO
+        When the index will support true versioning - with multiple
+        identical values in the UNIQUE index, invisible to each other -
+        the following should be changed to "continue inserting keys, at the
+        end (of the row or statement) wait". Until it's done we cannot properly
+        support deadlock timeouts.
+      */
+      /*
+        transaction that has inserted the conflicting key is in progress.
+        wait for it to be committed or aborted.
+      */
+      info->dup_key_trid= _ma_trid_from_key(&tmp_key);
       info->dup_key_pos= dup_key_pos;
       my_afree((uchar*) temp_buff);
-      my_errno=HA_ERR_FOUND_DUPP_KEY;
+      my_errno= HA_ERR_FOUND_DUPP_KEY;
       DBUG_RETURN(-1);
     }
   }
@@ -1757,11 +1838,11 @@ my_bool _ma_log_new(MARIA_HA *info, my_o
   page_store(log_data + FILEID_STORE_SIZE, page);
 
   /* Store link to next unused page */
-  if (info->used_key_del == 2)
+  if (info->key_del_used == 2)
     page= 0;                                    /* key_del not changed */
   else
-    page= ((share->current_key_del == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
-           share->current_key_del / share->block_size);
+    page= ((share->key_del_current == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
+           share->key_del_current / share->block_size);
 
   page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
   key_nr_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE*2, key_nr);
@@ -1826,7 +1907,7 @@ my_bool _ma_log_change(MARIA_HA *info, m
     log_pos[0]= KEY_OP_CHECK;
     int2store(log_pos+1, page_length);
     int4store(log_pos+3, crc);
-    log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= (char *) log_pos;
+    log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos;
     log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7;
     extra_length+= 7;
     translog_parts++;

=== modified file 'storage/maria/maria_chk.c'
--- a/storage/maria/maria_chk.c	2008-07-09 21:25:29 +0000
+++ b/storage/maria/maria_chk.c	2008-10-20 09:16:47 +0000
@@ -443,6 +443,8 @@ static void usage(void)
 #endif
                       printf(", they will be used\n\
                       in a round-robin fashion.\n\
+  --require-control-file  Abort if we can't find/read the maria_log_control\n\
+                          file\n\
   -s, --silent	      Only print errors.  One can use two -s to make\n\
 		      maria_chk very silent.\n\
   -v, --verbose       Print more information. This can be used with\n\
@@ -1187,7 +1189,7 @@ static int maria_chk(HA_CHECK *param, ch
         (void)(my_close(info->dfile.file, MYF(MY_WME))); /* Close new file */
         error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT,
                                        MYF(0));
-        if (_ma_open_datafile(info,info->s, -1))
+        if (_ma_open_datafile(info,info->s, NullS, -1))
           error=1;
         param->out_flag&= ~O_NEW_DATA; /* We are using new datafile */
         param->read_cache.file= info->dfile.file;

=== modified file 'storage/maria/maria_def.h'
--- a/storage/maria/maria_def.h	2008-08-25 11:49:47 +0000
+++ b/storage/maria/maria_def.h	2008-10-14 21:23:33 +0000
@@ -29,6 +29,7 @@
 #include "ma_loghandler.h"
 #include "ma_control_file.h"
 #include "ma_state.h"
+#include <waiting_threads.h>
 
 /* For testing recovery */
 #ifdef TO_BE_REMOVED
@@ -336,7 +337,7 @@ typedef struct st_maria_share
   size_t (*file_read)(MARIA_HA *, uchar *, size_t, my_off_t, myf);
   size_t (*file_write)(MARIA_HA *, const uchar *, size_t, my_off_t, myf);
   invalidator_by_filename invalidator;	/* query cache invalidator */
-  my_off_t current_key_del;		/* delete links for index pages */
+  my_off_t key_del_current;		/* delete links for index pages */
   ulong this_process;			/* processid */
   ulong last_process;			/* For table-change-check */
   ulong last_version;			/* Version on start */
@@ -380,12 +381,13 @@ typedef struct st_maria_share
   */
   my_bool now_transactional;
   my_bool have_versioning;
-  my_bool used_key_del;                         /* != 0 if key_del is locked */
+  my_bool key_del_used;                         /* != 0 if key_del is locked */
 #ifdef THREAD
   THR_LOCK lock;
   void (*lock_restore_status)(void *);
   pthread_mutex_t intern_lock;		/* Locking for use with _locking */
-  pthread_cond_t intern_cond;
+  pthread_mutex_t key_del_lock;
+  pthread_cond_t  key_del_cond;
 #endif
   my_off_t mmaped_length;
   uint nonmmaped_inserts;		/* counter of writing in
@@ -460,7 +462,7 @@ typedef struct st_maria_block_scan
 struct st_maria_handler
 {
   MARIA_SHARE *s;			/* Shared between open:s */
-  struct st_transaction *trn;           /* Pointer to active transaction */
+  struct st_ma_transaction *trn;           /* Pointer to active transaction */
   MARIA_STATUS_INFO *state, state_save;
   MARIA_STATUS_INFO *state_start;       /* State at start of transaction */
   MARIA_ROW cur_row;                    /* The active row that we just read */
@@ -491,13 +493,14 @@ struct st_maria_handler
   uint32 int_keytree_version;		/* -""- */
   int (*read_record)(MARIA_HA *, uchar*, MARIA_RECORD_POS);
   invalidator_by_filename invalidator;	/* query cache invalidator */
-  ulonglong last_auto_increment;  	/* auto value at start of statement */
+  ulonglong last_auto_increment;        /* auto value at start of statement */
   ulong this_unique;			/* uniq filenumber or thread */
   ulong last_unique;			/* last unique number */
   ulong this_loop;			/* counter for this open */
   ulong last_loop;			/* last used counter */
   MARIA_RECORD_POS save_lastpos;
   MARIA_RECORD_POS dup_key_pos;
+  TrID             dup_key_trid;
   my_off_t pos;				/* Intern variable */
   my_off_t last_keypage;		/* Last key page read */
   my_off_t last_search_keypage;		/* Last keypage when searching */
@@ -534,7 +537,7 @@ struct st_maria_handler
   int save_lastinx;
   uint preload_buff_size;		/* When preloading indexes */
   uint16 last_used_keyseg;              /* For MARIAMRG */
-  uint8 used_key_del;                   /* != 0 if key_del is used */
+  uint8 key_del_used;                   /* != 0 if key_del is used */
   my_bool was_locked;			/* Was locked in panic */
   my_bool append_insert_at_end;		/* Set if concurrent insert */
   my_bool quick_mode;
@@ -695,6 +698,19 @@ struct st_maria_handler
 #define get_pack_length(length) ((length) >= 255 ? 3 : 1)
 #define _ma_have_versioning(info) ((info)->row_flag & ROW_FLAG_TRANSID)
 
+/**
+   Sets table's trn and prints debug information
+   @param tbl              MARIA_HA of table
+   @param newtrn           what to put into tbl->trn
+   @note cast of newtrn is because %p of NULL gives warning (NULL is int)
+*/
+#define _ma_set_trn_for_table(tbl, newtrn) do {                         \
+    DBUG_PRINT("info",("table: %p trn: %p -> %p",                       \
+                       (tbl), (tbl)->trn, (void *)(newtrn)));           \
+    (tbl)->trn= (newtrn);                                               \
+  } while (0)
+
+
 #define MARIA_MIN_BLOCK_LENGTH	20		/* Because of delete-link */
 /* Don't use to small record-blocks */
 #define MARIA_EXTEND_BLOCK_LENGTH	20
@@ -781,7 +797,6 @@ typedef struct st_pinned_page
   my_bool changed;
 } MARIA_PINNED_PAGE;
 
-
 /* Prototypes for intern functions */
 extern int _ma_read_dynamic_record(MARIA_HA *, uchar *, MARIA_RECORD_POS);
 extern int _ma_read_rnd_dynamic_record(MARIA_HA *, uchar *, MARIA_RECORD_POS,
@@ -1085,7 +1100,8 @@ void _ma_def_scan_restore_pos(MARIA_HA *
 
 extern MARIA_HA *_ma_test_if_reopen(const char *filename);
 my_bool _ma_check_table_is_closed(const char *name, const char *where);
-int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, File file_to_dup);
+int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, const char *org_name,
+                      File file_to_dup);
 int _ma_open_keyfile(MARIA_SHARE *share);
 void _ma_setup_functions(register MARIA_SHARE *share);
 my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size);

=== modified file 'storage/maria/trnman.c'
--- a/storage/maria/trnman.c	2008-07-05 11:03:21 +0000
+++ b/storage/maria/trnman.c	2008-09-01 19:43:11 +0000
@@ -44,9 +44,9 @@ static pthread_mutex_t LOCK_trn_list;
 static TRN *pool;
 
 /* a hash for committed transactions that maps trid to a TRN structure */
-static LF_HASH trid_to_committed_trn;
+static LF_HASH trid_to_trn;
 
-/* an array that maps short_trid of an active transaction to a TRN structure */
+/* an array that maps short_id of an active transaction to a TRN structure */
 static TRN **short_trid_to_active_trn;
 
 /* locks for short_trid_to_active_trn and pool */
@@ -81,6 +81,17 @@ void trnman_reset_locked_tables(TRN *trn
   trn->locked_tables= locked_tables;
 }
 
+static void wt_thd_release_self(TRN *trn)
+{
+  if (trn->wt)
+  {
+    WT_RESOURCE_ID rc;
+    rc.type= &ma_rc_dup_unique;
+    rc.value= (intptr)trn;
+    wt_thd_release(trn->wt, & rc);
+    trn->wt= 0;
+  }
+}
 
 static my_bool
 default_trnman_end_trans_hook(TRN *trn __attribute__ ((unused)),
@@ -92,24 +103,6 @@ default_trnman_end_trans_hook(TRN *trn _
 }
 
 
-/*
-  NOTE
-    Just as short_id doubles as loid, this function doubles as
-    short_trid_to_LOCK_OWNER. See the compile-time assert below.
-*/
-
-#ifdef NOT_USED
-static TRN *short_trid_to_TRN(uint16 short_trid)
-{
-  TRN *trn;
-  compile_time_assert(offsetof(TRN, locks) == 0);
-  my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
-  trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]);
-  my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
-  return (TRN *)trn;
-}
-#endif
-
 static uchar *trn_get_hash_key(const uchar *trn, size_t *len,
                               my_bool unused __attribute__ ((unused)))
 {
@@ -136,7 +129,7 @@ int trnman_init(TrID initial_trid)
                                      MYF(MY_WME|MY_ZEROFILL));
   if (unlikely(!short_trid_to_active_trn))
     DBUG_RETURN(1);
-  short_trid_to_active_trn--; /* min short_trid is 1 */
+  short_trid_to_active_trn--; /* min short_id is 1 */
 
   /*
     Initialize lists.
@@ -165,17 +158,13 @@ int trnman_init(TrID initial_trid)
 
   pool= 0;
   global_trid_generator= initial_trid;
-  lf_hash_init(&trid_to_committed_trn, sizeof(TRN*), LF_HASH_UNIQUE,
+  lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE,
                0, 0, trn_get_hash_key, 0);
   DBUG_PRINT("info", ("pthread_mutex_init LOCK_trn_list"));
   pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST);
   my_atomic_rwlock_init(&LOCK_short_trid_to_trn);
   my_atomic_rwlock_init(&LOCK_pool);
 
-#ifdef NOT_USED
-  lockman_init(&maria_lockman, (loid_to_lo_func *)&short_trid_to_TRN, 10000);
-#endif
-
   DBUG_RETURN(0);
 }
 
@@ -190,7 +179,7 @@ void trnman_destroy()
 
   if (short_trid_to_active_trn == NULL) /* trnman already destroyed */
     DBUG_VOID_RETURN;
-  DBUG_ASSERT(trid_to_committed_trn.count == 0);
+  DBUG_ASSERT(trid_to_trn.count == 0);
   DBUG_ASSERT(trnman_active_transactions == 0);
   DBUG_ASSERT(trnman_committed_transactions == 0);
   DBUG_ASSERT(active_list_max.prev == &active_list_min);
@@ -201,20 +190,17 @@ void trnman_destroy()
   {
     TRN *trn= pool;
     pool= pool->next;
-    DBUG_ASSERT(trn->locks.mutex == 0);
-    DBUG_ASSERT(trn->locks.cond == 0);
+    pthread_mutex_destroy(&trn->state_lock);
     my_free((void *)trn, MYF(0));
   }
-  lf_hash_destroy(&trid_to_committed_trn);
+  lf_hash_destroy(&trid_to_trn);
   DBUG_PRINT("info", ("pthread_mutex_destroy LOCK_trn_list"));
   pthread_mutex_destroy(&LOCK_trn_list);
   my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn);
   my_atomic_rwlock_destroy(&LOCK_pool);
   my_free((void *)(short_trid_to_active_trn+1), MYF(0));
   short_trid_to_active_trn= NULL;
-#ifdef NOT_USED
-  lockman_destroy(&maria_lockman);
-#endif
+
   DBUG_VOID_RETURN;
 }
 
@@ -233,11 +219,13 @@ static TrID new_trid()
   DBUG_RETURN(++global_trid_generator);
 }
 
-static void set_short_trid(TRN *trn)
+static uint get_short_trid(TRN *trn)
 {
   int i= (int) ((global_trid_generator + (intptr)trn) * 312089 %
                 SHORT_TRID_MAX + 1);
-  for ( ; !trn->short_id ; i= 1)
+  uint res=0;
+
+  for ( ; !res ; i= 1)
   {
     my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn);
     for ( ; i <= SHORT_TRID_MAX; i++) /* the range is [1..SHORT_TRID_MAX] */
@@ -246,12 +234,13 @@ static void set_short_trid(TRN *trn)
       if (short_trid_to_active_trn[i] == NULL &&
           my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn))
       {
-        trn->short_id= i;
+        res= i;
         break;
       }
     }
     my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn);
   }
+  return res;
 }
 
 /*
@@ -260,9 +249,9 @@ static void set_short_trid(TRN *trn)
     mutex and cond will be used for lock waits
 */
 
-TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond,
-                    void *stack_end)
+TRN *trnman_new_trn(WT_THD *wt)
 {
+  int res;
   TRN *trn;
   DBUG_ENTER("trnman_new_trn");
 
@@ -270,7 +259,7 @@ TRN *trnman_new_trn(pthread_mutex_t *mut
     we have a mutex, to do simple things under it - allocate a TRN,
     increment trnman_active_transactions, set trn->min_read_from.
 
-    Note that all the above is fast. generating short_trid may be slow,
+    Note that all the above is fast. generating short_id may be slow,
     as it involves scanning a large array - so it's done outside of the
     mutex.
   */
@@ -307,8 +296,10 @@ TRN *trnman_new_trn(pthread_mutex_t *mut
       return 0;
     }
     trnman_allocated_transactions++;
+    pthread_mutex_init(&trn->state_lock, MY_MUTEX_INIT_FAST);
   }
-  trn->pins= lf_hash_get_pins(&trid_to_committed_trn, stack_end);
+  trn->wt= wt;
+  trn->pins= lf_hash_get_pins(&trid_to_trn);
   if (!trn->pins)
   {
     trnman_free_trn(trn);
@@ -320,7 +311,6 @@ TRN *trnman_new_trn(pthread_mutex_t *mut
   trn->min_read_from= active_list_min.next->trid;
 
   trn->trid= new_trid();
-  trn->short_id= 0;
 
   trn->next= &active_list_max;
   trn->prev= active_list_max.prev;
@@ -337,25 +327,27 @@ TRN *trnman_new_trn(pthread_mutex_t *mut
     trn->min_read_from= trn->trid + 1;
   }
 
-  trn->commit_trid= 0;
+  trn->commit_trid=  ~(TrID)0;
   trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0;
   trn->used_tables= 0;
 
-  trn->locks.mutex= mutex;
-  trn->locks.cond= cond;
-  trn->locks.waiting_for= 0;
-  trn->locks.all_locks= 0;
-#ifdef NOT_USED
-  trn->locks.pins= lf_alloc_get_pins(&maria_lockman.alloc);
-#endif
-
   trn->locked_tables= 0;
 
   /*
     only after the following function TRN is considered initialized,
     so it must be done the last
   */
-  set_short_trid(trn);
+  pthread_mutex_lock(&trn->state_lock);
+  trn->short_id= get_short_trid(trn);
+  pthread_mutex_unlock(&trn->state_lock);
+
+  res= lf_hash_insert(&trid_to_trn, trn->pins, &trn);
+  DBUG_ASSERT(res <= 0);
+  if (res)
+  {
+    trnman_end_trn(trn, 0);
+    return 0;
+  }
 
   DBUG_PRINT("exit", ("trn: x%lx  trid: 0x%lu",
                       (ulong) trn, (ulong) trn->trid));
@@ -391,6 +383,7 @@ my_bool trnman_end_trn(TRN *trn, my_bool
   /* if a rollback, all UNDO records should have been executed */
   DBUG_ASSERT(commit || trn->undo_lsn == 0);
   DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));
+
   pthread_mutex_lock(&LOCK_trn_list);
 
   /* remove from active list */
@@ -423,58 +416,35 @@ my_bool trnman_end_trn(TRN *trn, my_bool
     }
   }
 
+  pthread_mutex_lock(&trn->state_lock);
+  trn->commit_trid= global_trid_generator;
+  wt_thd_release_self(trn);
+  pthread_mutex_unlock(&trn->state_lock);
+
   /*
     if transaction is committed and it was not the only active transaction -
-    add it to the committed list (which is used for read-from relation)
+    add it to the committed list
   */
   if (commit && active_list_min.next != &active_list_max)
   {
-    trn->commit_trid= global_trid_generator;
     trn->next= &committed_list_max;
     trn->prev= committed_list_max.prev;
     trnman_committed_transactions++;
-
-    res= lf_hash_insert(&trid_to_committed_trn, pins, &trn);
-    /*
-      By going on with life is res<0, we let other threads block on
-      our rows (because they will never see us committed in
-      trid_to_committed_trn) until they timeout. Though correct, this is not a
-      good situation:
-      - if connection reconnects and wants to check if its rows have been
-      committed, it will not be able to do that (it will just lock on them) so
-      connection stays permanently in doubt
-      - internal structures trid_to_committed_trn and committed_list are
-      desynchronized.
-      So we should take Maria down immediately, the two problems being
-      automatically solved at restart.
-    */
-    DBUG_ASSERT(res <= 0);
+    committed_list_max.prev= trn->prev->next= trn;
   }
-  if (res)
+  else
   {
-    /*
-      res == 1 means the condition in the if() above was false.
-      res == -1 means lf_hash_insert failed
-    */
     trn->next= free_me;
     free_me= trn;
   }
-  else
-  {
-    committed_list_max.prev= trn->prev->next= trn;
-  }
   if ((*trnman_end_trans_hook)(trn, commit,
                                active_list_min.next != &active_list_max))
     res= -1;
   trnman_active_transactions--;
+
   pthread_mutex_unlock(&LOCK_trn_list);
 
   /* the rest is done outside of a critical section */
-#ifdef NOT_USED
-  lockman_release_locks(&maria_lockman, &trn->locks);
-#endif
-  trn->locks.mutex= 0;
-  trn->locks.cond= 0;
   my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
   my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->short_id], 0);
   my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
@@ -491,18 +461,13 @@ my_bool trnman_end_trn(TRN *trn, my_bool
     TRN *t= free_me;
     free_me= free_me->next;
 
-    /*
-      ignore OOM here. it's harmless, and there's nothing we could do, anyway
-    */
-    (void)lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID));
+    /* ignore OOM. it's harmless, and we can do nothing here anyway */
+    (void)lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID));
 
     trnman_free_trn(t);
   }
 
   lf_hash_put_pins(pins);
-#ifdef NOT_USED
-  lf_pinbox_put_pins(trn->locks.pins);
-#endif
 
   DBUG_RETURN(res < 0);
 }
@@ -526,6 +491,11 @@ void trnman_free_trn(TRN *trn)
   */
   union { TRN *trn; void *v; } tmp;
 
+
+  pthread_mutex_lock(&trn->state_lock);
+  trn->short_id= 0;
+  pthread_mutex_unlock(&trn->state_lock);
+
   tmp.trn= pool;
 
   my_atomic_rwlock_wrlock(&LOCK_pool);
@@ -580,9 +550,9 @@ int trnman_can_read_from(TRN *trn, TrID 
     return trid == trn->trid;
   }
 
-  found= lf_hash_search(&trid_to_committed_trn, trn->pins, &trid, sizeof(trid));
+  found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
   if (found == NULL)
-    return 0; /* not in the hash of committed transactions = cannot read */
+    return 0; /* not in the hash of transactions = cannot read */
   if (found == MY_ERRPTR)
     return -1;
 
@@ -591,6 +561,33 @@ int trnman_can_read_from(TRN *trn, TrID 
   return can;
 }
 
+TRN *trnman_trid_to_trn(TRN *trn, TrID trid)
+{
+  TRN **found;
+  LF_REQUIRE_PINS(3);
+
+  if (trid < trn->min_read_from)
+    return 0; /* it's committed eons ago */
+
+  found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
+  if (found == NULL || found == MY_ERRPTR)
+    return 0; /* no luck */
+
+  /* we've found something */
+  pthread_mutex_lock(&(*found)->state_lock);
+
+  if ((*found)->short_id == 0)
+  {
+    pthread_mutex_unlock(&(*found)->state_lock);
+    lf_hash_search_unpin(trn->pins);
+    return 0; /* but it was a ghost */
+  }
+  lf_hash_search_unpin(trn->pins);
+
+  /* Gotcha! */
+  return *found; /* note that TRN is returned locked !!! */
+}
+
 /* TODO: the stubs below are waiting for savepoints to be implemented */
 
 void trnman_new_statement(TRN *trn __attribute__ ((unused)))
@@ -768,7 +765,8 @@ TRN *trnman_recreate_trn_from_recovery(u
   TrID old_trid_generator= global_trid_generator;
   TRN *trn;
   DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
-  if (unlikely((trn= trnman_new_trn(NULL, NULL, NULL)) == NULL))
+  global_trid_generator= longid-1; /* force a correct trid in the new trn */
+  if (unlikely((trn= trnman_new_trn(NULL)) == NULL))
     return NULL;
   /* deallocate excessive allocations of trnman_new_trn() */
   global_trid_generator= old_trid_generator;
@@ -776,7 +774,6 @@ TRN *trnman_recreate_trn_from_recovery(u
   short_trid_to_active_trn[trn->short_id]= 0;
   DBUG_ASSERT(short_trid_to_active_trn[shortid] == NULL);
   short_trid_to_active_trn[shortid]= trn;
-  trn->trid= longid;
   trn->short_id= shortid;
   return trn;
 }

=== modified file 'storage/maria/trnman.h'
--- a/storage/maria/trnman.h	2008-05-29 15:33:33 +0000
+++ b/storage/maria/trnman.h	2008-08-07 20:57:25 +0000
@@ -19,7 +19,6 @@
 C_MODE_START
 
 #include <lf.h>
-#include "lockman.h"
 #include "trnman_public.h"
 #include "ma_loghandler_lsn.h"
 
@@ -28,32 +27,38 @@ C_MODE_START
   is created. Transaction can always be identified by its trid,
   even after transaction has ended.
 
-  short_trid - 2-byte transaction identifier, identifies a running
+  short_id - 2-byte transaction identifier, identifies a running
   transaction, is reassigned when transaction ends.
-*/
 
-/*
-  short transaction id is at the same time its identifier
-  for a lock manager - its lock owner identifier (loid)
-*/
+  when short_id is 0, TRN is not initialized, for all practical purposes
+  it could be considered unused.
 
-#define short_id locks.loid
+  when commit_trid is ~(TrID)0 the transaction is running, otherwise it's
+  committed.
 
-struct st_transaction
+  state_lock mutex protects the state of a TRN, that is whether a TRN
+  is committed/running/unused. Meaning that modifications of short_id and
+  commit_trid happen under this mutex.
+*/
+
+struct st_ma_transaction
 {
-  LOCK_OWNER           locks; /* must be the first! see short_trid_to_TRN() */
   LF_PINS              *pins;
+  WT_THD               *wt;
+  pthread_mutex_t      state_lock;
   void                 *used_tables;  /* Tables used by transaction */
   TRN                  *next, *prev;
   TrID                 trid, min_read_from, commit_trid;
   LSN		       rec_lsn, undo_lsn;
   LSN_WITH_FLAGS       first_undo_lsn;
   uint                 locked_tables;
-  /* Note! if locks.loid is 0, trn is NOT initialized */
+  uint16               short_id;
 };
 
 #define TRANSACTION_LOGGED_LONG_ID ULL(0x8000000000000000)
 
+extern WT_RESOURCE_TYPE ma_rc_dup_unique;
+
 C_MODE_END
 
 #endif

=== modified file 'storage/maria/trnman_public.h'
--- a/storage/maria/trnman_public.h	2008-07-05 11:03:21 +0000
+++ b/storage/maria/trnman_public.h	2008-08-07 20:57:25 +0000
@@ -24,10 +24,11 @@
 #define _trnman_public_h
 
 #include "ma_loghandler_lsn.h"
+#include <waiting_threads.h>
 
 C_MODE_START
 typedef uint64 TrID; /* our TrID is 6 bytes */
-typedef struct st_transaction TRN;
+typedef struct st_ma_transaction TRN;
 
 #define SHORT_TRID_MAX 65535
 
@@ -38,13 +39,14 @@ extern my_bool (*trnman_end_trans_hook)(
 
 int trnman_init(TrID);
 void trnman_destroy(void);
-TRN *trnman_new_trn(pthread_mutex_t *, pthread_cond_t *, void *);
+TRN *trnman_new_trn(WT_THD *wt);
 my_bool trnman_end_trn(TRN *trn, my_bool commit);
 #define trnman_commit_trn(T) trnman_end_trn(T, TRUE)
 #define trnman_abort_trn(T)  trnman_end_trn(T, FALSE)
 #define trnman_rollback_trn(T)  trnman_end_trn(T, FALSE)
 void trnman_free_trn(TRN *trn);
 int trnman_can_read_from(TRN *trn, TrID trid);
+TRN *trnman_trid_to_trn(TRN *trn, TrID trid);
 void trnman_new_statement(TRN *trn);
 void trnman_rollback_statement(TRN *trn);
 my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,

=== modified file 'storage/maria/unittest/CMakeLists.txt'
--- a/storage/maria/unittest/CMakeLists.txt	2008-06-13 09:47:17 +0000
+++ b/storage/maria/unittest/CMakeLists.txt	2008-10-16 19:44:12 +0000
@@ -94,3 +94,5 @@ SET_TARGET_PROPERTIES(ma_pagecache_consi
 	PROPERTIES COMPILE_FLAGS "${ma_pagecache_common_cppflags} -DTEST_PAGE_SIZE=65536 -DTEST_WRITERS")
 ADD_EXECUTABLE(ma_pagecache_rwconsist_1k-t ma_pagecache_rwconsist.c)
 SET_TARGET_PROPERTIES(ma_pagecache_rwconsist_1k-t PROPERTIES COMPILE_FLAGS "-DTEST_PAGE_SIZE=1024")
+ADD_EXECUTABLE(ma_pagecache_rwconsist2_1k-t ma_pagecache_rwconsist2.c)
+SET_TARGET_PROPERTIES(ma_pagecache_rwconsist2_1k-t PROPERTIES COMPILE_FLAGS "-DTEST_PAGE_SIZE=1024")

=== modified file 'storage/maria/unittest/Makefile.am'
--- a/storage/maria/unittest/Makefile.am	2008-06-13 09:47:17 +0000
+++ b/storage/maria/unittest/Makefile.am	2008-10-16 19:44:12 +0000
@@ -39,6 +39,7 @@ noinst_PROGRAMS =	ma_control_file-t trnm
 			ma_pagecache_consist_1kWR-t \
 			ma_pagecache_consist_64kWR-t \
                         ma_pagecache_rwconsist_1k-t \
+                        ma_pagecache_rwconsist2_1k-t \
 			ma_test_loghandler-t \
                         ma_test_loghandler_multigroup-t \
 			ma_test_loghandler_multithread-t \
@@ -101,6 +102,8 @@ ma_pagecache_consist_64kWR_t_CPPFLAGS =	
 
 ma_pagecache_rwconsist_1k_t_SOURCES =	ma_pagecache_rwconsist.c
 ma_pagecache_rwconsist_1k_t_CPPFLAGS = -DTEST_PAGE_SIZE=1024
+ma_pagecache_rwconsist2_1k_t_SOURCES =	ma_pagecache_rwconsist2.c
+ma_pagecache_rwconsist2_1k_t_CPPFLAGS = -DTEST_PAGE_SIZE=1024
 
 # the generic lock manager may not be used in the end and lockman1-t crashes,
 # and lockman2-t takes at least quarter an hour,

=== modified file 'storage/maria/unittest/ma_pagecache_rwconsist.c'
--- a/storage/maria/unittest/ma_pagecache_rwconsist.c	2008-05-29 15:44:11 +0000
+++ b/storage/maria/unittest/ma_pagecache_rwconsist.c	2008-10-20 09:16:47 +0000
@@ -114,7 +114,7 @@ void reader(int num)
     check_page(buff, num);
     pagecache_unlock_by_link(&pagecache, link,
                              PAGECACHE_LOCK_READ_UNLOCK,
-                             PAGECACHE_UNPIN, 0, 0, 0);
+                             PAGECACHE_UNPIN, 0, 0, 0, FALSE);
     {
       int lim= rand() % read_sleep_limit;
       int j;
@@ -149,7 +149,7 @@ void writer(int num)
     check_page(buff, num);
     pagecache_unlock_by_link(&pagecache, link,
                              PAGECACHE_LOCK_WRITE_UNLOCK,
-                             PAGECACHE_UNPIN, 0, 0, 1);
+                             PAGECACHE_UNPIN, 0, 0, 1, FALSE);
     SLEEP;
   }
 }

=== added file 'storage/maria/unittest/ma_pagecache_rwconsist2.c'
--- a/storage/maria/unittest/ma_pagecache_rwconsist2.c	1970-01-01 00:00:00 +0000
+++ b/storage/maria/unittest/ma_pagecache_rwconsist2.c	2008-10-20 13:03:34 +0000
@@ -0,0 +1,360 @@
+/* Copyright (C) 2006-2008 MySQL AB, 2008 Sun Microsystems, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+
+/**
+  @file
+  Tests that a long block write done under a write lock stays consistent
+  with simultaneous reads of that block which do not request a read lock.
+*/
+
+/*
+  TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in
+  my_atomic-t.c (see BUG#22320).
+*/
+
+#include <tap.h>
+#include <my_sys.h>
+#include <m_string.h>
+#include "test_file.h"
+#include <tap.h>
+
+#define PCACHE_SIZE (TEST_PAGE_SIZE*1024*8)  /* passed to init_pagecache() */
+
+#ifndef DBUG_OFF
+static const char* default_dbug_option;  /* DBUG settings, set in main() */
+#endif
+
+/* Pause used by writer() between the two halves of a page and after unlock */
+#define SLEEP my_sleep(5)
+
+static char *file1_name= (char*)"page_cache_test_file_1";
+static PAGECACHE_FILE file1;
+static pthread_cond_t COND_thread_count;  /* signalled when a thread ends */
+static pthread_mutex_t LOCK_thread_count; /* protects thread_count */
+static uint thread_count= 0;              /* started threads not yet ended */
+static PAGECACHE pagecache;
+
+static uint number_of_readers= 5;         /* reader threads to start */
+static uint number_of_writers= 5;         /* writer threads to start */
+static uint number_of_read_tests= 20000;  /* iterations per reader */
+static uint number_of_write_tests= 1000;  /* iterations per writer */
+static uint report_divisor= 50;           /* diag() every so many iterations */
+
+/**
+  @brief No-op pagecache callback.
+  Ignores the page, its number and the data pointer; always returns 0.
+*/
+static my_bool
+dummy_callback(uchar *page __attribute__((unused)),
+               pgcache_page_no_t page_no __attribute__((unused)),
+               uchar* data_ptr __attribute__((unused)))
+{
+  return FALSE;
+}
+
+
+/**
+  @brief No-op pagecache callback taking only the data_ptr argument.
+  The argument is ignored and nothing is returned.
+*/
+static void
+dummy_fail_callback(uchar* data_ptr __attribute__((unused)))
+{
+  /* intentionally empty */
+}
+
+
+/**
+  @brief Verifies that every byte of a page equals the page's first byte.
+
+  On the first mismatch a diagnostic is printed and the process exits with 1.
+  @param buff            page content; TEST_PAGE_SIZE bytes are examined
+  @param task            id of the calling task, shown in the diagnostic
+*/
+void check_page(uchar *buff, int task)
+{
+  uint pos;
+  DBUG_ENTER("check_page");
+
+  for (pos= 1; pos < TEST_PAGE_SIZE; pos++)
+  {
+    if (buff[pos] == buff[0])
+      continue;
+    diag("Task %d char #%u '%u' != '%u'", task, pos, (uint) buff[0],
+         (uint) buff[pos]);
+    DBUG_PRINT("err", ("try to flush"));
+    exit(1);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+
+/* Reads the test page of file1 into a local buffer without taking a lock and
+   checks it, number_of_read_tests times; 'num' only labels the diagnostics. */
+void reader(int num)
+{
+  uint iter;
+  unsigned char page[TEST_PAGE_SIZE];
+
+  for (iter= 0; iter < number_of_read_tests; iter++)
+  {
+    if (!(iter % report_divisor))
+      diag("Reader %d - %u", num, iter);
+    pagecache_read(&pagecache, &file1, 0, 3, page, PAGECACHE_PLAIN_PAGE,
+                   PAGECACHE_LOCK_LEFT_UNLOCKED, NULL);
+    check_page(page, num);
+  }
+}
+
+
+/* Repeatedly rewrites the test page of file1 under a write lock: fills it
+   with a random byte in two halves with a sleep in between, verifying the
+   page before and after. 'num' only labels the diagnostics. */
+void writer(int num)
+{
+  PAGECACHE_BLOCK_LINK *link;
+  uchar *page;
+  uint iter;
+
+  for (iter= 0; iter < number_of_write_tests; iter++)
+  {
+    uchar fill= (uchar) rand() % 256;
+
+    if (!(iter % report_divisor))
+      diag("Writer %d - %u", num, iter);
+    page= pagecache_read(&pagecache, &file1, 0, 3, NULL, PAGECACHE_PLAIN_PAGE,
+                         PAGECACHE_LOCK_WRITE, &link);
+
+    check_page(page, num);
+    bfill(page, TEST_PAGE_SIZE / 2, fill);
+    SLEEP;                       /* only the first half is rewritten here */
+    bfill(page + TEST_PAGE_SIZE / 2, TEST_PAGE_SIZE / 2, fill);
+    check_page(page, num);
+    pagecache_unlock_by_link(&pagecache, link, PAGECACHE_LOCK_WRITE_UNLOCK,
+                             PAGECACHE_UNPIN, 0, 0, 1, FALSE);
+    SLEEP;
+  }
+}
+
+
+/* Thread body for one reader: runs reader() with the id stored in the
+   heap-allocated int 'arg', reports completion and frees 'arg'. */
+static void *test_thread_reader(void *arg)
+{
+  int id= *((int*) arg);
+  my_thread_init();
+  {
+    DBUG_ENTER("test_reader");
+    DBUG_PRINT("enter", ("param: %d", id));
+
+    reader(id);
+    DBUG_PRINT("info", ("Thread %s ended", my_thread_name()));
+    pthread_mutex_lock(&LOCK_thread_count);
+    ok(1, "reader%d: done", id);
+    thread_count--;
+    pthread_cond_signal(&COND_thread_count); /* wake up main, which waits */
+    pthread_mutex_unlock(&LOCK_thread_count);
+    free((uchar*) arg);
+    my_thread_end();
+  }
+  return NULL;
+}
+
+
+/* Thread body for one writer: runs writer() with the id stored in the
+   heap-allocated int 'arg', reports completion and frees 'arg'. */
+static void *test_thread_writer(void *arg)
+{
+  int id= *((int*) arg);
+  my_thread_init();
+  {
+    DBUG_ENTER("test_writer");
+    writer(id);
+    DBUG_PRINT("info", ("Thread %s ended", my_thread_name()));
+    pthread_mutex_lock(&LOCK_thread_count);
+    ok(1, "writer%d: done", id);
+    thread_count--;
+    pthread_cond_signal(&COND_thread_count); /* wake up main, which waits */
+    pthread_mutex_unlock(&LOCK_thread_count);
+    free((uchar*) arg);
+    my_thread_end();
+  }
+  return NULL;
+}
+
+
+/* Creates the test file and page cache, writes one zero-filled page, then
+   starts the reader and writer threads, waits for all of them to finish
+   and tears everything down. Returns exit_status(). */
+int main(int argc __attribute__((unused)),
+         char **argv __attribute__((unused)))
+{
+  pthread_t tid;
+  pthread_attr_t thr_attr;
+  int *param, error, pagen;
+
+  MY_INIT(argv[0]);
+
+  /* In debug builds any command-line argument enables DBUG tracing. */
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+  default_dbug_option= "d:t:i:O,\\test_pagecache_consist.trace";
+#else
+  default_dbug_option= "d:t:i:O,/tmp/test_pagecache_consist.trace";
+#endif
+  if (argc > 1)
+  {
+    DBUG_SET(default_dbug_option);
+    DBUG_SET_INITIAL(default_dbug_option);
+  }
+#endif
+
+  {
+  DBUG_ENTER("main");
+  DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name()));
+  plan(number_of_writers + number_of_readers);
+  SKIP_BIG_TESTS(number_of_writers + number_of_readers)
+  {
+  /* Create the test file and register the dummy callbacks for it. */
+  if ((file1.file= my_open(file1_name,
+                           O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+  {
+    diag("Got error during file1 creation from open() (errno: %d)\n", errno);
+    exit(1);
+  }
+  pagecache_file_init(file1, &dummy_callback, &dummy_callback,
+                      &dummy_fail_callback, &dummy_callback, NULL);
+  DBUG_PRINT("info", ("file1: %d", file1.file));
+  if (my_chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO, MYF(MY_WME)))
+    exit(1);
+  my_pwrite(file1.file, (const uchar*) "test file", 9, 0, MYF(0));
+
+  /* Condition and mutex through which the threads report completion. */
+  if ((error= pthread_cond_init(&COND_thread_count, NULL)))
+  {
+    diag( "COND_thread_count: %d from pthread_cond_init (errno: %d)\n",
+          error, errno);
+    exit(1);
+  }
+  if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST)))
+  {
+    diag( "LOCK_thread_count: %d from pthread_mutex_init (errno: %d)\n",
+          error, errno);
+    exit(1);
+  }
+
+  /* Threads are created detached; main waits on thread_count instead. */
+  if ((error= pthread_attr_init(&thr_attr)))
+  {
+    diag("Got error: %d from pthread_attr_init (errno: %d)\n",
+         error,errno);
+    exit(1);
+  }
+  if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED)))
+  {
+    diag("Got error: %d from pthread_attr_setdetachstate (errno: %d)\n",
+         error,errno);
+    exit(1);
+  }
+
+#ifdef HAVE_THR_SETCONCURRENCY
+  (void)(thr_setconcurrency(2));
+#endif
+
+  if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+                             TEST_PAGE_SIZE, 0)) == 0)
+  {
+    diag("Got error: init_pagecache() (errno: %d)\n", errno);
+    exit(1);
+  }
+  DBUG_PRINT("info", ("Page cache %d pages", pagen));
+  /* Write an all-zero (hence consistent) page for the threads to start from. */
+  {
+    /* NOTE(review): buffr is never freed; presumably it may be released
+       once pagecache_write() returns - confirm before adding free(). */
+    unsigned char *buffr= malloc(TEST_PAGE_SIZE);
+    memset(buffr, '\0', TEST_PAGE_SIZE);
+    pagecache_write(&pagecache, &file1, 0, 3, buffr, PAGECACHE_PLAIN_PAGE,
+                    PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN_LEFT_UNPINNED,
+                    PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE);
+  }
+  pthread_mutex_lock(&LOCK_thread_count);
+  /* Start readers and writers alternately; each gets a unique id. */
+  while (number_of_readers != 0 || number_of_writers != 0)
+  {
+    if (number_of_readers != 0)
+    {
+      param=(int*) malloc(sizeof(int));
+      *param= number_of_readers + number_of_writers;
+      if ((error= pthread_create(&tid, &thr_attr, test_thread_reader,
+                                 (void*) param)))
+      {
+        diag("Got error: %d from pthread_create (errno: %d)\n", error,errno);
+        exit(1);
+      }
+      thread_count++;
+      number_of_readers--;
+    }
+    if (number_of_writers != 0)
+    {
+      param=(int*) malloc(sizeof(int));
+      *param= number_of_writers + number_of_readers;
+      if ((error= pthread_create(&tid, &thr_attr, test_thread_writer,
+                                 (void*) param)))
+      {
+        diag("Got error: %d from pthread_create (errno: %d)\n", error,errno);
+        exit(1);
+      }
+      thread_count++;
+      number_of_writers--;
+    }
+  }
+  DBUG_PRINT("info", ("Thread started"));
+  pthread_mutex_unlock(&LOCK_thread_count);
+
+  pthread_attr_destroy(&thr_attr);
+
+  /* Wait until every thread has decremented thread_count. */
+  pthread_mutex_lock(&LOCK_thread_count);
+  while (thread_count)
+  {
+    if ((error= pthread_cond_wait(&COND_thread_count, &LOCK_thread_count)))
+      diag("COND_thread_count: %d from pthread_cond_wait\n", error);
+  }
+  pthread_mutex_unlock(&LOCK_thread_count);
+  DBUG_PRINT("info", ("thread ended"));
+
+  end_pagecache(&pagecache, 1);
+  DBUG_PRINT("info", ("Page cache ended"));
+
+  if (my_close(file1.file, MYF(0)) != 0)
+  {
+    diag("Got error during file1 closing from close() (errno: %d)\n", errno);
+    exit(1);
+  }
+  my_delete(file1_name, MYF(0));
+
+  DBUG_PRINT("info", ("file1 (%d) closed", file1.file));
+  DBUG_PRINT("info", ("Program end"));
+  } /* SKIP_BIG_TESTS */
+  my_end(0);
+
+  return exit_status();
+  }
+}
+
+#include "../ma_check_standalone.h"

=== modified file 'storage/maria/unittest/ma_pagecache_single.c'
--- a/storage/maria/unittest/ma_pagecache_single.c	2008-05-29 15:44:11 +0000
+++ b/storage/maria/unittest/ma_pagecache_single.c	2008-10-20 09:16:47 +0000
@@ -391,7 +391,7 @@ int simple_pin_no_lock_test()
                   &link, LSN_IMPOSSIBLE);
   pagecache_unlock_by_link(&pagecache, link,
                            PAGECACHE_LOCK_WRITE_UNLOCK,
-                           PAGECACHE_PIN_LEFT_PINNED, 0, 0, 1);
+                           PAGECACHE_PIN_LEFT_PINNED, 0, 0, 1, FALSE);
   if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
   {
     diag("Did not get error in flush_pagecache_blocks 3\n");

=== modified file 'storage/maria/unittest/ma_test_loghandler-t.c'
--- a/storage/maria/unittest/ma_test_loghandler-t.c	2008-06-05 16:11:22 +0000
+++ b/storage/maria/unittest/ma_test_loghandler-t.c	2008-10-20 09:16:47 +0000
@@ -227,7 +227,7 @@ int main(int argc __attribute__((unused)
   long_tr_id[5]= 0xff;
 
   int4store(long_tr_id, 0);
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   trn->short_id= 0;
   trn->first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
@@ -250,7 +250,7 @@ int main(int argc __attribute__((unused)
     if (i % 2)
     {
       lsn_store(lsn_buff, lsn_base);
-      parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+      parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
       parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
       /* check auto-count feature */
       parts[TRANSLOG_INTERNAL_PARTS + 1].str= NULL;
@@ -268,9 +268,9 @@ int main(int argc __attribute__((unused)
       lsn_store(lsn_buff, lsn_base);
       if ((rec_len= rand_buffer_size()) < 12)
         rec_len= 12;
-      parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+      parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
       parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
-      parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+      parts[TRANSLOG_INTERNAL_PARTS + 1].str= long_buffer;
       parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
       /* check record length auto-counting */
       if (translog_write_record(&lsn,
@@ -290,7 +290,7 @@ int main(int argc __attribute__((unused)
     {
       lsn_store(lsn_buff, lsn_base);
       lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
-      parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+      parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
       parts[TRANSLOG_INTERNAL_PARTS + 0].length= 23;
       if (translog_write_record(&lsn,
                                 LOGREC_FIXED_RECORD_2LSN_EXAMPLE,
@@ -308,9 +308,9 @@ int main(int argc __attribute__((unused)
       lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
       if ((rec_len= rand_buffer_size()) < 19)
         rec_len= 19;
-      parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+      parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
       parts[TRANSLOG_INTERNAL_PARTS + 0].length= 14;
-      parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+      parts[TRANSLOG_INTERNAL_PARTS + 1].str= long_buffer;
       parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
       if (translog_write_record(&lsn,
                                 LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE,
@@ -327,7 +327,7 @@ int main(int argc __attribute__((unused)
       ok(1, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
     }
     int4store(long_tr_id, i);
-    parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+    parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
     parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
     if (translog_write_record(&lsn,
                               LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -346,7 +346,7 @@ int main(int argc __attribute__((unused)
 
     if ((rec_len= rand_buffer_size()) < 9)
       rec_len= 9;
-    parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+    parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_buffer;
     parts[TRANSLOG_INTERNAL_PARTS + 0].length= rec_len;
     if (translog_write_record(&lsn,
                               LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,

=== modified file 'storage/maria/unittest/ma_test_loghandler_first_lsn-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c	2008-06-05 16:11:22 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c	2008-10-20 09:16:47 +0000
@@ -113,7 +113,7 @@ int main(int argc __attribute__((unused)
 
 
   int4store(long_tr_id, 0);
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   if (translog_write_record(&lsn,
                             LOGREC_FIXED_RECORD_0LSN_EXAMPLE,

=== modified file 'storage/maria/unittest/ma_test_loghandler_max_lsn-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c	2008-06-05 16:11:22 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c	2008-10-20 09:16:47 +0000
@@ -104,7 +104,7 @@ int main(int argc __attribute__((unused)
 
   /* write more then 1 file */
   int4store(long_tr_id, 0);
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   for(i= 0; i < LOG_FILE_SIZE/6; i++)
   {

=== modified file 'storage/maria/unittest/ma_test_loghandler_multigroup-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c	2008-06-05 16:11:22 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c	2008-10-20 09:16:47 +0000
@@ -293,7 +293,7 @@ int main(int argc __attribute__((unused)
   long_tr_id[5]= 0xff;
 
   int4store(long_tr_id, 0);
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   trn->short_id= 0;
   trn->first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
@@ -301,7 +301,7 @@ int main(int argc __attribute__((unused)
                             trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, parts,
                             NULL, NULL))
   {
-    fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+    fprintf(stderr, "Can't write record #%u\n", 0);
     translog_destroy();
     ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
     exit(1);
@@ -314,7 +314,7 @@ int main(int argc __attribute__((unused)
     if (i % 2)
     {
       lsn_store(lsn_buff, lsn_base);
-      parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+      parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
       parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
       trn->short_id= i % 0xFFFF;
       if (translog_write_record(&lsn,
@@ -322,8 +322,7 @@ int main(int argc __attribute__((unused)
                                 LSN_STORE_SIZE, TRANSLOG_INTERNAL_PARTS + 1,
                                 parts, NULL, NULL))
       {
-        fprintf(stderr, "1 Can't write reference before record #%lu\n",
-                (ulong) i);
+        fprintf(stderr, "1 Can't write reference before record #%u\n", i);
         translog_destroy();
         ok(0, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE");
         exit(1);
@@ -331,9 +330,9 @@ int main(int argc __attribute__((unused)
       ok(1, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE");
       lsn_store(lsn_buff, lsn_base);
       rec_len= get_len();
-      parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+      parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
       parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
-      parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+      parts[TRANSLOG_INTERNAL_PARTS + 1].str= long_buffer;
       parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
       trn->short_id= i % 0xFFFF;
       if (translog_write_record(&lsn,
@@ -342,8 +341,7 @@ int main(int argc __attribute__((unused)
                                 TRANSLOG_INTERNAL_PARTS + 2,
                                 parts, NULL, NULL))
       {
-        fprintf(stderr, "1 Can't write var reference before record #%lu\n",
-                (ulong) i);
+        fprintf(stderr, "1 Can't write var reference before record #%u\n", i);
         translog_destroy();
         ok(0, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE");
         exit(1);
@@ -354,7 +352,7 @@ int main(int argc __attribute__((unused)
     {
       lsn_store(lsn_buff, lsn_base);
       lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
-      parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)lsn_buff;
+      parts[TRANSLOG_INTERNAL_PARTS + 1].str= lsn_buff;
       parts[TRANSLOG_INTERNAL_PARTS + 1].length= 23;
       trn->short_id= i % 0xFFFF;
       if (translog_write_record(&lsn,
@@ -362,8 +360,7 @@ int main(int argc __attribute__((unused)
                                 trn, NULL, 23, TRANSLOG_INTERNAL_PARTS + 1,
                                 parts, NULL, NULL))
       {
-        fprintf(stderr, "0 Can't write reference before record #%lu\n",
-                (ulong) i);
+        fprintf(stderr, "0 Can't write reference before record #%u\n", i);
         translog_destroy();
         ok(0, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE");
         exit(1);
@@ -372,9 +369,9 @@ int main(int argc __attribute__((unused)
       lsn_store(lsn_buff, lsn_base);
       lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
       rec_len= get_len();
-      parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+      parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
       parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE * 2;
-      parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+      parts[TRANSLOG_INTERNAL_PARTS + 1].str= long_buffer;
       parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
       trn->short_id= i % 0xFFFF;
       if (translog_write_record(&lsn,
@@ -383,8 +380,7 @@ int main(int argc __attribute__((unused)
                                 TRANSLOG_INTERNAL_PARTS + 2,
                                 parts, NULL, NULL))
       {
-        fprintf(stderr, "0 Can't write var reference before record #%lu\n",
-                (ulong) i);
+        fprintf(stderr, "0 Can't write var reference before record #%u\n", i);
         translog_destroy();
         ok(0, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
         exit(1);
@@ -392,7 +388,7 @@ int main(int argc __attribute__((unused)
       ok(1, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
     }
     int4store(long_tr_id, i);
-    parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+    parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
     parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
     trn->short_id= i % 0xFFFF;
     if (translog_write_record(&lsn,
@@ -400,7 +396,7 @@ int main(int argc __attribute__((unused)
                               trn, NULL, 6,
                               TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL))
     {
-      fprintf(stderr, "Can't write record #%lu\n", (ulong) i);
+      fprintf(stderr, "Can't write record #%u\n", i);
       translog_destroy();
       ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
       exit(1);
@@ -410,7 +406,7 @@ int main(int argc __attribute__((unused)
     lsn_base= lsn;
 
     rec_len= get_len();
-    parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+    parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_buffer;
     parts[TRANSLOG_INTERNAL_PARTS + 0].length= rec_len;
     trn->short_id= i % 0xFFFF;
     if (translog_write_record(&lsn,
@@ -418,7 +414,7 @@ int main(int argc __attribute__((unused)
                               trn, NULL, rec_len,
                               TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL))
     {
-      fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+      fprintf(stderr, "Can't write variable record #%u\n", i);
       translog_destroy();
       ok(0, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE");
       exit(1);

=== modified file 'storage/maria/unittest/ma_test_loghandler_multithread-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c	2008-06-30 09:59:59 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c	2008-10-20 09:16:47 +0000
@@ -167,7 +167,7 @@ void writer(int num)
 
     int2store(long_tr_id, num);
     int4store(long_tr_id + 2, i);
-    parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+    parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
     parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
     if (translog_write_record(&lsn,
                               LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -183,7 +183,7 @@ void writer(int num)
       return;
     }
     lsns1[num][i]= lsn;
-    parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+    parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_buffer;
     parts[TRANSLOG_INTERNAL_PARTS + 0].length= len;
     if (translog_write_record(&lsn,
                               LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,
@@ -368,7 +368,7 @@ int main(int argc __attribute__((unused)
       0x11, 0x22, 0x33, 0x44, 0x55, 0x66
     };
 
-    parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+    parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
     parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
     dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
     if (translog_write_record(&first_lsn,

=== modified file 'storage/maria/unittest/ma_test_loghandler_noflush-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_noflush-t.c	2008-06-05 16:11:22 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_noflush-t.c	2008-10-20 09:16:47 +0000
@@ -93,7 +93,7 @@ int main(int argc __attribute__((unused)
 
   int4store(long_tr_id, 0);
   long_tr_id[5]= 0xff;
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   if (translog_write_record(&first_lsn,
                             LOGREC_FIXED_RECORD_0LSN_EXAMPLE,

=== modified file 'storage/maria/unittest/ma_test_loghandler_nologs-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_nologs-t.c	2008-06-05 16:11:22 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_nologs-t.c	2008-10-20 09:16:47 +0000
@@ -90,7 +90,7 @@ int main(int argc __attribute__((unused)
 
   /* write more then 1 file */
   int4store(long_tr_id, 0);
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   if (translog_write_record(&lsn,
                             LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -98,7 +98,7 @@ int main(int argc __attribute__((unused)
                             TRANSLOG_INTERNAL_PARTS + 1,
                             parts, NULL, NULL))
   {
-    fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+    fprintf(stderr, "Can't write record #0\n");
     translog_destroy();
     exit(1);
   }
@@ -111,7 +111,7 @@ int main(int argc __attribute__((unused)
                               TRANSLOG_INTERNAL_PARTS + 1,
                               parts, NULL, NULL))
     {
-      fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+      fprintf(stderr, "Can't write record #0\n");
       translog_destroy();
       exit(1);
     }
@@ -164,7 +164,7 @@ int main(int argc __attribute__((unused)
   ok(1, "Log init OK");
 
   int4store(long_tr_id, 0);
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   if (translog_write_record(&lsn,
                             LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -172,7 +172,7 @@ int main(int argc __attribute__((unused)
                             TRANSLOG_INTERNAL_PARTS + 1,
                             parts, NULL, NULL))
   {
-    fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+    fprintf(stderr, "Can't write record #0\n");
     translog_destroy();
     exit(1);
   }

=== modified file 'storage/maria/unittest/ma_test_loghandler_pagecache-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c	2008-06-05 16:11:22 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c	2008-10-20 09:16:47 +0000
@@ -131,7 +131,7 @@ int main(int argc __attribute__((unused)
   }
   my_close(file1.file, MYF(MY_WME));
   int4store(long_tr_id, 0);
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
   if (translog_write_record(&lsn,

=== modified file 'storage/maria/unittest/ma_test_loghandler_purge-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_purge-t.c	2008-06-05 16:11:22 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_purge-t.c	2008-10-20 09:16:47 +0000
@@ -90,7 +90,7 @@ int main(int argc __attribute__((unused)
 
   /* write more then 1 file */
   int4store(long_tr_id, 0);
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   if (translog_write_record(&lsn,
                             LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -136,7 +136,7 @@ int main(int argc __attribute__((unused)
 
   ok(1, "First file is removed");
 
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_buffer;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= LONG_BUFFER_SIZE;
   if (translog_write_record(&lsn,
 			    LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,
@@ -160,7 +160,7 @@ int main(int argc __attribute__((unused)
   ok(1, "Second and third files are not removed");
 
   int4store(long_tr_id, 0);
-  parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+  parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
   parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
   if (translog_write_record(&lsn,
                             LOGREC_FIXED_RECORD_0LSN_EXAMPLE,

=== modified file 'storage/maria/unittest/trnman-t.c'
--- a/storage/maria/unittest/trnman-t.c	2008-06-05 17:16:32 +0000
+++ b/storage/maria/unittest/trnman-t.c	2008-10-20 09:16:47 +0000
@@ -38,15 +38,9 @@ pthread_handler_t test_trnman(void *arg)
 {
   uint   x, y, i, n;
   TRN    *trn[MAX_ITER];
-  pthread_mutex_t mutexes[MAX_ITER];
-  pthread_cond_t conds[MAX_ITER];
   int    m= (*(int *)arg);
 
-  for (i= 0; i < MAX_ITER; i++)
-  {
-    pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
-    pthread_cond_init(&conds[i], 0);
-  }
+  my_thread_init();
 
   for (x= ((int)(intptr)(&m)); m > 0; )
   {
@@ -54,7 +48,7 @@ pthread_handler_t test_trnman(void *arg)
     m-= n= x % MAX_ITER;
     for (i= 0; i < n; i++)
     {
-      trn[i]= trnman_new_trn(&mutexes[i], &conds[i], &m + STACK_SIZE);
+      trn[i]= trnman_new_trn(0);
       if (!trn[i])
       {
         diag("trnman_new_trn() failed");
@@ -67,15 +61,12 @@ pthread_handler_t test_trnman(void *arg)
       trnman_end_trn(trn[i], y & 1);
     }
   }
-  for (i= 0; i < MAX_ITER; i++)
-  {
-    pthread_mutex_destroy(&mutexes[i]);
-    pthread_cond_destroy(&conds[i]);
-  }
   pthread_mutex_lock(&rt_mutex);
   rt_num_threads--;
   pthread_mutex_unlock(&rt_mutex);
 
+  my_thread_end();
+
   return 0;
 }
 #undef MAX_ITER
@@ -111,10 +102,10 @@ void run_test(const char *test, pthread_
 }
 
 #define ok_read_from(T1, T2, RES)                       \
-  i= trnman_can_read_from(trn[T1], trid[T2]);       \
+  i= trnman_can_read_from(trn[T1], trid[T2]);           \
   ok(i == RES, "trn" #T1 " %s read from trn" #T2, i ? "can" : "cannot")
 #define start_transaction(T)                            \
-  trn[T]= trnman_new_trn(&mutexes[T], &conds[T], &i + STACK_SIZE); \
+  trn[T]= trnman_new_trn(0);                            \
   trid[T]= trn[T]->trid
 #define commit(T)               trnman_commit_trn(trn[T])
 #define abort(T)                trnman_abort_trn(trn[T])
@@ -124,16 +115,8 @@ void test_trnman_read_from()
 {
   TRN *trn[Ntrns];
   TrID trid[Ntrns];
-  pthread_mutex_t mutexes[Ntrns];
-  pthread_cond_t conds[Ntrns];
   int i;
 
-  for (i= 0; i < Ntrns; i++)
-  {
-    pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
-    pthread_cond_init(&conds[i], 0);
-  }
-
   start_transaction(0);                    /* start trn1 */
   start_transaction(1);                    /* start trn2 */
   ok_read_from(1, 0, 0);
@@ -149,17 +132,11 @@ void test_trnman_read_from()
   ok_read_from(3, 1, 0);
   commit(3);                               /* commit trn5 */
 
-  for (i= 0; i < Ntrns; i++)
-  {
-    pthread_mutex_destroy(&mutexes[i]);
-    pthread_cond_destroy(&conds[i]);
-  }
 }
 
 int main(int argc __attribute__((unused)), char **argv)
 {
   MY_INIT(argv[0]);
-  my_init();
 
   plan(7);
 

=== modified file 'storage/myisam/ha_myisam.cc'
--- a/storage/myisam/ha_myisam.cc	2008-07-09 07:12:43 +0000
+++ b/storage/myisam/ha_myisam.cc	2008-10-20 09:16:47 +0000
@@ -63,7 +63,7 @@ static void mi_check_print_msg(HA_CHECK 
   THD* thd = (THD*)param->thd;
   Protocol *protocol= thd->protocol;
   uint length, msg_length;
-  char msgbuf[MI_MAX_MSG_BUF];
+  char msgbuf[HA_MAX_MSG_BUF];
   char name[NAME_LEN*2+2];
 
   msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);

=== modified file 'storage/myisam/mi_check.c'
--- a/storage/myisam/mi_check.c	2008-08-24 16:12:12 +0000
+++ b/storage/myisam/mi_check.c	2008-10-20 09:16:47 +0000
@@ -805,9 +805,9 @@ static int chk_index(HA_CHECK *param, MI
 	(flag=ha_key_cmp(keyinfo->seg,info->lastkey,key,key_length,
 			 comp_flag, diff_pos)) >=0)
     {
-      DBUG_DUMP("old",(uchar*) info->lastkey, info->lastkey_length);
-      DBUG_DUMP("new",(uchar*) key, key_length);
-      DBUG_DUMP("new_in_page",(uchar*) old_keypos,(uint) (keypos-old_keypos));
+      DBUG_DUMP("old",info->lastkey, info->lastkey_length);
+      DBUG_DUMP("new",key, key_length);
+      DBUG_DUMP("new_in_page",old_keypos,(uint) (keypos-old_keypos));
 
       if (comp_flag & SEARCH_FIND && flag == 0)
 	mi_check_print_error(param,"Found duplicated key at page %s",llstr(page,llbuff));
@@ -876,8 +876,8 @@ static int chk_index(HA_CHECK *param, MI
       DBUG_PRINT("test",("page: %s  record: %s  filelength: %s",
 			 llstr(page,llbuff),llstr(record,llbuff2),
 			 llstr(info->state->data_file_length,llbuff3)));
-      DBUG_DUMP("key",(uchar*) key,key_length);
-      DBUG_DUMP("new_in_page",(uchar*) old_keypos,(uint) (keypos-old_keypos));
+      DBUG_DUMP("key",key,key_length);
+      DBUG_DUMP("new_in_page",old_keypos,(uint) (keypos-old_keypos));
       goto err;
     }
     param->record_checksum+=(ha_checksum) record;
@@ -1216,6 +1216,7 @@ int chk_data_link(HA_CHECK *param, MI_IN
       param->glob_crc+= (*info->s->calc_check_checksum)(info,record);
       link_used+= (block_info.filepos - start_recpos);
       used+= (pos-start_recpos);
+      break;
     } /* switch */
     if (! got_error)
     {
@@ -4043,7 +4044,7 @@ static int sort_insert_key(MI_SORT_PARAM
       DBUG_RETURN(1);
     }
     a_length=2+nod_flag;
-    key_block->end_pos= (char*) anc_buff+2;
+    key_block->end_pos= anc_buff+2;
     lastkey=0;					/* No previous key in block */
   }
   else

=== modified file 'storage/myisam/mi_examine_log.c'
--- a/storage/myisam/mi_examine_log.c	2008-07-23 12:46:33 +0000
+++ b/storage/myisam/mi_examine_log.c	2008-10-20 09:16:47 +0000
@@ -794,7 +794,7 @@ static int find_record_with_key(struct f
 {
   uint key;
   MI_INFO *info=file_info->isam;
-  uchar tmp_key[MI_MAX_KEY_BUFF];
+  uchar tmp_key[HA_MAX_KEY_BUFF];
 
   for (key=0 ; key < info->s->base.keys ; key++)
   {

=== modified file 'storage/myisam/mi_open.c'
--- a/storage/myisam/mi_open.c	2008-08-25 14:26:49 +0000
+++ b/storage/myisam/mi_open.c	2008-10-20 09:16:47 +0000
@@ -104,7 +104,7 @@ MI_INFO *mi_open(const char *name, int m
     myisam_backup::Backup::begin().
   */
   realpath_err= my_realpath(name_buff,
-                  fn_format(org_name,name,"",MI_NAME_IEXT,4),MYF(0));
+                            fn_format(org_name,name,"",MI_NAME_IEXT,4),MYF(0));
   if (my_is_symlink(org_name) &&
       (realpath_err || (*myisam_test_invalid_symlink)(name_buff)))
   {

=== modified file 'storage/myisam/mi_page.c'
--- a/storage/myisam/mi_page.c	2008-05-09 10:27:23 +0000
+++ b/storage/myisam/mi_page.c	2008-10-20 09:16:47 +0000
@@ -49,7 +49,7 @@ uchar *_mi_fetch_keypage(register MI_INF
   {
     DBUG_PRINT("error",("page %lu had wrong page length: %u",
 			(ulong) page, page_size));
-    DBUG_DUMP("page", (uchar*) tmp, keyinfo->block_length);
+    DBUG_DUMP("page",tmp, keyinfo->block_length);
     info->last_keypage = HA_OFFSET_ERROR;
     mi_print_error(info->s, HA_ERR_CRASHED);
     my_errno = HA_ERR_CRASHED;

=== modified file 'storage/myisam/mi_search.c'
--- a/storage/myisam/mi_search.c	2008-08-25 18:23:18 +0000
+++ b/storage/myisam/mi_search.c	2008-10-20 09:16:47 +0000
@@ -816,7 +816,7 @@ uint _mi_get_pack_key(register MI_KEYDEF
 	    DBUG_PRINT("error",
                        ("Found too long null packed key: %u of %u at %p",
                         length, keyseg->length, *page_pos));
-	    DBUG_DUMP("key",(uchar*) *page_pos,16);
+	    DBUG_DUMP("key",*page_pos,16);
             mi_print_error(keyinfo->share, HA_ERR_CRASHED);
 	    my_errno=HA_ERR_CRASHED;
 	    return 0;
@@ -873,7 +873,7 @@ uint _mi_get_pack_key(register MI_KEYDEF
       {
         DBUG_PRINT("error",("Found too long packed key: %u of %u at %p",
                             length, keyseg->length, *page_pos));
-        DBUG_DUMP("key",(uchar*) *page_pos,16);
+        DBUG_DUMP("key",*page_pos,16);
         mi_print_error(keyinfo->share, HA_ERR_CRASHED);
         my_errno=HA_ERR_CRASHED;
         return 0;                               /* Error */
@@ -945,7 +945,7 @@ uint _mi_get_binary_pack_key(register MI
       DBUG_PRINT("error",
                  ("Found too long binary packed key: %u of %u at %p",
                   length, keyinfo->maxlength, *page_pos));
-      DBUG_DUMP("key",(uchar*) *page_pos,16);
+      DBUG_DUMP("key",*page_pos,16);
       mi_print_error(keyinfo->share, HA_ERR_CRASHED);
       my_errno=HA_ERR_CRASHED;
       DBUG_RETURN(0);                                 /* Wrong key */

=== modified file 'storage/myisam/mi_test_all.sh'
--- a/storage/myisam/mi_test_all.sh	2008-05-09 10:27:23 +0000
+++ b/storage/myisam/mi_test_all.sh	2008-10-20 09:16:47 +0000
@@ -135,7 +135,7 @@ echo "mi_test2$suffix $silent -L -K -R1 
 ./mi_test2$suffix $silent -m10000 -e16384 -E16384 -K -L
 ./myisamchk$suffix -sm test2
 
-/bin/rm myisam.log
+/bin/rm -f myisam.log
 ./mi_test2$suffix $silent -L -K -W -P -m50 -l
 ./myisamlog$suffix
 /bin/rm myisam.log

=== modified file 'storage/myisam/rt_index.c'
--- a/storage/myisam/rt_index.c	2008-08-25 18:23:27 +0000
+++ b/storage/myisam/rt_index.c	2008-10-20 09:16:47 +0000
@@ -378,6 +378,7 @@ err1:
 int rtree_get_first(MI_INFO *info, uint keynr, uint key_length)
 {
   my_off_t root;
+  MI_KEYDEF *keyinfo = info->s->keyinfo + keynr;
 
   if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
   {
@@ -388,7 +389,7 @@ int rtree_get_first(MI_INFO *info, uint 
   info->rtree_recursion_depth = -1;
   info->buff_used = 1;
   
-  return rtree_get_req(info, info->s->keyinfo + keynr, key_length, root, 0);
+  return rtree_get_req(info, keyinfo, key_length, root, 0);
 }
 
 

=== added file 'unittest/mysys/CMakeLists.txt'
--- a/unittest/mysys/CMakeLists.txt	1970-01-01 00:00:00 +0000
+++ b/unittest/mysys/CMakeLists.txt	2008-10-20 09:16:47 +0000
@@ -0,0 +1,37 @@
+# Copyright (C) 2007 MySQL AB
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
+SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
+
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
+                    ${CMAKE_SOURCE_DIR}/sql
+                    ${CMAKE_SOURCE_DIR}/regex
+                    ${CMAKE_SOURCE_DIR}/extra/yassl/include
+                    ${CMAKE_SOURCE_DIR}/unittest/mytap)
+ADD_EXECUTABLE(bitmap-t bitmap-t.c)
+TARGET_LINK_LIBRARIES(bitmap-t mytap mysys dbug strings ws2_32)
+
+ADD_EXECUTABLE(base64-t base64-t.c)
+TARGET_LINK_LIBRARIES(base64-t mytap mysys dbug strings ws2_32)
+
+ADD_EXECUTABLE(my_atomic-t my_atomic-t.c)
+TARGET_LINK_LIBRARIES(my_atomic-t mytap mysys dbug strings ws2_32)
+
+ADD_EXECUTABLE(lf-t lf-t.c)
+TARGET_LINK_LIBRARIES(lf-t mytap mysys dbug strings ws2_32)
+
+ADD_EXECUTABLE(waiting_threads-t waiting_threads-t.c)
+TARGET_LINK_LIBRARIES(waiting_threads-t mytap mysys dbug strings ws2_32)

=== removed file 'unittest/mysys/CMakeLists.txt'
--- a/unittest/mysys/CMakeLists.txt	2008-04-02 17:52:11 +0000
+++ b/unittest/mysys/CMakeLists.txt	1970-01-01 00:00:00 +0000
@@ -1,31 +0,0 @@
-# Copyright (C) 2007 MySQL AB
-# 
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-# 
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-# 
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
-
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
-                    ${CMAKE_SOURCE_DIR}/sql
-                    ${CMAKE_SOURCE_DIR}/regex
-                    ${CMAKE_SOURCE_DIR}/extra/yassl/include
-                    ${CMAKE_SOURCE_DIR}/unittest/mytap)
-ADD_EXECUTABLE(bitmap-t bitmap-t.c)
-TARGET_LINK_LIBRARIES(bitmap-t mytap mysys dbug strings ws2_32)
-
-ADD_EXECUTABLE(base64-t base64-t.c)
-TARGET_LINK_LIBRARIES(base64-t mytap mysys dbug strings ws2_32)
-
-ADD_EXECUTABLE(my_atomic-t my_atomic-t.c)
-TARGET_LINK_LIBRARIES(my_atomic-t mytap mysys dbug strings ws2_32)

=== modified file 'unittest/mysys/Makefile.am'
--- a/unittest/mysys/Makefile.am	2008-05-29 15:44:11 +0000
+++ b/unittest/mysys/Makefile.am	2008-10-20 09:16:47 +0000
@@ -16,7 +16,9 @@
 INCLUDES =		@ZLIB_INCLUDES@ -I$(top_builddir)/include \
 			-I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap 
 
-noinst_PROGRAMS  =    bitmap-t base64-t my_atomic-t
+noinst_HEADERS = thr_template.c
+
+noinst_PROGRAMS  = bitmap-t base64-t my_atomic-t lf-t waiting_threads-t
 
 LDADD 		= $(top_builddir)/unittest/mytap/libmytap.a \
 		  $(top_builddir)/mysys/libmysys.a \

=== added file 'unittest/mysys/lf-t.c'
--- a/unittest/mysys/lf-t.c	1970-01-01 00:00:00 +0000
+++ b/unittest/mysys/lf-t.c	2008-07-29 14:10:24 +0000
@@ -0,0 +1,168 @@
+/* Copyright (C) 2006 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include "thr_template.c"
+
+#include <lf.h>
+
+int32 inserts= 0, N;
+LF_ALLOCATOR lf_allocator;
+LF_HASH lf_hash;
+
+/*
+  pin allocator - alloc and release an element in a loop
+*/
+pthread_handler_t test_lf_pinbox(void *arg)
+{
+  int    m= *(int *)arg;
+  int32 x= 0;
+  LF_PINS *pins;
+
+  my_thread_init();
+
+  pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+
+  for (x= ((int)(intptr)(&m)); m ; m--)
+  {
+    lf_pinbox_put_pins(pins);
+    pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+  }
+  lf_pinbox_put_pins(pins);
+  pthread_mutex_lock(&mutex);
+  if (!--running_threads) pthread_cond_signal(&cond);
+  pthread_mutex_unlock(&mutex);
+  my_thread_end();
+  return 0;
+}
+
+typedef union {
+  int32 data;
+  void *not_used;
+} TLA;
+
+pthread_handler_t test_lf_alloc(void *arg)
+{
+  int    m= (*(int *)arg)/2;
+  int32 x,y= 0;
+  LF_PINS *pins;
+
+  my_thread_init();
+
+  pins= lf_alloc_get_pins(&lf_allocator);
+
+  for (x= ((int)(intptr)(&m)); m ; m--)
+  {
+    TLA *node1, *node2;
+    x= (x*m+0x87654321) & INT_MAX32;
+    node1= (TLA *)lf_alloc_new(pins);
+    node1->data= x;
+    y+= node1->data;
+    node1->data= 0;
+    node2= (TLA *)lf_alloc_new(pins);
+    node2->data= x;
+    y-= node2->data;
+    node2->data= 0;
+    lf_alloc_free(pins, node1);
+    lf_alloc_free(pins, node2);
+  }
+  lf_alloc_put_pins(pins);
+  pthread_mutex_lock(&mutex);
+  bad+= y;
+
+  if (--N == 0)
+  {
+    diag("%d mallocs, %d pins in stack",
+         lf_allocator.mallocs, lf_allocator.pinbox.pins_in_array);
+#ifdef MY_LF_EXTRA_DEBUG
+    bad|= lf_allocator.mallocs - lf_alloc_pool_count(&lf_allocator);
+#endif
+  }
+  if (!--running_threads) pthread_cond_signal(&cond);
+  pthread_mutex_unlock(&mutex);
+  my_thread_end();
+  return 0;
+}
+
+#define N_TLH 1000
+pthread_handler_t test_lf_hash(void *arg)
+{
+  int    m= (*(int *)arg)/(2*N_TLH);
+  int32 x,y,z,sum= 0, ins= 0;
+  LF_PINS *pins;
+
+  my_thread_init();
+
+  pins= lf_hash_get_pins(&lf_hash);
+
+  for (x= ((int)(intptr)(&m)); m ; m--)
+  {
+    int i;
+    y= x;
+    for (i= 0; i < N_TLH; i++)
+    {
+      x= (x*(m+i)+0x87654321) & INT_MAX32;
+      z= (x<0) ? -x : x;
+      if (lf_hash_insert(&lf_hash, pins, &z))
+      {
+        sum+= z;
+        ins++;
+      }
+    }
+    for (i= 0; i < N_TLH; i++)
+    {
+      y= (y*(m+i)+0x87654321) & INT_MAX32;
+      z= (y<0) ? -y : y;
+      if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z)))
+        sum-= z;
+    }
+  }
+  lf_hash_put_pins(pins);
+  pthread_mutex_lock(&mutex);
+  bad+= sum;
+  inserts+= ins;
+
+  if (--N == 0)
+  {
+    diag("%d mallocs, %d pins in stack, %d hash size, %d inserts",
+         lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_array,
+         lf_hash.size, inserts);
+    bad|= lf_hash.count;
+  }
+  if (!--running_threads) pthread_cond_signal(&cond);
+  pthread_mutex_unlock(&mutex);
+  my_thread_end();
+  return 0;
+}
+
+
+void do_tests()
+{
+  plan(4);
+
+  lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used));
+  lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0,
+               &my_charset_bin);
+
+  bad= my_atomic_initialize();
+  ok(!bad, "my_atomic_initialize() returned %d", bad);
+
+  test_concurrently("lf_pinbox", test_lf_pinbox, N= THREADS, CYCLES);
+  test_concurrently("lf_alloc",  test_lf_alloc,  N= THREADS, CYCLES);
+  test_concurrently("lf_hash",   test_lf_hash,   N= THREADS, CYCLES/10);
+
+  lf_hash_destroy(&lf_hash);
+  lf_alloc_destroy(&lf_allocator);
+}
+

=== modified file 'unittest/mysys/my_atomic-t.c'
--- a/unittest/mysys/my_atomic-t.c	2008-05-29 15:44:11 +0000
+++ b/unittest/mysys/my_atomic-t.c	2008-10-20 09:16:47 +0000
@@ -13,11 +13,7 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
-#include <my_global.h>
-#include <my_sys.h>
-#include <my_atomic.h>
-#include <tap.h>
-#include <lf.h>
+#include "thr_template.c"
 
 /* at least gcc 3.4.5 and 3.4.6 (but not 3.2.3) on RHEL */
 #if __GNUC__ == 3 && __GNUC_MINOR__ == 4
@@ -26,20 +22,12 @@
 #define GCC_BUG_WORKAROUND
 #endif
 
-volatile uint32 a32,b32;
-volatile int32  c32, N;
+volatile uint32 b32;
+volatile int32  c32;
 my_atomic_rwlock_t rwl;
-LF_ALLOCATOR lf_allocator;
-LF_HASH lf_hash;
-pthread_attr_t thr_attr;
-pthread_mutex_t mutex;
-pthread_cond_t cond;
-uint running_threads;
-size_t stacksize= 0;
-#define STACK_SIZE (((int)stacksize-2048)*STACK_DIRECTION)
 
 /* add and sub a random number in a loop. Must get 0 at the end */
-pthread_handler_t test_atomic_add_handler(void *arg)
+pthread_handler_t test_atomic_add(void *arg)
 {
   int    m= (*(int *)arg)/2;
   GCC_BUG_WORKAROUND int32 x;
@@ -47,11 +35,11 @@ pthread_handler_t test_atomic_add_handle
   {
     x= (x*m+0x87654321) & INT_MAX32;
     my_atomic_rwlock_wrlock(&rwl);
-    my_atomic_add32(&a32, x);
+    my_atomic_add32(&bad, x);
     my_atomic_rwlock_wrunlock(&rwl);
 
     my_atomic_rwlock_wrlock(&rwl);
-    my_atomic_add32(&a32, -x);
+    my_atomic_add32(&bad, -x);
     my_atomic_rwlock_wrunlock(&rwl);
   }
   pthread_mutex_lock(&mutex);
@@ -62,13 +50,13 @@ pthread_handler_t test_atomic_add_handle
 
 /*
   1. generate thread number 0..N-1 from b32
-  2. add it to a32
+  2. add it to bad
   3. swap thread numbers in c32
   4. (optionally) one more swap to avoid 0 as a result
-  5. subtract result from a32
-  must get 0 in a32 at the end
+  5. subtract result from bad
+  must get 0 in bad at the end
 */
-pthread_handler_t test_atomic_fas_handler(void *arg)
+pthread_handler_t test_atomic_fas(void *arg)
 {
   int    m= *(int *)arg;
   int32  x;
@@ -78,7 +66,7 @@ pthread_handler_t test_atomic_fas_handle
   my_atomic_rwlock_wrunlock(&rwl);
 
   my_atomic_rwlock_wrlock(&rwl);
-  my_atomic_add32(&a32, x);
+  my_atomic_add32(&bad, x);
   my_atomic_rwlock_wrunlock(&rwl);
 
   for (; m ; m--)
@@ -96,7 +84,7 @@ pthread_handler_t test_atomic_fas_handle
   }
 
   my_atomic_rwlock_wrlock(&rwl);
-  my_atomic_add32(&a32, -x);
+  my_atomic_add32(&bad, -x);
   my_atomic_rwlock_wrunlock(&rwl);
 
   pthread_mutex_lock(&mutex);
@@ -106,28 +94,28 @@ pthread_handler_t test_atomic_fas_handle
 }
 
 /*
-  same as test_atomic_add_handler, but my_atomic_add32 is emulated with
+  same as test_atomic_add, but my_atomic_add32 is emulated with
   my_atomic_cas32 - notice that the slowdown is proportional to the
   number of CPUs
 */
-pthread_handler_t test_atomic_cas_handler(void *arg)
+pthread_handler_t test_atomic_cas(void *arg)
 {
   int    m= (*(int *)arg)/2, ok= 0;
   GCC_BUG_WORKAROUND int32 x, y;
   for (x= ((int)(intptr)(&m)); m ; m--)
   {
     my_atomic_rwlock_wrlock(&rwl);
-    y= my_atomic_load32(&a32);
+    y= my_atomic_load32(&bad);
     my_atomic_rwlock_wrunlock(&rwl);
     x= (x*m+0x87654321) & INT_MAX32;
     do {
       my_atomic_rwlock_wrlock(&rwl);
-      ok= my_atomic_cas32(&a32, &y, (uint32)y+x);
+      ok= my_atomic_cas32(&bad, &y, (uint32)y+x);
       my_atomic_rwlock_wrunlock(&rwl);
     } while (!ok) ;
     do {
       my_atomic_rwlock_wrlock(&rwl);
-      ok= my_atomic_cas32(&a32, &y, y-x);
+      ok= my_atomic_cas32(&bad, &y, y-x);
       my_atomic_rwlock_wrunlock(&rwl);
     } while (!ok) ;
   }
@@ -138,215 +126,21 @@ pthread_handler_t test_atomic_cas_handle
 }
 
 
-/*
-  pin allocator - alloc and release an element in a loop
-*/
-pthread_handler_t test_lf_pinbox(void *arg)
-{
-  int    m= *(int *)arg;
-  int32 x= 0;
-  LF_PINS *pins;
-
-  pins= lf_pinbox_get_pins(&lf_allocator.pinbox, &m + STACK_SIZE);
-
-  for (x= ((int)(intptr)(&m)); m ; m--)
-  {
-    lf_pinbox_put_pins(pins);
-    pins= lf_pinbox_get_pins(&lf_allocator.pinbox, &m + STACK_SIZE);
-  }
-  lf_pinbox_put_pins(pins);
-  pthread_mutex_lock(&mutex);
-  if (!--running_threads) pthread_cond_signal(&cond);
-  pthread_mutex_unlock(&mutex);
-  return 0;
-}
-
-typedef union {
-  int32 data;
-  void *not_used;
-} TLA;
-
-pthread_handler_t test_lf_alloc(void *arg)
-{
-  int    m= (*(int *)arg)/2;
-  int32 x,y= 0;
-  LF_PINS *pins;
-
-  pins= lf_alloc_get_pins(&lf_allocator, &m + STACK_SIZE);
-
-  for (x= ((int)(intptr)(&m)); m ; m--)
-  {
-    TLA *node1, *node2;
-    x= (x*m+0x87654321) & INT_MAX32;
-    node1= (TLA *)lf_alloc_new(pins);
-    node1->data= x;
-    y+= node1->data;
-    node1->data= 0;
-    node2= (TLA *)lf_alloc_new(pins);
-    node2->data= x;
-    y-= node2->data;
-    node2->data= 0;
-    lf_alloc_free(pins, node1);
-    lf_alloc_free(pins, node2);
-  }
-  lf_alloc_put_pins(pins);
-  my_atomic_rwlock_wrlock(&rwl);
-  my_atomic_add32(&a32, y);
-
-  if (my_atomic_add32(&N, -1) == 1)
-  {
-    diag("%d mallocs, %d pins in stack",
-         lf_allocator.mallocs, lf_allocator.pinbox.pins_in_array);
-#ifdef MY_LF_EXTRA_DEBUG
-    a32|= lf_allocator.mallocs - lf_alloc_pool_count(&lf_allocator);
-#endif
-  }
-  my_atomic_rwlock_wrunlock(&rwl);
-  pthread_mutex_lock(&mutex);
-  if (!--running_threads) pthread_cond_signal(&cond);
-  pthread_mutex_unlock(&mutex);
-  return 0;
-}
-
-#define N_TLH 1000
-pthread_handler_t test_lf_hash(void *arg)
-{
-  int    m= (*(int *)arg)/(2*N_TLH);
-  int32 x,y,z,sum= 0, ins= 0;
-  LF_PINS *pins;
-
-  pins= lf_hash_get_pins(&lf_hash, &m + STACK_SIZE);
-
-  for (x= ((int)(intptr)(&m)); m ; m--)
-  {
-    int i;
-    y= x;
-    for (i= 0; i < N_TLH; i++)
-    {
-      x= (x*(m+i)+0x87654321) & INT_MAX32;
-      z= (x<0) ? -x : x;
-      if (lf_hash_insert(&lf_hash, pins, &z))
-      {
-        sum+= z;
-        ins++;
-      }
-    }
-    for (i= 0; i < N_TLH; i++)
-    {
-      y= (y*(m+i)+0x87654321) & INT_MAX32;
-      z= (y<0) ? -y : y;
-      if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z)))
-        sum-= z;
-    }
-  }
-  lf_hash_put_pins(pins);
-  my_atomic_rwlock_wrlock(&rwl);
-  my_atomic_add32(&a32, sum);
-  my_atomic_add32(&b32, ins);
-
-  if (my_atomic_add32(&N, -1) == 1)
-  {
-    diag("%d mallocs, %d pins in stack, %d hash size, %d inserts",
-         lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_array,
-         lf_hash.size, b32);
-    a32|= lf_hash.count;
-  }
-  my_atomic_rwlock_wrunlock(&rwl);
-  pthread_mutex_lock(&mutex);
-  if (!--running_threads) pthread_cond_signal(&cond);
-  pthread_mutex_unlock(&mutex);
-  return 0;
-}
-
-
-void test_atomic(const char *test, pthread_handler handler, int n, int m)
+void do_tests()
 {
-  pthread_t t;
-  ulonglong now= my_getsystime();
+  plan(4);
 
-  a32= 0;
-  b32= 0;
-  c32= 0;
+  bad= my_atomic_initialize();
+  ok(!bad, "my_atomic_initialize() returned %d", bad);
 
-  diag("Testing %s with %d threads, %d iterations... ", test, n, m);
-  for (running_threads= n ; n ; n--)
-  {
-    if (pthread_create(&t, &thr_attr, handler, &m) != 0)
-    {
-      diag("Could not create thread");
-      abort();
-    }
-  }
-  pthread_mutex_lock(&mutex);
-  while (running_threads)
-    pthread_cond_wait(&cond, &mutex);
-  pthread_mutex_unlock(&mutex);
-
-  now= my_getsystime()-now;
-  ok(a32 == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, a32);
-}
-
-
-int main()
-{
-  int err;
-  MY_INIT("my_atomic-t.c");
-
-  diag("N CPUs: %d, atomic ops: %s", my_getncpus(), MY_ATOMIC_MODE);
-  err= my_atomic_initialize();
-
-  plan(7);
-  ok(err == 0, "my_atomic_initialize() returned %d", err);
-
-  pthread_mutex_init(&mutex, 0);
-  pthread_cond_init(&cond, 0);
   my_atomic_rwlock_init(&rwl);
-  lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used));
-  lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0,
-               &my_charset_bin);
-  pthread_attr_init(&thr_attr);
-  pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
-#ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE
-  pthread_attr_getstacksize(&thr_attr, &stacksize);
-  if (stacksize == 0)
-#endif
-    stacksize= PTHREAD_STACK_MIN;
 
-#ifdef MY_ATOMIC_MODE_RWLOCKS
-#if defined(HPUX11) || defined(__POWERPC__) /* showed to be very slow (scheduler-related) */
-#define CYCLES 300
-#else
-#define CYCLES 3000
-#endif
-#else
-#ifdef HPUX11
-#define CYCLES 30000
-#else
-#define CYCLES 300000
-#endif
-#endif
-#define THREADS 100
+  b32= c32= 0;
+  test_concurrently("my_atomic_add32", test_atomic_add, THREADS, CYCLES);
+  b32= c32= 0;
+  test_concurrently("my_atomic_fas32", test_atomic_fas, THREADS, CYCLES);
+  b32= c32= 0;
+  test_concurrently("my_atomic_cas32", test_atomic_cas, THREADS, CYCLES);
 
-  test_atomic("my_atomic_add32",  test_atomic_add_handler, THREADS,CYCLES);
-  test_atomic("my_atomic_fas32",  test_atomic_fas_handler, THREADS,CYCLES);
-  test_atomic("my_atomic_cas32",  test_atomic_cas_handler, THREADS,CYCLES);
-  test_atomic("lf_pinbox",        test_lf_pinbox,          THREADS,CYCLES);
-  test_atomic("lf_alloc",         test_lf_alloc,           THREADS,CYCLES);
-  test_atomic("lf_hash",          test_lf_hash,            THREADS,CYCLES/10);
-
-  lf_hash_destroy(&lf_hash);
-  lf_alloc_destroy(&lf_allocator);
-
-  /*
-    workaround until we know why it crashes randomly on some machine
-    (BUG#22320).
-  */
-  sleep(2);
-  pthread_mutex_destroy(&mutex);
-  pthread_cond_destroy(&cond);
-  pthread_attr_destroy(&thr_attr);
   my_atomic_rwlock_destroy(&rwl);
-  my_end(0);
-  return exit_status();
 }
-

=== added file 'unittest/mysys/thr_template.c'
--- a/unittest/mysys/thr_template.c	1970-01-01 00:00:00 +0000
+++ b/unittest/mysys/thr_template.c	2008-08-29 19:50:04 +0000
@@ -0,0 +1,92 @@
+/* Copyright (C) 2006 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <tap.h>
+
+volatile uint32 bad;
+pthread_attr_t thr_attr;
+pthread_mutex_t mutex;
+pthread_cond_t cond;
+uint running_threads;
+
+void do_tests();
+
+void test_concurrently(const char *test, pthread_handler handler, int n, int m)
+{
+  pthread_t t;
+  ulonglong now= my_getsystime();
+
+  bad= 0;
+
+  diag("Testing %s with %d threads, %d iterations... ", test, n, m);
+  for (running_threads= n ; n ; n--)
+  {
+    if (pthread_create(&t, &thr_attr, handler, &m) != 0)
+    {
+      diag("Could not create thread");
+      abort();
+    }
+  }
+  pthread_mutex_lock(&mutex);
+  while (running_threads)
+    pthread_cond_wait(&cond, &mutex);
+  pthread_mutex_unlock(&mutex);
+
+  now= my_getsystime()-now;
+  ok(!bad, "tested %s in %g secs (%d)", test, ((double)now)/1e7, bad);
+}
+
+int main(int argc __attribute__((unused)), char **argv)
+{
+  MY_INIT("thd_template");
+
+  if (argv[1] && *argv[1])
+    DBUG_SET_INITIAL(argv[1]);
+
+  pthread_mutex_init(&mutex, 0);
+  pthread_cond_init(&cond, 0);
+  pthread_attr_init(&thr_attr);
+  pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
+
+#ifdef MY_ATOMIC_MODE_RWLOCKS
+#if defined(HPUX11) || defined(__POWERPC__) /* showed to be very slow (scheduler-related) */
+#define CYCLES 300
+#else
+#define CYCLES 3000
+#endif
+#else
+#define CYCLES 3000
+#endif
+#define THREADS 30
+
+  diag("N CPUs: %d, atomic ops: %s", my_getncpus(), MY_ATOMIC_MODE);
+
+  do_tests();
+
+  /*
+    workaround until we know why it crashes randomly on some machine
+    (BUG#22320).
+  */
+  sleep(2);
+  pthread_mutex_destroy(&mutex);
+  pthread_cond_destroy(&cond);
+  pthread_attr_destroy(&thr_attr);
+  my_end(0);
+  return exit_status();
+}
+

=== added file 'unittest/mysys/waiting_threads-t.c'
--- a/unittest/mysys/waiting_threads-t.c	1970-01-01 00:00:00 +0000
+++ b/unittest/mysys/waiting_threads-t.c	2008-10-12 10:09:52 +0000
@@ -0,0 +1,269 @@
+/* Copyright (C) 2006 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include "thr_template.c"
+#include <waiting_threads.h>
+#include <m_string.h>
+#include <locale.h>
+
+struct test_wt_thd {
+  WT_THD thd;
+  pthread_mutex_t lock;
+} thds[THREADS];
+
+uint i, cnt;
+pthread_mutex_t lock;
+
+ulong wt_timeout_short=100, wt_deadlock_search_depth_short=4;
+ulong wt_timeout_long=10000, wt_deadlock_search_depth_long=15;
+
+#define reset(ARRAY) bzero(ARRAY, sizeof(ARRAY))
+
+enum { LATEST, RANDOM, YOUNGEST, LOCKS } kill_strategy;
+
+WT_RESOURCE_TYPE restype={ wt_resource_id_memcmp, 0};
+
+#define rnd() ((uint)(my_rnd(&rand) * INT_MAX32))
+
+/*
+  Stress-test thread body: each of *arg cycles waits on a random number of
+  random other threads. Always succeeds unless it crashes or hangs.
+*/
+pthread_handler_t test_wt(void *arg)
+{
+  int    m, n, i, id, res;
+  struct my_rnd_struct rand;
+
+  my_thread_init();
+
+  pthread_mutex_lock(&lock);
+  id= cnt++;                            /* this thread's slot in thds[] */
+  pthread_mutex_unlock(&lock);
+
+  my_rnd_init(&rand, (ulong)(intptr)&m, id);
+  if (kill_strategy == YOUNGEST)
+    thds[id].thd.weight= (ulong)~my_getsystime();
+  if (kill_strategy == LOCKS)
+    thds[id].thd.weight= 0;
+
+  for (m= *(int *)arg; m ; m--)
+  {
+    WT_RESOURCE_ID resid;
+    int blockers[THREADS/10], j, k;
+
+    resid.value= id;
+    resid.type= &restype;
+
+    res= 0;
+
+    for (j= n= (rnd() % THREADS)/10; !res && j >= 0; j--)
+    {
+retry:
+      i= rnd() % (THREADS-1);           /* pick a random thread index ... */
+      if (i >= id) i++;                 /* ... that is never our own id   */
+      /* blockers[j] is not set yet: compare only with slots n..j+1 */
+      for (k=n; k > j; k--)
+        if (blockers[k] == i)
+          goto retry;
+      blockers[j]= i;
+
+      if (kill_strategy == RANDOM)
+        thds[id].thd.weight= rnd();
+
+      pthread_mutex_lock(& thds[i].lock);
+      res= wt_thd_will_wait_for(& thds[id].thd, & thds[i].thd, &resid);
+      pthread_mutex_unlock(& thds[i].lock);
+    }
+
+    if (!res)                           /* every wait was registered */
+    {
+      pthread_mutex_lock(&lock);
+      res= wt_thd_cond_timedwait(& thds[id].thd, &lock);
+      pthread_mutex_unlock(&lock);
+    }
+
+    if (res)                            /* non-zero: release and reset weight */
+    {
+      pthread_mutex_lock(& thds[id].lock);
+      pthread_mutex_lock(&lock);
+      wt_thd_release_all(& thds[id].thd);
+      pthread_mutex_unlock(&lock);
+      pthread_mutex_unlock(& thds[id].lock);
+      if (kill_strategy == LOCKS)
+        thds[id].thd.weight= 0;
+      if (kill_strategy == YOUNGEST)
+        thds[id].thd.weight= (ulong)~my_getsystime();
+    }
+    else if (kill_strategy == LOCKS)
+      thds[id].thd.weight++;
+  }
+
+  pthread_mutex_lock(& thds[id].lock);
+  pthread_mutex_lock(&lock);
+  wt_thd_release_all(& thds[id].thd);
+  pthread_mutex_unlock(&lock);
+  pthread_mutex_unlock(& thds[id].lock);
+
+#ifndef DBUG_OFF
+  {
+#define DEL "(deleted)"
+    char *x=malloc(strlen(thds[id].thd.name)+sizeof(DEL)+1);
+    strxmov(x, thds[id].thd.name, DEL, 0);
+    thds[id].thd.name=x; /* it's a memory leak, go on, shoot me */
+  }
+#endif
+
+  pthread_mutex_lock(&mutex);
+  if (!--running_threads) pthread_cond_signal(&cond);
+  pthread_mutex_unlock(&mutex);
+  DBUG_PRINT("wt", ("exiting"));
+  my_thread_end();
+  return 0;
+}
+
+void do_one_test()
+{
+  double sum, sum0;
+
+  reset(wt_cycle_stats);
+  reset(wt_wait_stats);
+  wt_success_stats=0;
+  cnt=0;
+  test_concurrently("waiting_threads", test_wt, THREADS, CYCLES);
+
+  sum=sum0=0;
+  for (cnt=0; cnt < WT_CYCLE_STATS; cnt++)
+    sum+= wt_cycle_stats[0][cnt] + wt_cycle_stats[1][cnt];
+  for (cnt=0; cnt < WT_CYCLE_STATS; cnt++)
+    if (wt_cycle_stats[0][cnt] + wt_cycle_stats[1][cnt] > 0)
+    {
+      sum0+=wt_cycle_stats[0][cnt] + wt_cycle_stats[1][cnt];
+      diag("deadlock cycles of length %2u: %4u %4u %8.2f %%", cnt,
+           wt_cycle_stats[0][cnt], wt_cycle_stats[1][cnt], 1e2*sum0/sum);
+    }
+  diag("depth exceeded: %u %u",
+       wt_cycle_stats[0][cnt], wt_cycle_stats[1][cnt]);
+  for (cnt=0; cnt < WT_WAIT_STATS; cnt++)
+    if (wt_wait_stats[cnt]>0)
+      diag("deadlock waits up to %7llu us: %5u",
+           wt_wait_table[cnt], wt_wait_stats[cnt]);
+  diag("timed out: %u", wt_wait_stats[cnt]);
+  diag("successes: %u", wt_success_stats);
+}
+
+void do_tests()
+{
+  plan(12);                          /* 1 init + 7 manual + 4 kill strategies */
+  compile_time_assert(THREADS >= 4); /* manual test indexes thds[0..3] */
+
+  DBUG_PRINT("wt", ("================= initialization ==================="));
+
+  bad= my_atomic_initialize();
+  ok(!bad, "my_atomic_initialize() returned %d", bad);
+
+  pthread_mutex_init(&lock, 0);
+  wt_init();
+  for (cnt=0; cnt < THREADS; cnt++)
+  {
+    wt_thd_lazy_init(& thds[cnt].thd,
+                     & wt_deadlock_search_depth_short, & wt_timeout_short,
+                     & wt_deadlock_search_depth_long, & wt_timeout_long);
+    pthread_mutex_init(& thds[cnt].lock, 0);
+  }
+  {
+    WT_RESOURCE_ID resid[3];
+    for (i=0; i < 3; i++)
+    {
+      resid[i].value= i+1;
+      resid[i].type= &restype;
+    }
+
+    DBUG_PRINT("wt", ("================= manual test ==================="));
+
+#define ok_wait(X,Y, R) \
+    ok(wt_thd_will_wait_for(& thds[X].thd, & thds[Y].thd, &resid[R]) == 0, \
+      "thd[" #X "] will wait for thd[" #Y "]")
+#define ok_deadlock(X,Y,R) \
+    ok(wt_thd_will_wait_for(& thds[X].thd, & thds[Y].thd, &resid[R]) == WT_DEADLOCK, \
+      "thd[" #X "] will wait for thd[" #Y "] - deadlock")
+
+    ok_wait(0,1,0);
+    ok_wait(0,2,0);
+    ok_wait(0,3,0);
+
+    pthread_mutex_lock(&lock);
+    bad= wt_thd_cond_timedwait(& thds[0].thd, &lock);
+    pthread_mutex_unlock(&lock);
+    ok(bad == ETIMEDOUT, "timeout test returned %d", bad);
+
+    ok_wait(0,1,0);
+    ok_wait(1,2,1);
+    ok_deadlock(2,0,2);              /* would close the cycle 0 -> 1 -> 2 -> 0 */
+
+    pthread_mutex_lock(&lock);
+    wt_thd_cond_timedwait(& thds[0].thd, &lock);
+    wt_thd_cond_timedwait(& thds[1].thd, &lock);
+    wt_thd_release_all(& thds[0].thd);
+    wt_thd_release_all(& thds[1].thd);
+    wt_thd_release_all(& thds[2].thd);
+    wt_thd_release_all(& thds[3].thd);
+    pthread_mutex_unlock(&lock);
+
+    for (cnt=0; cnt < 3; cnt++)
+    {
+      wt_thd_destroy(& thds[cnt].thd);
+      wt_thd_lazy_init(& thds[cnt].thd,
+                       & wt_deadlock_search_depth_short, & wt_timeout_short,
+                       & wt_deadlock_search_depth_long, & wt_timeout_long);
+    }
+  }
+
+  wt_deadlock_search_depth_short=6;
+  wt_timeout_short=1000;
+  wt_timeout_long= 100;
+  wt_deadlock_search_depth_long=16;
+  DBUG_PRINT("wt", ("================= stress test ==================="));
+
+  diag("timeout_short=%lu us, deadlock_search_depth_short=%lu",
+       wt_timeout_short, wt_deadlock_search_depth_short);
+  diag("timeout_long=%lu us, deadlock_search_depth_long=%lu",
+       wt_timeout_long, wt_deadlock_search_depth_long);
+
+#define test_kill_strategy(X)                   \
+  diag("kill strategy: " #X);                   \
+  kill_strategy=X;                              \
+  do_one_test();
+
+  test_kill_strategy(LATEST);
+  SKIP_BIG_TESTS(1)
+  {
+    test_kill_strategy(RANDOM);
+  }
+  test_kill_strategy(YOUNGEST);
+  test_kill_strategy(LOCKS);
+
+  DBUG_PRINT("wt", ("================= cleanup ==================="));
+  pthread_mutex_lock(&lock);
+  for (cnt=0; cnt < THREADS; cnt++)
+  {
+    wt_thd_release_all(& thds[cnt].thd);
+    wt_thd_destroy(& thds[cnt].thd);
+    pthread_mutex_destroy(& thds[cnt].lock);
+  }
+  pthread_mutex_unlock(&lock);
+  wt_end();
+  pthread_mutex_destroy(&lock);
+}
+

=== removed file 'win/build-vs9.bat.moved'
--- a/win/build-vs9.bat.moved	2008-09-12 08:58:52 +0000
+++ b/win/build-vs9.bat.moved	1970-01-01 00:00:00 +0000
@@ -1,18 +0,0 @@
-@echo off
-
-REM Copyright (C) 2006 MySQL AB
-REM 
-REM This program is free software; you can redistribute it and/or modify
-REM it under the terms of the GNU General Public License as published by
-REM the Free Software Foundation; version 2 of the License.
-REM 
-REM This program is distributed in the hope that it will be useful,
-REM but WITHOUT ANY WARRANTY; without even the implied warranty of
-REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-REM GNU General Public License for more details.
-REM 
-REM You should have received a copy of the GNU General Public License
-REM along with this program; if not, write to the Free Software
-REM Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-cmake -G "Visual Studio 9 2008"
-

=== removed file 'win/build-vs9_x64.bat.moved'
--- a/win/build-vs9_x64.bat.moved	2008-09-12 08:58:52 +0000
+++ b/win/build-vs9_x64.bat.moved	1970-01-01 00:00:00 +0000
@@ -1,18 +0,0 @@
-@echo off
-
-REM Copyright (C) 2006 MySQL AB
-REM 
-REM This program is free software; you can redistribute it and/or modify
-REM it under the terms of the GNU General Public License as published by
-REM the Free Software Foundation; version 2 of the License.
-REM 
-REM This program is distributed in the hope that it will be useful,
-REM but WITHOUT ANY WARRANTY; without even the implied warranty of
-REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-REM GNU General Public License for more details.
-REM 
-REM You should have received a copy of the GNU General Public License
-REM along with this program; if not, write to the Free Software
-REM Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-cmake -G "Visual Studio 9 2008 Win64"
-

Thread
bzr commit into mysql-6.0 branch (guilhem:2742) Guilhem Bichot 21 Oct