#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-maria/
2668 Michael Widenius 2008-09-06 [merge]
Automatic merge
added:
include/waiting_threads.h
mysys/waiting_threads.c
unittest/mysys/lf-t.c
unittest/mysys/thr_template.c
unittest/mysys/waiting_threads-t.c
modified:
client/mysqltest.c
configure.in
include/Makefile.am
include/atomic/generic-msvc.h
include/lf.h
include/my_global.h
include/my_pthread.h
include/my_sys.h
mysql-test/r/maria.result
mysql-test/t/maria.test
mysys/CMakeLists.txt
mysys/Makefile.am
mysys/array.c
mysys/lf_alloc-pin.c
mysys/lf_hash.c
mysys/my_static.c
mysys/my_thr_init.c
server-tools/instance-manager/thread_registry.cc
sql-common/client.c
sql/mysqld.cc
sql/set_var.cc
sql/sql_class.cc
sql/sql_class.h
sql/sql_profile.cc
sql/sql_profile.h
storage/maria/ha_maria.cc
storage/maria/ma_blockrec.c
storage/maria/ma_checkpoint.c
storage/maria/ma_commit.c
storage/maria/ma_create.c
storage/maria/ma_delete_table.c
storage/maria/ma_loghandler.c
storage/maria/ma_rename.c
storage/maria/ma_state.c
storage/maria/ma_static.c
storage/maria/ma_write.c
storage/maria/maria_def.h
storage/maria/trnman.c
storage/maria/trnman.h
storage/maria/trnman_public.h
storage/maria/unittest/ma_test_loghandler-t.c
storage/maria/unittest/ma_test_loghandler_first_lsn-t.c
storage/maria/unittest/ma_test_loghandler_max_lsn-t.c
storage/maria/unittest/ma_test_loghandler_multigroup-t.c
storage/maria/unittest/ma_test_loghandler_multithread-t.c
storage/maria/unittest/ma_test_loghandler_noflush-t.c
storage/maria/unittest/ma_test_loghandler_nologs-t.c
storage/maria/unittest/ma_test_loghandler_pagecache-t.c
storage/maria/unittest/ma_test_loghandler_purge-t.c
storage/maria/unittest/trnman-t.c
storage/myisam/mi_check.c
storage/myisam/mi_page.c
storage/myisam/mi_search.c
storage/myisammrg/myrg_create.c
unittest/mysys/CMakeLists.txt
unittest/mysys/Makefile.am
unittest/mysys/my_atomic-t.c
=== modified file 'client/mysqltest.c'
--- a/client/mysqltest.c 2008-07-01 20:47:09 +0000
+++ b/client/mysqltest.c 2008-08-02 06:09:28 +0000
@@ -2815,7 +2815,7 @@ void do_mkdir(struct st_command *command
int error;
static DYNAMIC_STRING ds_dirname;
const struct command_arg mkdir_args[] = {
- "dirname", ARG_STRING, TRUE, &ds_dirname, "Directory to create"
+ {"dirname", ARG_STRING, TRUE, &ds_dirname, "Directory to create"}
};
DBUG_ENTER("do_mkdir");
@@ -2845,7 +2845,7 @@ void do_rmdir(struct st_command *command
int error;
static DYNAMIC_STRING ds_dirname;
const struct command_arg rmdir_args[] = {
- "dirname", ARG_STRING, TRUE, &ds_dirname, "Directory to remove"
+ { "dirname", ARG_STRING, TRUE, &ds_dirname, "Directory to remove" }
};
DBUG_ENTER("do_rmdir");
=== modified file 'configure.in'
--- a/configure.in 2008-06-05 19:48:43 +0000
+++ b/configure.in 2008-07-29 14:10:24 +0000
@@ -250,8 +250,6 @@ test -z "$INSTALL_SCRIPT" && INSTALL_SCR
# Not critical since the generated file is distributed
AC_CHECK_PROGS(YACC, ['bison -y -p MYSQL'])
-AC_CHECK_PROG(PDFMANUAL, pdftex, manual.pdf)
-AC_CHECK_PROG(DVIS, tex, manual.dvi)
#check the return type of sprintf
AC_MSG_CHECKING("return type of sprintf")
@@ -1726,41 +1724,43 @@ fi
AC_ARG_WITH([atomic-ops],
AC_HELP_STRING([--with-atomic-ops=rwlocks|smp|up],
[Implement atomic operations using pthread rwlocks or atomic CPU
- instructions for multi-processor (default) or uniprocessor
- configuration]), , [with_atomic_ops=smp])
+ instructions for multi-processor or uniprocessor
+ configuration. By default gcc built-in sync functions are used,
+ if available and 'smp' configuration otherwise.]))
case "$with_atomic_ops" in
"up") AC_DEFINE([MY_ATOMIC_MODE_DUMMY], [1],
[Assume single-CPU mode, no concurrency]) ;;
"rwlocks") AC_DEFINE([MY_ATOMIC_MODE_RWLOCKS], [1],
[Use pthread rwlocks for atomic ops]) ;;
"smp") ;;
+ "")
+ AC_CACHE_CHECK([whether the compiler provides atomic builtins],
+ [mysql_cv_gcc_atomic_builtins], [AC_TRY_RUN([
+ int main()
+ {
+ int foo= -10; int bar= 10;
+ if (!__sync_fetch_and_add(&foo, bar) || foo)
+ return -1;
+ bar= __sync_lock_test_and_set(&foo, bar);
+ if (bar || foo != 10)
+ return -1;
+ bar= __sync_val_compare_and_swap(&bar, foo, 15);
+ if (bar)
+ return -1;
+ return 0;
+ }
+ ], [mysql_cv_gcc_atomic_builtins=yes_but_disabled],
+ [mysql_cv_gcc_atomic_builtins=no],
+ [mysql_cv_gcc_atomic_builtins=no])])
+
+ if test "x$mysql_cv_gcc_atomic_builtins" = xyes; then
+ AC_DEFINE(HAVE_GCC_ATOMIC_BUILTINS, 1,
+ [Define to 1 if compiler provides atomic builtins.])
+ fi
+ ;;
*) AC_MSG_ERROR(["$with_atomic_ops" is not a valid value for --with-atomic-ops]) ;;
esac
-AC_CACHE_CHECK([whether the compiler provides atomic builtins],
- [mysql_cv_gcc_atomic_builtins], [AC_TRY_RUN([
- int main()
- {
- int foo= -10; int bar= 10;
- if (!__sync_fetch_and_add(&foo, bar) || foo)
- return -1;
- bar= __sync_lock_test_and_set(&foo, bar);
- if (bar || foo != 10)
- return -1;
- bar= __sync_val_compare_and_swap(&bar, foo, 15);
- if (bar)
- return -1;
- return 0;
- }
-], [mysql_cv_gcc_atomic_builtins=yes],
- [mysql_cv_gcc_atomic_builtins=no],
- [mysql_cv_gcc_atomic_builtins=no])])
-
-if test "x$mysql_cv_gcc_atomic_builtins" = disabled_xyes; then
- AC_DEFINE(HAVE_GCC_ATOMIC_BUILTINS, 1,
- [Define to 1 if compiler provides atomic builtins.])
-fi
-
# Force static compilation to avoid linking problems/get more speed
AC_ARG_WITH(mysqld-ldflags,
[ --with-mysqld-ldflags Extra linking arguments for mysqld],
@@ -2702,7 +2702,7 @@ then
AC_DEFINE([THREAD], [1],
[Define if you want to have threaded code. This may be undef on client code])
# Avoid _PROGRAMS names
- THREAD_LOBJECTS="thr_alarm.o thr_lock.o thr_mutex.o thr_rwlock.o my_pthread.o my_thr_init.o mf_keycache.o"
+ THREAD_LOBJECTS="thr_alarm.o thr_lock.o thr_mutex.o thr_rwlock.o my_pthread.o my_thr_init.o mf_keycache.o waiting_threads.o"
AC_SUBST(THREAD_LOBJECTS)
server_scripts="mysqld_safe mysql_install_db"
sql_server_dirs="strings mysys dbug extra regex"
=== modified file 'include/Makefile.am'
--- a/include/Makefile.am 2008-04-28 16:24:05 +0000
+++ b/include/Makefile.am 2008-08-29 18:51:02 +0000
@@ -36,7 +36,8 @@ noinst_HEADERS = config-win.h config-net
mysql_version.h.in my_handler.h my_time.h \
my_vle.h my_user.h my_atomic.h atomic/nolock.h \
atomic/rwlock.h atomic/x86-gcc.h atomic/generic-msvc.h \
- atomic/gcc_builtins.h my_libwrap.h wqueue.h
+ atomic/gcc_builtins.h my_libwrap.h wqueue.h \
+ waiting_threads.h
# Remove built files and the symlinked directories
CLEANFILES = $(BUILT_SOURCES) readline openssl
=== modified file 'include/atomic/generic-msvc.h'
--- a/include/atomic/generic-msvc.h 2008-01-10 12:21:53 +0000
+++ b/include/atomic/generic-msvc.h 2008-08-31 17:00:02 +0000
@@ -52,8 +52,8 @@ LONG _InterlockedExchangeAdd (LONG volat
#endif /*_M_IX86*/
#define MY_ATOMIC_MODE "msvc-intrinsics"
-#define IL_EXCHG_ADD32 InterlockedExchangeAdd
-#define IL_COMP_EXCHG32 InterlockedCompareExchange
+#define IL_EXCHG_ADD32(X,Y) InterlockedExchangeAdd((volatile LONG *)(X),(Y))
+#define IL_COMP_EXCHG32(X,Y,Z) InterlockedCompareExchange((volatile LONG *)(X),(Y),(Z))
#define IL_COMP_EXCHGptr InterlockedCompareExchangePointer
#define IL_EXCHG32 InterlockedExchange
#define IL_EXCHGptr InterlockedExchangePointer
=== modified file 'include/lf.h'
--- a/include/lf.h 2007-12-18 22:22:55 +0000
+++ b/include/lf.h 2008-07-29 14:10:24 +0000
@@ -110,7 +110,7 @@ typedef struct {
typedef struct {
void * volatile pin[LF_PINBOX_PINS];
LF_PINBOX *pinbox;
- void *stack_ends_here;
+ void **stack_ends_here;
void *purgatory;
uint32 purgatory_count;
uint32 volatile link;
@@ -166,8 +166,8 @@ void lf_pinbox_init(LF_PINBOX *pinbox, u
void lf_pinbox_destroy(LF_PINBOX *pinbox);
lock_wrap(lf_pinbox_get_pins, LF_PINS *,
- (LF_PINBOX *pinbox, void *stack_end),
- (pinbox, stack_end),
+ (LF_PINBOX *pinbox),
+ (pinbox),
&pinbox->pinarray.lock)
lock_wrap_void(lf_pinbox_put_pins,
(LF_PINS *pins),
@@ -182,15 +182,13 @@ lock_wrap_void(lf_pinbox_free,
memory allocator, lf_alloc-pin.c
*/
-struct st_lf_alloc_node {
- struct st_lf_alloc_node *next;
-};
-
typedef struct st_lf_allocator {
LF_PINBOX pinbox;
- struct st_lf_alloc_node * volatile top;
+ uchar * volatile top;
uint element_size;
uint32 volatile mallocs;
+ void (*constructor)(uchar *);
+ void (*destructor)(uchar *);
} LF_ALLOCATOR;
void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset);
@@ -202,8 +200,8 @@ uint lf_alloc_pool_count(LF_ALLOCATOR *a
*/
#define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR))
#define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR))
-#define _lf_alloc_get_pins(A, ST) _lf_pinbox_get_pins(&(A)->pinbox, (ST))
-#define lf_alloc_get_pins(A, ST) lf_pinbox_get_pins(&(A)->pinbox, (ST))
+#define _lf_alloc_get_pins(A) _lf_pinbox_get_pins(&(A)->pinbox)
+#define lf_alloc_get_pins(A) lf_pinbox_get_pins(&(A)->pinbox)
#define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS)
#define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS)
#define lf_alloc_direct_free(ALLOC, ADDR) my_free((uchar*)(ADDR), MYF(0))
@@ -220,13 +218,17 @@ lock_wrap(lf_alloc_new, void *,
#define LF_HASH_UNIQUE 1
+/* lf_hash overhead per element (that is, sizeof(LF_SLIST)) */
+#define LF_HASH_OVERHEAD (sizeof(int*)*4)
+
typedef struct {
LF_DYNARRAY array; /* hash itself */
LF_ALLOCATOR alloc; /* allocator for elements */
hash_get_key get_key; /* see HASH */
CHARSET_INFO *charset; /* see HASH */
uint key_offset, key_length; /* see HASH */
- uint element_size, flags; /* LF_HASH_UNIQUE, etc */
+ uint element_size; /* size of memcpy'ed area on insert */
+ uint flags; /* LF_HASH_UNIQUE, etc */
int32 volatile size; /* size of array */
int32 volatile count; /* number of elements in the hash */
} LF_HASH;
@@ -242,8 +244,8 @@ int lf_hash_delete(LF_HASH *hash, LF_PIN
shortcut macros to access underlying pinbox functions from an LF_HASH
see _lf_pinbox_get_pins() and _lf_pinbox_put_pins()
*/
-#define _lf_hash_get_pins(HASH, ST) _lf_alloc_get_pins(&(HASH)->alloc, (ST))
-#define lf_hash_get_pins(HASH, ST) lf_alloc_get_pins(&(HASH)->alloc, (ST))
+#define _lf_hash_get_pins(HASH) _lf_alloc_get_pins(&(HASH)->alloc)
+#define lf_hash_get_pins(HASH) lf_alloc_get_pins(&(HASH)->alloc)
#define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS)
#define lf_hash_put_pins(PINS) lf_pinbox_put_pins(PINS)
#define lf_hash_search_unpin(PINS) lf_unpin((PINS), 2)
=== modified file 'include/my_global.h'
--- a/include/my_global.h 2008-06-10 14:44:44 +0000
+++ b/include/my_global.h 2008-08-27 12:15:06 +0000
@@ -1518,7 +1518,7 @@ inline void operator delete[](void*, vo
#if !defined(max)
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
-#endif
+#endif
/*
Only Linux is known to need an explicit sync of the directory to make sure a
file creation/deletion/renaming in(from,to) this directory durable.
@@ -1531,4 +1531,23 @@ inline void operator delete[](void*, vo
#define bool In_C_you_should_use_my_bool_instead()
#endif
+/* Provide __func__ macro definition for platforms that miss it. */
+#if __STDC_VERSION__ < 199901L
+# if __GNUC__ >= 2
+# define __func__ __FUNCTION__
+# else
+# define __func__ "<unknown>"
+# endif
+#elif defined(_MSC_VER)
+# if _MSC_VER < 1300
+# define __func__ "<unknown>"
+# else
+# define __func__ __FUNCTION__
+# endif
+#elif defined(__BORLANDC__)
+# define __func__ __FUNC__
+#else
+# define __func__ "<unknown>"
+#endif
+
#endif /* my_global_h */
=== modified file 'include/my_pthread.h'
--- a/include/my_pthread.h 2008-04-28 16:24:05 +0000
+++ b/include/my_pthread.h 2008-08-31 15:20:20 +0000
@@ -79,25 +79,27 @@ typedef void * (__cdecl *pthread_handler
so it can be used directly as a 64 bit value. The value
stored is in 100ns units.
*/
- union ft64 {
+union ft64 {
FILETIME ft;
__int64 i64;
- };
+};
+
struct timespec {
union ft64 tv;
/* The max timeout value in millisecond for pthread_cond_timedwait */
long max_timeout_msec;
};
-#define set_timespec(ABSTIME,SEC) { \
- GetSystemTimeAsFileTime(&((ABSTIME).tv.ft)); \
- (ABSTIME).tv.i64+= (__int64)(SEC)*10000000; \
- (ABSTIME).max_timeout_msec= (long)((SEC)*1000); \
-}
-#define set_timespec_nsec(ABSTIME,NSEC) { \
- GetSystemTimeAsFileTime(&((ABSTIME).tv.ft)); \
- (ABSTIME).tv.i64+= (__int64)(NSEC)/100; \
- (ABSTIME).max_timeout_msec= (long)((NSEC)/1000000); \
-}
+
+#define set_timespec_time_nsec(ABSTIME,TIME,NSEC) do { \
+ (ABSTIME).tv.i64= (TIME)+(__int64)(NSEC)/100; \
+ (ABSTIME).max_timeout_msec= (long)((NSEC)/1000000); \
+} while(0)
+
+#define set_timespec_nsec(ABSTIME,NSEC) do { \
+ union ft64 tv; \
+ GetSystemTimeAsFileTime(&tv.ft); \
+ set_timespec_time_nsec((ABSTIME), tv.i64, (NSEC)); \
+} while(0)
void win_pthread_init(void);
int win_pthread_setspecific(void *A,void *B,uint length);
@@ -416,43 +418,33 @@ int my_pthread_mutex_trylock(pthread_mut
for calculating an absolute time at which
pthread_cond_timedwait should timeout
*/
-#ifdef HAVE_TIMESPEC_TS_SEC
-#ifndef set_timespec
-#define set_timespec(ABSTIME,SEC) \
-{ \
- (ABSTIME).ts_sec=time(0) + (time_t) (SEC); \
- (ABSTIME).ts_nsec=0; \
-}
-#endif /* !set_timespec */
+
+#define set_timespec(ABSTIME,SEC) set_timespec_nsec((ABSTIME),(SEC)*1000000000ULL)
+
#ifndef set_timespec_nsec
-#define set_timespec_nsec(ABSTIME,NSEC) \
-{ \
- ulonglong now= my_getsystime() + (NSEC/100); \
- (ABSTIME).ts_sec= (now / ULL(10000000)); \
- (ABSTIME).ts_nsec= (now % ULL(10000000) * 100 + ((NSEC) % 100)); \
-}
+#define set_timespec_nsec(ABSTIME,NSEC) \
+ set_timespec_time_nsec((ABSTIME),my_getsystime(),(NSEC))
#endif /* !set_timespec_nsec */
+
+/* adapt for two different flavors of struct timespec */
+#ifdef HAVE_TIMESPEC_TS_SEC
+#define TV_sec ts_sec
+#define TV_nsec ts_nsec
#else
-#ifndef set_timespec
-#define set_timespec(ABSTIME,SEC) \
-{\
- struct timeval tv;\
- gettimeofday(&tv,0);\
- (ABSTIME).tv_sec=tv.tv_sec+(time_t) (SEC);\
- (ABSTIME).tv_nsec=tv.tv_usec*1000;\
-}
-#endif /* !set_timespec */
-#ifndef set_timespec_nsec
-#define set_timespec_nsec(ABSTIME,NSEC) \
-{\
- ulonglong now= my_getsystime() + (NSEC/100); \
- (ABSTIME).tv_sec= (time_t) (now / ULL(10000000)); \
- (ABSTIME).tv_nsec= (long) (now % ULL(10000000) * 100 + ((NSEC) % 100)); \
-}
-#endif /* !set_timespec_nsec */
+#define TV_sec tv_sec
+#define TV_nsec tv_nsec
#endif /* HAVE_TIMESPEC_TS_SEC */
- /* safe_mutex adds checking to mutex for easier debugging */
+#ifndef set_timespec_time_nsec
+#define set_timespec_time_nsec(ABSTIME,TIME,NSEC) do { \
+ ulonglong nsec= (NSEC); \
+ ulonglong now= (TIME) + (nsec/100); \
+ (ABSTIME).TV_sec= (now / ULL(10000000)); \
+ (ABSTIME).TV_nsec= (now % ULL(10000000) * 100 + (nsec % 100)); \
+} while(0)
+#endif /* !set_timespec_time_nsec */
+
+/* safe_mutex adds checking to mutex for easier debugging */
#if defined(__NETWARE__) && !defined(SAFE_MUTEX_DETECT_DESTROY)
#define SAFE_MUTEX_DETECT_DESTROY
@@ -692,6 +684,7 @@ struct st_my_thread_var
struct st_my_thread_var *next,**prev;
void *opt_info;
uint lock_type; /* used by conditional release the queue */
+ void *stack_ends_here;
#ifndef DBUG_OFF
void *dbug;
char name[THREAD_NAME_SIZE+1];
=== modified file 'include/my_sys.h'
--- a/include/my_sys.h 2008-04-29 06:26:37 +0000
+++ b/include/my_sys.h 2008-08-27 12:15:06 +0000
@@ -220,6 +220,9 @@ extern int (*fatal_error_handler_hook)(u
extern uint my_file_limit;
extern ulong my_thread_stack_size;
+extern const char *(*proc_info_hook)(void *, const char *, const char *,
+ const char *, const unsigned int);
+
#ifdef HAVE_LARGE_PAGES
extern my_bool my_use_large_pages;
extern uint my_large_page_size;
=== added file 'include/waiting_threads.h'
--- a/include/waiting_threads.h 1970-01-01 00:00:00 +0000
+++ b/include/waiting_threads.h 2008-09-01 19:43:11 +0000
@@ -0,0 +1,161 @@
+/* Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _waiting_threads_h
+#define _waiting_threads_h
+
+#include <my_global.h>
+#include <my_sys.h>
+
+C_MODE_START
+
+#include <lf.h>
+
+typedef struct st_wt_resource_id WT_RESOURCE_ID;
+
+typedef struct st_wt_resource_type {
+ int (*compare)(void *a, void *b);
+ const void *(*make_key)(WT_RESOURCE_ID *id, uint *len);
+} WT_RESOURCE_TYPE;
+
+struct st_wt_resource_id {
+ WT_RESOURCE_TYPE *type;
+ ulonglong value;
+};
+
+#define WT_WAIT_STATS 24
+#define WT_CYCLE_STATS 32
+extern ulonglong wt_wait_table[WT_WAIT_STATS];
+extern uint32 wt_wait_stats[WT_WAIT_STATS+1];
+extern uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1];
+extern uint32 wt_success_stats;
+
+/*
+ 'lock' protects 'owners', 'state', and 'waiter_count'
+ 'id' is read-only
+
+ a resource is picked up from a hash in a lock-free manner
+ it's returned pinned, so it cannot be freed at once
+ but it may be freed right after the pin is removed
+ to be freed, a resource must
+ 1. have no owners
+ 2. have no waiters
+
+ two ways to access a resource:
+ 1. find it in a hash
+ - it's returned pinned.
+ a) take a lock in exclusive mode
+ b) check the state, it should be ACTIVE
+ c) unpin
+ 2. by a direct reference
+ - can only be used if a resource cannot be freed
+ e.g. accessing a resource by thd->waiting_for is safe,
+ a resource cannot be freed as there's a thread waiting for it
+*/
+
+typedef struct st_wt_resource {
+ WT_RESOURCE_ID id;
+ uint waiter_count;
+ enum { ACTIVE, FREE } state;
+#ifndef DBUG_OFF
+ pthread_mutex_t *mutex;
+#endif
+ /*
+ before the 'lock' all elements are mutable, after - immutable
+ in the sense that lf_hash_insert() won't memcpy() over them.
+ See wt_init().
+ */
+ rw_lock_t lock;
+ pthread_cond_t cond;
+ DYNAMIC_ARRAY owners;
+} WT_RESOURCE;
+
+typedef struct st_wt_thd {
+ /*
+ XXX
+ there's no protection (mutex) against concurrent access to
+ the dynarray below. It is assumed that a caller will have it
+ automatically (not to protect this array but to protect its
+ own - caller's - data structures), and we'll get it for free.
+ If not, we'll need to add a mutex.
+ */
+ DYNAMIC_ARRAY my_resources;
+ /*
+ 'waiting_for' is modified under waiting_for->lock, and only by thd itself
+ 'waiting_for' is read lock-free (using pinning protocol), but a thd object
+ can read its own 'waiting_for' without any locks or tricks.
+ */
+ WT_RESOURCE *waiting_for;
+ LF_PINS *pins;
+
+ /* pointers to values */
+ ulong *timeout_short, *deadlock_search_depth_short;
+ ulong *timeout_long, *deadlock_search_depth_long;
+
+ /*
+ weight relates to the desirability of a transaction being killed if it's
+ part of a deadlock. In a deadlock situation transactions with lower weights
+ are killed first.
+
+ Examples of using the weight to implement different selection strategies:
+
+ 1. Latest
+ Keep all weights equal.
+ 2. Random
+ Assign weights at random.
+ (variant: modify a weight randomly before every lock request)
+ 3. Youngest
+ Set weight to -NOW()
+ 4. Minimum locks
+ count locks granted in your lock manager, store the value as a weight
+ 5. Minimum work
+ depends on the definition of "work". For example, store the number
+ of rows modified in this transaction (or a length of REDO log for a
+ transaction) as a weight.
+
+ It is only statistically relevant and is not protected by any locks.
+ */
+ ulong volatile weight;
+ /*
+ 'killed' is indirectly protected by waiting_for->lock -
+ a killed thread needs to clear its 'waiting_for', and thus needs a lock.
+ That is a thread needs an exclusive lock to read 'killed' reliably.
+ But other threads may change 'killed' from 0 to 1, a shared
+ lock is enough for that.
+ */
+ my_bool volatile killed;
+#ifndef DBUG_OFF
+ const char *name;
+#endif
+} WT_THD;
+
+#define WT_TIMEOUT ETIMEDOUT
+#define WT_OK 0
+#define WT_DEADLOCK -1
+#define WT_DEPTH_EXCEEDED -2
+
+void wt_init(void);
+void wt_end(void);
+void wt_thd_lazy_init(WT_THD *, ulong *, ulong *, ulong *, ulong *);
+void wt_thd_destroy(WT_THD *);
+int wt_thd_will_wait_for(WT_THD *, WT_THD *, WT_RESOURCE_ID *);
+int wt_thd_cond_timedwait(WT_THD *, pthread_mutex_t *);
+void wt_thd_release(WT_THD *, WT_RESOURCE_ID *);
+#define wt_thd_release_all(THD) wt_thd_release((THD), 0)
+int wt_resource_id_memcmp(void *, void *);
+
+C_MODE_END
+
+#endif
=== modified file 'mysql-test/r/maria.result'
--- a/mysql-test/r/maria.result 2008-08-25 08:35:25 +0000
+++ b/mysql-test/r/maria.result 2008-08-28 18:52:23 +0000
@@ -1900,6 +1900,21 @@ check table t2 extended;
Table Op Msg_type Msg_text
test.t2 check status OK
drop table t2;
+set session deadlock_timeout_long=60000000;
+create table t1 (a int unique) transactional=1;
+insert t1 values (1);
+lock table t1 write concurrent;
+insert t1 values (2);
+set session deadlock_timeout_long=60000000;
+lock table t1 write concurrent;
+insert t1 values (3);
+insert t1 values (2);
+insert t1 values (3);
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+unlock tables;
+ERROR 23000: Duplicate entry '2' for key 'a'
+unlock tables;
+drop table t1;
CREATE TABLE t1 (
col0 float DEFAULT NULL,
col1 date DEFAULT NULL,
=== modified file 'mysql-test/t/maria.test'
--- a/mysql-test/t/maria.test 2008-08-25 08:35:25 +0000
+++ b/mysql-test/t/maria.test 2008-08-28 18:52:23 +0000
@@ -1186,6 +1186,33 @@ insert into t2 values (repeat('x',28)),
check table t2 extended;
drop table t2;
+#
+# an example of a deadlock
+#
+set session deadlock_timeout_long=60000000;
+create table t1 (a int unique) transactional=1;
+insert t1 values (1);
+lock table t1 write concurrent;
+insert t1 values (2);
+connect(con_d,localhost,root,,);
+set session deadlock_timeout_long=60000000;
+lock table t1 write concurrent;
+insert t1 values (3);
+send insert t1 values (2);
+connection default;
+let $wait_condition=select count(*) = 1 from information_schema.processlist where state="waiting for a resource";
+--source include/wait_condition.inc
+--error ER_LOCK_DEADLOCK
+insert t1 values (3);
+unlock tables;
+connection con_d;
+--error ER_DUP_ENTRY
+reap;
+unlock tables;
+disconnect con_d;
+connection default;
+drop table t1;
+
CREATE TABLE t1 (
col0 float DEFAULT NULL,
col1 date DEFAULT NULL,
=== modified file 'mysys/CMakeLists.txt'
--- a/mysys/CMakeLists.txt 2008-01-10 12:21:53 +0000
+++ b/mysys/CMakeLists.txt 2008-08-31 15:20:20 +0000
@@ -44,7 +44,7 @@ SET(MYSYS_SOURCES array.c charset-def.c
thr_rwlock.c tree.c typelib.c my_vle.c base64.c my_memmem.c my_getpagesize.c
lf_alloc-pin.c lf_dynarray.c lf_hash.c
my_atomic.c my_getncpus.c my_rnd.c
- my_uuid.c wqueue.c
+ my_uuid.c wqueue.c waiting_threads.c
)
IF(NOT SOURCE_SUBLIBS)
=== modified file 'mysys/Makefile.am'
--- a/mysys/Makefile.am 2008-04-28 16:24:05 +0000
+++ b/mysys/Makefile.am 2008-07-29 14:10:24 +0000
@@ -58,7 +58,7 @@ libmysys_a_SOURCES = my_init.c my_get
my_windac.c my_access.c base64.c my_libwrap.c \
wqueue.c
EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
- thr_mutex.c thr_rwlock.c \
+ thr_mutex.c thr_rwlock.c waiting_threads.c \
CMakeLists.txt mf_soundex.c \
my_conio.c my_wincond.c my_winthread.c
libmysys_a_LIBADD = @THREAD_LOBJECTS@
=== modified file 'mysys/array.c'
--- a/mysys/array.c 2008-04-28 16:24:05 +0000
+++ b/mysys/array.c 2008-08-08 11:11:27 +0000
@@ -51,19 +51,14 @@ my_bool init_dynamic_array2(DYNAMIC_ARRA
if (init_alloc > 8 && alloc_increment > init_alloc * 2)
alloc_increment=init_alloc*2;
}
-
- if (!init_alloc)
- {
- init_alloc=alloc_increment;
- init_buffer= 0;
- }
array->elements=0;
array->max_element=init_alloc;
array->alloc_increment=alloc_increment;
array->size_of_element=element_size;
if ((array->buffer= init_buffer))
DBUG_RETURN(FALSE);
- if (!(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc,
+ if (init_alloc &&
+ !(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc,
MYF(MY_WME))))
{
array->max_element=0;
=== modified file 'mysys/lf_alloc-pin.c'
--- a/mysys/lf_alloc-pin.c 2008-02-21 00:51:51 +0000
+++ b/mysys/lf_alloc-pin.c 2008-07-29 14:10:24 +0000
@@ -96,11 +96,10 @@
versioning a pointer - because we use an array, a pointer to pins is 16 bit,
upper 16 bits are used for a version.
- It is assumed that pins belong to a thread and are not transferable
- between threads (LF_PINS::stack_ends_here being a primary reason
+ It is assumed that pins belong to a THD and are not transferable
+ between THD's (LF_PINS::stack_ends_here being a primary reason
for this limitation).
*/
-
#include <my_global.h>
#include <my_sys.h>
#include <lf.h>
@@ -137,10 +136,6 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox
SYNOPSYS
pinbox -
- stack_end - a pointer to the end (top/bottom, depending on the
- STACK_DIRECTION) of stack. Used for safe alloca. There's
- no safety margin deducted, a caller should take care of it,
- if necessary.
DESCRIPTION
get a new LF_PINS structure from a stack of unused pins,
@@ -150,7 +145,7 @@ void lf_pinbox_destroy(LF_PINBOX *pinbox
It is assumed that pins belong to a thread and are not transferable
between threads.
*/
-LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end)
+LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox)
{
uint32 pins, next, top_ver;
LF_PINS *el;
@@ -194,7 +189,7 @@ LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *
el->link= pins;
el->purgatory_count= 0;
el->pinbox= pinbox;
- el->stack_ends_here= stack_end;
+ el->stack_ends_here= & my_thread_var->stack_ends_here;
return el;
}
@@ -325,6 +320,9 @@ static int match_pins(LF_PINS *el, void
#define available_stack_size(CUR,END) (long) ((char*)(END) - (char*)(CUR))
#endif
+#define next_node(P, X) (*((uchar **)(((uchar *)(X)) + (P)->free_ptr_offset)))
+#define anext_node(X) next_node(&allocator->pinbox, (X))
+
/*
Scan the purgatory and free everything that can be freed
*/
@@ -332,7 +330,7 @@ static void _lf_pinbox_real_free(LF_PINS
{
int npins, alloca_size;
void *list, **addr;
- struct st_lf_alloc_node *first, *last= NULL;
+ uchar *first, *last= NULL;
LF_PINBOX *pinbox= pins->pinbox;
LINT_INIT(first);
@@ -341,7 +339,7 @@ static void _lf_pinbox_real_free(LF_PINS
#ifdef HAVE_ALLOCA
alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins;
/* create a sorted list of pinned addresses, to speed up searches */
- if (available_stack_size(&pinbox, pins->stack_ends_here) > alloca_size)
+ if (available_stack_size(&pinbox, *pins->stack_ends_here) > alloca_size)
{
struct st_harvester hv;
addr= (void **) alloca(alloca_size);
@@ -391,9 +389,9 @@ static void _lf_pinbox_real_free(LF_PINS
}
/* not pinned - freeing */
if (last)
- last= last->next= (struct st_lf_alloc_node *)cur;
+ last= next_node(pinbox, last)= (uchar *)cur;
else
- first= last= (struct st_lf_alloc_node *)cur;
+ first= last= (uchar *)cur;
continue;
found:
/* pinned - keeping */
@@ -412,22 +410,22 @@ LF_REQUIRE_PINS(1)
add it back to the allocator stack
DESCRIPTION
- 'first' and 'last' are the ends of the linked list of st_lf_alloc_node's:
+ 'first' and 'last' are the ends of the linked list of nodes:
first->el->el->....->el->last. Use first==last to free only one element.
*/
-static void alloc_free(struct st_lf_alloc_node *first,
- struct st_lf_alloc_node volatile *last,
+static void alloc_free(uchar *first,
+ uchar volatile *last,
LF_ALLOCATOR *allocator)
{
/*
we need a union here to access type-punned pointer reliably.
otherwise gcc -fstrict-aliasing will not see 'tmp' changed in the loop
*/
- union { struct st_lf_alloc_node * node; void *ptr; } tmp;
+ union { uchar * node; void *ptr; } tmp;
tmp.node= allocator->top;
do
{
- last->next= tmp.node;
+ anext_node(last)= tmp.node;
} while (!my_atomic_casptr((void **)(char *)&allocator->top,
(void **)&tmp.ptr, first) && LF_BACKOFF);
}
@@ -452,6 +450,8 @@ void lf_alloc_init(LF_ALLOCATOR *allocat
allocator->top= 0;
allocator->mallocs= 0;
allocator->element_size= size;
+ allocator->constructor= 0;
+ allocator->destructor= 0;
DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset);
}
@@ -468,10 +468,12 @@ void lf_alloc_init(LF_ALLOCATOR *allocat
*/
void lf_alloc_destroy(LF_ALLOCATOR *allocator)
{
- struct st_lf_alloc_node *node= allocator->top;
+ uchar *node= allocator->top;
while (node)
{
- struct st_lf_alloc_node *tmp= node->next;
+ uchar *tmp= anext_node(node);
+ if (allocator->destructor)
+ allocator->destructor(node);
my_free((void *)node, MYF(0));
node= tmp;
}
@@ -489,7 +491,7 @@ void lf_alloc_destroy(LF_ALLOCATOR *allo
void *_lf_alloc_new(LF_PINS *pins)
{
LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg);
- struct st_lf_alloc_node *node;
+ uchar *node;
for (;;)
{
do
@@ -500,6 +502,8 @@ void *_lf_alloc_new(LF_PINS *pins)
if (!node)
{
node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
+ if (node && allocator->constructor) /* skip if my_malloc() returned NULL */
+ allocator->constructor(node);
#ifdef MY_LF_EXTRA_DEBUG
if (likely(node != 0))
my_atomic_add32(&allocator->mallocs, 1);
@@ -507,7 +511,7 @@ void *_lf_alloc_new(LF_PINS *pins)
break;
}
if (my_atomic_casptr((void **)(char *)&allocator->top,
- (void *)&node, node->next))
+ (void *)&node, anext_node(node)))
break;
}
_lf_unpin(pins, 0);
@@ -523,8 +527,8 @@ void *_lf_alloc_new(LF_PINS *pins)
uint lf_alloc_pool_count(LF_ALLOCATOR *allocator)
{
uint i;
- struct st_lf_alloc_node *node;
- for (node= allocator->top, i= 0; node; node= node->next, i++)
+ uchar *node;
+ for (node= allocator->top, i= 0; node; node= anext_node(node), i++)
/* no op */;
return i;
}
=== modified file 'mysys/lf_hash.c'
--- a/mysys/lf_hash.c 2008-02-13 17:25:56 +0000
+++ b/mysys/lf_hash.c 2008-07-29 14:10:24 +0000
@@ -299,11 +299,22 @@ static int initialize_bucket(LF_HASH *,
/*
Initializes lf_hash, the arguments are compatible with hash_init
+
+ @note element_size sets both the size of the allocated memory block for
+ lf_alloc and the size of the memcpy'ed block in lf_hash_insert. Typically
+ they are the same, indeed. But LF_HASH::element_size can be decreased
+ after lf_hash_init, and then lf_alloc will allocate a larger block than
+ lf_hash_insert will copy over. It is desirable if part of the element
+ is expensive to initialize - for example if there is a mutex or
+ DYNAMIC_ARRAY. In this case they should be initialized in the
+ LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them.
+ See wt_init() for example.
*/
void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
uint key_offset, uint key_length, hash_get_key get_key,
CHARSET_INFO *charset)
{
+ compile_time_assert(sizeof(LF_SLIST) == LF_HASH_OVERHEAD);
lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
offsetof(LF_SLIST, key));
lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
@@ -453,7 +464,7 @@ void *lf_hash_search(LF_HASH *hash, LF_P
return found ? found+1 : 0;
}
-static const uchar *dummy_key= "";
+static const uchar *dummy_key= (uchar*)"";
/*
RETURN
@@ -473,7 +484,7 @@ static int initialize_bucket(LF_HASH *ha
unlikely(initialize_bucket(hash, el, parent, pins)))
return -1;
dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
- dummy->key= (char*) dummy_key;
+ dummy->key= dummy_key;
dummy->keylen= 0;
if ((cur= linsert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
{
=== modified file 'mysys/my_static.c'
--- a/mysys/my_static.c 2007-11-16 16:09:51 +0000
+++ b/mysys/my_static.c 2008-08-29 19:50:04 +0000
@@ -92,6 +92,18 @@ int (*error_handler_hook)(uint error,con
int (*fatal_error_handler_hook)(uint error,const char *str,myf MyFlags)=
my_message_no_curses;
+static const char *proc_info_dummy(void *a __attribute__((unused)),
+ const char *b __attribute__((unused)),
+ const char *c __attribute__((unused)),
+ const char *d __attribute__((unused)),
+ const unsigned int e __attribute__((unused)))
+{
+ return 0;
+}
+
+const char *(*proc_info_hook)(void *, const char *, const char *, const char *,
+ const unsigned int)= proc_info_dummy;
+
#ifdef __WIN__
/* from my_getsystime.c */
ulonglong query_performance_frequency, query_performance_offset;
=== modified file 'mysys/my_thr_init.c'
--- a/mysys/my_thr_init.c 2008-04-28 16:24:05 +0000
+++ b/mysys/my_thr_init.c 2008-07-29 14:10:24 +0000
@@ -256,7 +256,7 @@ my_bool my_thread_init(void)
#ifdef EXTRA_DEBUG_THREADS
fprintf(stderr,"my_thread_init(): thread_id: 0x%lx\n",
(ulong) pthread_self());
-#endif
+#endif
#if !defined(__WIN__) || defined(USE_TLS)
if (my_pthread_getspecific(struct st_my_thread_var *,THR_KEY_mysys))
@@ -264,7 +264,7 @@ my_bool my_thread_init(void)
#ifdef EXTRA_DEBUG_THREADS
fprintf(stderr,"my_thread_init() called more than once in thread 0x%lx\n",
(long) pthread_self());
-#endif
+#endif
goto end;
}
if (!(tmp= (struct st_my_thread_var *) calloc(1, sizeof(*tmp))))
@@ -290,6 +290,8 @@ my_bool my_thread_init(void)
pthread_mutex_init(&tmp->mutex,MY_MUTEX_INIT_FAST);
pthread_cond_init(&tmp->suspend, NULL);
+ tmp->stack_ends_here= &tmp + STACK_DIRECTION * my_thread_stack_size;
+
pthread_mutex_lock(&THR_LOCK_threads);
tmp->id= ++thread_id;
++THR_thread_count;
@@ -325,7 +327,7 @@ void my_thread_end(void)
#ifdef EXTRA_DEBUG_THREADS
fprintf(stderr,"my_thread_end(): tmp: 0x%lx pthread_self: 0x%lx thread_id: %ld\n",
(long) tmp, (long) pthread_self(), tmp ? (long) tmp->id : 0L);
-#endif
+#endif
if (tmp && tmp->init)
{
#if !defined(DBUG_OFF)
=== added file 'mysys/waiting_threads.c'
--- a/mysys/waiting_threads.c 1970-01-01 00:00:00 +0000
+++ b/mysys/waiting_threads.c 2008-09-01 19:43:11 +0000
@@ -0,0 +1,908 @@
+/* Copyright (C) 2008 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ "waiting threads" subsystem - a unified interface for threads to wait
+ on each other, with built-in deadlock detection.
+
+ Main concepts
+ ^^^^^^^^^^^^^
+ a thread - is represented by a WT_THD structure. One physical thread
+ can have only one WT_THD descriptor.
+
+ a resource - a thread does not wait for other threads directly,
+ instead it waits for a "resource", which is "owned" by other threads.
+ It waits, exactly, for all "owners" to "release" a resource.
+ It does not have to correspond to a physical resource. For example, it
+ may be convenient in certain cases to force resource == thread.
+ A resource is represented by a WT_RESOURCE structure.
+
+ a resource identifier - a pair of {resource type, value}. A value is
+ an ulonglong number. Represented by a WT_RESOURCE_ID structure.
+
+ a resource type - a pointer to a statically defined instance of
+ WT_RESOURCE_TYPE structure. This structure contains a pointer to
+ a function that knows how to compare values of this resource type.
+ In the simple case it could be wt_resource_id_memcmp().
+
+ Usage
+ ^^^^^
+ to use the interface one needs to use this thread's WT_THD,
+ call wt_thd_will_wait_for() for every thread it needs to wait on,
+ then call wt_thd_cond_timedwait(). When thread releases a resource
+ it should call wt_thd_release() (or wt_thd_release_all()) - it will
+ notify (send a signal) threads waiting in wt_thd_cond_timedwait(),
+ if appropriate.
+
+ Just like with pthread's cond_wait, there could be spurious
+ wake-ups from wt_thd_cond_timedwait(). A caller is expected to
+ handle that.
+
+ wt_thd_will_wait_for() and wt_thd_cond_timedwait() return either
+ WT_OK or WT_DEADLOCK. Additionally wt_thd_cond_timedwait() can return
+ WT_TIMEOUT. Out of memory and other fatal errors are reported as
+ WT_DEADLOCK - and a transaction must be aborted just the same.
+
+ Configuration
+ ^^^^^^^^^^^^^
+ There are four config variables. Two deadlock search depths - short and
+ long - and two timeouts. Deadlock search is performed with the short
+ depth on every wt_thd_will_wait_for() call. wt_thd_cond_timedwait()
+ waits with a short timeout, performs a deadlock search with the long
+ depth, and waits with a long timeout. As most deadlock cycles are supposed
+ to be short, most deadlocks will be detected at once, and waits will
+ rarely be necessary.
+
+ These config variables are thread-local. Different threads may have
+ different search depth and timeout values.
+
+ Also, deadlock detector supports different killing strategies, the victim
+ in a deadlock cycle is selected based on the "weight". See "weight"
+ description in waiting_threads.h for details. It's up to the caller to
+ set weights accordingly.
+
+ Status
+ ^^^^^^
+ We calculate the number of successful waits (WT_OK returned from
+ wt_thd_cond_timedwait()), a number of timeouts, a deadlock cycle
+ length distribution - number of deadlocks with every length from
+ 1 to WT_CYCLE_STATS, and a wait time distribution - number
+ of waits with a time from 1 us to 1 min in WT_CYCLE_STATS
+ intervals on a log scale.
+*/
+
+/*
+ Note that if your lock system satisfies the following condition:
+
+ there exist four lock levels A, B, C, D, such that
+ A is compatible with B
+ A is not compatible with C
+ D is not compatible with B
+
+ (example A=IX, B=IS, C=S, D=X)
+
+ you need to include lock level in the resource identifier - thread 1
+ waiting for lock A on resource R and thread 2 waiting for lock B
+ on resource R should wait on different WT_RESOURCE structures, on different
+ {lock, resource} pairs. Otherwise the following is possible:
+
+ thread1> take S-lock on R
+ thread2> take IS-lock on R
+ thread3> wants X-lock on R, starts waiting for threads 1 and 2 on R.
+ thread3 is killed (or timeout or whatever)
+ WT_RESOURCE structure for R is still in the hash, as it has two owners
+ thread4> wants an IX-lock on R
+ WT_RESOURCE for R is found in the hash, thread4 starts waiting on it.
+ !! now thread4 is waiting for both thread1 and thread2
+ !! while, in fact, IX-lock and IS-lock are compatible and
+ !! thread4 should not wait for thread2.
+*/
+
+#include <waiting_threads.h>
+#include <m_string.h>
+
+/*
+ status variables:
+ distribution of cycle lengths
+ wait time log distribution
+
+ Note:
+
+ we call deadlock() twice per wait (with different search lengths).
+ it means a deadlock will be counted twice. It's difficult to avoid,
+ as on the second search we could find a *different* deadlock and we
+ *want* to count it too. So we just count all deadlocks - two searches
+ mean two increments on the wt_cycle_stats.
+*/
+
+ulonglong wt_wait_table[WT_WAIT_STATS]; /* wait time bucket bounds, filled in by wt_init() */
+uint32 wt_wait_stats[WT_WAIT_STATS+1]; /* waits per bucket; the extra last slot counts timeouts */
+uint32 wt_cycle_stats[2][WT_CYCLE_STATS+1], wt_success_stats; /* row 1: long-depth searches, row 0: others */
+
+static my_atomic_rwlock_t cycle_stats_lock, wait_stats_lock, success_stats_lock;
+
+#define increment_success_stats() /* count one more successful wait */ \
+ do { \
+ my_atomic_rwlock_wrlock(&success_stats_lock); \
+ my_atomic_add32(&wt_success_stats, 1); \
+ my_atomic_rwlock_wrunlock(&success_stats_lock); \
+ } while (0)
+
+#define increment_cycle_stats(X,SLOT) /* count a cycle of length X in row SLOT */ \
+ do { \
+ uint i= (X); \
+ if (i >= WT_CYCLE_STATS) /* everything too long shares the last slot */ \
+ i= WT_CYCLE_STATS; \
+ my_atomic_rwlock_wrlock(&cycle_stats_lock); \
+ my_atomic_add32(&wt_cycle_stats[SLOT][i], 1); \
+ my_atomic_rwlock_wrunlock(&cycle_stats_lock); \
+ } while (0)
+
+#define increment_wait_stats(X,RET) /* count a wait that lasted X and ended with RET */ \
+ do { \
+ uint i; \
+ if ((RET) == ETIMEDOUT) \
+ i= WT_WAIT_STATS; /* timeouts have a dedicated last slot */ \
+ else \
+ { \
+ ulonglong w=(X)/10; /* NOTE(review): presumably 100ns ticks to us - confirm */ \
+ for (i=0; i < WT_WAIT_STATS && w > wt_wait_table[i]; i++) ; /* first bound >= w */ \
+ } \
+ my_atomic_rwlock_wrlock(&wait_stats_lock); \
+ my_atomic_add32(wt_wait_stats+i, 1); \
+ my_atomic_rwlock_wrunlock(&wait_stats_lock); \
+ } while (0)
+
+#define rc_rdlock(X) /* read-lock the rwlock of resource X, tracing the resource id */ \
+ do { \
+ WT_RESOURCE *R=(X); \
+ DBUG_PRINT("wt", ("LOCK resid=%lld for READ", R->id.value)); \
+ rw_rdlock(&R->lock); \
+ } while (0)
+#define rc_wrlock(X) /* write-lock the rwlock of resource X, tracing the resource id */ \
+ do { \
+ WT_RESOURCE *R=(X); \
+ DBUG_PRINT("wt", ("LOCK resid=%lld for WRITE", R->id.value)); \
+ rw_wrlock(&R->lock); \
+ } while (0)
+#define rc_unlock(X) /* release the rwlock of resource X (read or write), with a trace */ \
+ do { \
+ WT_RESOURCE *R=(X); \
+ DBUG_PRINT("wt", ("UNLOCK resid=%lld", R->id.value)); \
+ rw_unlock(&R->lock); \
+ } while (0)
+
+/*
+ All resources are stored in a lock-free hash. Different threads
+ may add new resources and perform deadlock detection concurrently.
+*/
+static LF_HASH reshash;
+
+/**
+  Constructor for a WT_RESOURCE element
+
+  wt_init() installs it as the allocator constructor of reshash. 'arg' points
+  to the LF_SLIST header; the WT_RESOURCE starts LF_HASH_OVERHEAD bytes later.
+*/
+static void wt_resource_init(uchar *arg)
+{
+  WT_RESOURCE *resource= (WT_RESOURCE *) (arg + LF_HASH_OVERHEAD);
+  DBUG_ENTER("wt_resource_init");
+
+  bzero(resource, sizeof(WT_RESOURCE));
+  my_init_dynamic_array(&resource->owners, sizeof(WT_THD *), 0, 5);
+  pthread_cond_init(&resource->cond, 0);
+  my_rwlock_init(&resource->lock, 0);
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Destructor for a WT_RESOURCE element
+
+  wt_init() installs it as the allocator destructor of reshash. 'arg' points
+  to the LF_SLIST header; the WT_RESOURCE starts LF_HASH_OVERHEAD bytes later.
+*/
+static void wt_resource_destroy(uchar *arg)
+{
+  WT_RESOURCE *rc= (WT_RESOURCE *) (arg + LF_HASH_OVERHEAD);
+  DBUG_ENTER("wt_resource_destroy");
+
+  DBUG_ASSERT(rc->owners.elements == 0);
+  delete_dynamic(&rc->owners);
+  pthread_cond_destroy(&rc->cond);
+  rwlock_destroy(&rc->lock);
+  DBUG_VOID_RETURN;
+}
+
+void wt_init()
+{
+  int i;
+  double from= log(1);     /* 1 us */
+  double to= log(60e6);    /* 1 min */
+  DBUG_ENTER("wt_init");
+
+  wt_success_stats=0;
+  bzero(wt_cycle_stats, sizeof(wt_cycle_stats));
+  bzero(wt_wait_stats, sizeof(wt_wait_stats));
+  my_atomic_rwlock_init(&wait_stats_lock);
+  my_atomic_rwlock_init(&success_stats_lock);
+  my_atomic_rwlock_init(&cycle_stats_lock);
+
+  /* wt_wait_table[]: WT_WAIT_STATS bounds from 1 us to 1 min, log scale */
+  for (i=0; i < WT_WAIT_STATS; i++)
+  {
+    wt_wait_table[i]=(ulonglong)exp((to-from)/(WT_WAIT_STATS-1)*i+from);
+    DBUG_ASSERT(i==0 || wt_wait_table[i-1] != wt_wait_table[i]);
+  }
+
+  lf_hash_init(&reshash, sizeof(WT_RESOURCE), LF_HASH_UNIQUE, 0,
+               sizeof(struct st_wt_resource_id), 0, 0);
+  /*
+    The hash was initialized with the size of a whole WT_RESOURCE, so the
+    allocator hands out complete elements. Shrinking element_size afterwards
+    makes lf_hash_insert() memcpy() only the part that precedes 'lock':
+    the lock, the condition and the dynamic array stay intact on insert.
+  */
+  reshash.element_size= offsetof(WT_RESOURCE, lock);
+  reshash.alloc.constructor= wt_resource_init;
+  reshash.alloc.destructor= wt_resource_destroy;
+  DBUG_VOID_RETURN;
+}
+
+void wt_end()
+{
+  DBUG_ENTER("wt_end");
+  /* undoes wt_init(); the resource hash is expected to be empty by now */
+  DBUG_ASSERT(reshash.count == 0);
+  my_atomic_rwlock_destroy(&wait_stats_lock);
+  my_atomic_rwlock_destroy(&success_stats_lock);
+  my_atomic_rwlock_destroy(&cycle_stats_lock);
+  lf_hash_destroy(&reshash);
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Lazy WT_THD initialization
+
+  Cheap: only does assignments, allocates no memory. 'pins' is reset to 0
+  so that fix_thd_pins() and wt_thd_destroy() see that the lock-free hash
+  pins were not acquired yet; they are obtained on demand, on first use.
+
+  @param thd the descriptor to initialize
+  @param ds a pointer to deadlock search depth short value
+  @param ts a pointer to deadlock timeout short value
+  @param dl a pointer to deadlock search depth long value
+  @param tl a pointer to deadlock timeout long value
+*/
+void wt_thd_lazy_init(WT_THD *thd, ulong *ds, ulong *ts, ulong *dl, ulong *tl)
+{
+  DBUG_ENTER("wt_thd_lazy_init");
+  thd->waiting_for=0;
+  thd->pins= 0;
+  thd->my_resources.buffer= 0;
+  thd->my_resources.elements= 0;
+  thd->weight=0;
+  thd->deadlock_search_depth_short= ds;
+  thd->timeout_short= ts;
+  thd->deadlock_search_depth_long= dl;
+  thd->timeout_long= tl;
+  /* dynamic array is also initialized lazily - without memory allocations */
+  my_init_dynamic_array(&thd->my_resources, sizeof(WT_RESOURCE *), 0, 5);
+#ifndef DBUG_OFF
+  thd->name=my_thread_name();
+#endif
+  DBUG_VOID_RETURN;
+}
+
+/**
+  Finalize WT_THD initialization
+
+  Completes what wt_thd_lazy_init() left undone: acquires lock-free hash
+  pins for 'thd' unless it has them already. Called on demand, before use.
+
+  @return 0 if thd->pins is set, 1 if the pins could not be obtained
+*/
+static int fix_thd_pins(WT_THD *thd)
+{
+  if (likely(thd->pins != 0))
+    return 0;                           /* already complete */
+
+  thd->pins= lf_hash_get_pins(&reshash);
+#ifndef DBUG_OFF
+  thd->name= my_thread_name();
+#endif
+  return thd->pins == 0;
+}
+
+void wt_thd_destroy(WT_THD *thd)
+{
+  DBUG_ENTER("wt_thd_destroy");
+
+  /* the thread must not own any resources at this point */
+  DBUG_ASSERT(thd->my_resources.elements == 0);
+  thd->waiting_for=0;
+  delete_dynamic(&thd->my_resources);
+  /* pins were taken only if fix_thd_pins() ever ran for this thd */
+  if (thd->pins)
+    lf_hash_put_pins(thd->pins);
+  DBUG_VOID_RETURN;
+}
+/**
+  Trivial resource id comparison: bytewise memcmp() of two WT_RESOURCE_IDs.
+  Usable in WT_RESOURCE_TYPE structures whose values need no more than a
+  bytewise comparison.
+*/
+int wt_resource_id_memcmp(void *a, void *b)
+{
+  const size_t id_size= sizeof(WT_RESOURCE_ID);
+  return memcmp(a, b, id_size);
+}
+
+/**
+ arguments (and results) shared by all frames of the recursive deadlock_search()
+*/
+struct deadlock_arg {
+ WT_THD *thd; /**< the searching thread; a cycle must lead back to it */
+ uint max_depth; /**< search depth limit */
+ WT_THD *victim; /**< a thread to be killed to resolve a deadlock */
+ WT_RESOURCE *rc; /**< resource left locked by the last frame, see deadlock_search() */
+};
+
+/**
+  make 'found' the new victim if its weight is below the current victim's
+*/
+static void change_victim(WT_THD* found, struct deadlock_arg *arg)
+{
+  if (found->weight >= arg->victim->weight)
+    return;                             /* the current victim stays */
+
+  if (arg->victim != arg->thd)
+  {
+    rc_unlock(arg->victim->waiting_for); /* release the previous victim */
+    DBUG_ASSERT(arg->rc == found->waiting_for);
+  }
+  arg->rc= 0;
+  arg->victim= found;
+}
+
+/**
+ recursive loop detection in a wait-for graph with a limited search depth
+*/
+static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker,
+ uint depth)
+{
+ WT_RESOURCE *rc, *volatile *shared_ptr= &blocker->waiting_for;
+ WT_THD *cursor;
+ uint i;
+ int ret= WT_OK;
+ DBUG_ENTER("deadlock_search");
+ DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, depth=%u",
+ arg->thd->name, blocker->name, depth));
+
+ LF_REQUIRE_PINS(1);
+
+ arg->rc= 0;
+
+ if (depth > arg->max_depth)
+ {
+ DBUG_PRINT("wt", ("exit: WT_DEPTH_EXCEEDED (early)"));
+ DBUG_RETURN(WT_DEPTH_EXCEEDED);
+ }
+
+retry:
+ /* safe dereference as explained in lf_alloc-pin.c */
+ do
+ {
+ rc= *shared_ptr;
+ lf_pin(arg->thd->pins, 0, rc);
+ } while (rc != *shared_ptr && LF_BACKOFF);
+
+ if (rc == 0)
+ {
+ DBUG_PRINT("wt", ("exit: OK (early)"));
+ DBUG_RETURN(0); /* blocker is not waiting: no edge to follow */
+ }
+
+ rc_rdlock(rc);
+ if (rc->state != ACTIVE || *shared_ptr != rc) /* changed before we got the lock */
+ {
+ rc_unlock(rc);
+ lf_unpin(arg->thd->pins, 0);
+ goto retry;
+ }
+ /* as the state is locked, we can unpin now */
+ lf_unpin(arg->thd->pins, 0);
+
+ /*
+ Below is not a pure depth-first search. It's a depth-first with a
+ slightest hint of breadth-first. Depth-first is:
+
+ check(element):
+ foreach current in element->nodes[] do:
+ if current == element return error;
+ check(current);
+
+ while we do
+
+ check(element):
+ foreach current in element->nodes[] do:
+ if current == element return error;
+ foreach current in element->nodes[] do:
+ check(current);
+ */
+ for (i=0; i < rc->owners.elements; i++)
+ {
+ cursor= *dynamic_element(&rc->owners, i, WT_THD**);
+ /*
+ We're only looking for (and detecting) cycles that include 'arg->thd'.
+ That is, only deadlocks that *we* have created. For example,
+ thd->A->B->thd
+ (thd waits for A, A waits for B, while B is waiting for thd).
+ While walking the graph we can encounter other cycles, e.g.
+ thd->A->B->C->A
+ This will not be detected. Instead we will walk it in circles until
+ the search depth limit is reached (the latter guarantees that an
+ infinite loop is impossible). We expect the thread that has created
+ the cycle (one of A, B, and C) to detect its deadlock.
+ */
+ if (cursor == arg->thd)
+ {
+ ret= WT_DEADLOCK;
+ increment_cycle_stats(depth, arg->max_depth ==
+ *arg->thd->deadlock_search_depth_long);
+ arg->victim= cursor;
+ goto end;
+ }
+ }
+ for (i=0; i < rc->owners.elements; i++)
+ {
+ cursor= *dynamic_element(&rc->owners, i, WT_THD**);
+ switch (deadlock_search(arg, cursor, depth+1)) {
+ case WT_OK:
+ break;
+ case WT_DEPTH_EXCEEDED:
+ ret= WT_DEPTH_EXCEEDED;
+ break;
+ case WT_DEADLOCK:
+ ret= WT_DEADLOCK;
+ change_victim(cursor, arg); /* also sets arg->rc to 0 */
+ i= rc->owners.elements; /* jump out of the loop */
+ break;
+ default:
+ DBUG_ASSERT(0);
+ }
+ if (arg->rc) /* the callee's resource is not needed for a victim */
+ rc_unlock(arg->rc);
+ }
+end:
+ /*
+ Note that 'rc' is locked in this function, but it's never unlocked here.
+ Instead it's saved in arg->rc and the *caller* is expected to unlock it.
+ It's done to support different killing strategies. This is how it works:
+ Assuming a graph
+
+ thd->A->B->C->thd
+
+ deadlock_search() function starts from thd, locks it (in fact it locks not
+ a thd, but a resource it is waiting on, but below, for simplicity, I'll
+ talk about "locking a thd"). Then it goes down recursively, locks A, and so
+ on. Goes down recursively, locks B. Goes down recursively, locks C.
+ Notices that C is waiting on thd. Deadlock detected. Sets arg->victim=thd.
+ Returns from the last deadlock_search() call. C stays locked!
+ Now it checks whether C is a more appropriate victim than 'thd'.
+ If yes - arg->victim=C, otherwise C is unlocked. Returns. B stays locked.
+ Now it checks whether B is a more appropriate victim than arg->victim.
+ If yes - old arg->victim is unlocked and arg->victim=B,
+ otherwise B is unlocked. Return.
+ And so on.
+
+ In short, a resource is locked in a frame. But it's not unlocked in the
+ same frame, it's unlocked by the caller, and only after the caller checks
+ that it doesn't need to use current WT_THD as a victim. If it does - the
+ lock is kept and the old victim's resource is unlocked. When the recursion
+ is unrolled and we are back to deadlock() function, there are only two
+ locks left - on thd and on the victim.
+ */
+ arg->rc= rc;
+ DBUG_PRINT("wt", ("exit: %s",
+ ret == WT_DEPTH_EXCEEDED ? "WT_DEPTH_EXCEEDED" :
+ ret ? "WT_DEADLOCK" : "OK"));
+ DBUG_RETURN(ret);
+}
+
+/**
+ Deadlock detection in a wait-for graph
+
+ A wrapper for recursive deadlock_search() - prepares deadlock_arg structure,
+ invokes deadlock_search(), increments statistics, notifies the victim.
+
+ @param thd thread that is going to wait. Deadlock is detected
+ if, while walking the graph, we reach a thread that
+ is waiting on thd
+ @param blocker starting point of a search. In wt_thd_cond_timedwait()
+ it's thd, in wt_thd_will_wait_for() it's a thread that
+ thd is going to wait for
+ @param depth starting search depth: the number of edges in the
+ wait-for graph between thd and the blocker. Only two
+ values are used (and supported) - 0 when thd == blocker,
+ 1 when thd waits directly for blocker
+ @param max_depth search depth limit
+ @return WT_DEADLOCK if thd itself is the victim, WT_OK otherwise
+*/
+static int deadlock(WT_THD *thd, WT_THD *blocker, uint depth,
+ uint max_depth)
+{
+ struct deadlock_arg arg= {thd, max_depth, 0, 0};
+ int ret;
+ DBUG_ENTER("deadlock");
+ ret= deadlock_search(&arg, blocker, depth);
+ if (ret == WT_DEPTH_EXCEEDED)
+ {
+ increment_cycle_stats(WT_CYCLE_STATS, max_depth ==
+ *thd->deadlock_search_depth_long);
+ ret= WT_OK; /* search gave up: reported as "no deadlock" */
+ }
+ /*
+ if we started with depth==1, blocker was never considered for a victim
+ in deadlock_search(). Do it here.
+ */
+ if (ret == WT_DEADLOCK && depth)
+ change_victim(blocker, &arg);
+ if (arg.rc)
+ rc_unlock(arg.rc);
+ /* notify the victim, if appropriate */
+ if (ret == WT_DEADLOCK && arg.victim != thd)
+ {
+ DBUG_PRINT("wt", ("killing %s", arg.victim->name));
+ arg.victim->killed=1;
+ pthread_cond_broadcast(&arg.victim->waiting_for->cond);
+ rc_unlock(arg.victim->waiting_for);
+ ret= WT_OK; /* another thread was chosen as the victim, not thd */
+ }
+ DBUG_RETURN(ret);
+}
+
+
+/**
+ Delete an element from reshash if it has no waiters or owners
+
+ rc->lock must be locked by the caller and it's unlocked on return.
+*/
+static int unlock_lock_and_free_resource(WT_THD *thd, WT_RESOURCE *rc)
+{
+ uint keylen;
+ const void *key;
+ DBUG_ENTER("unlock_lock_and_free_resource");
+
+ DBUG_ASSERT(rc->state == ACTIVE);
+
+ if (rc->owners.elements || rc->waiter_count)
+ {
+ DBUG_PRINT("wt", ("nothing to do, %d owners, %d waiters",
+ rc->owners.elements, rc->waiter_count));
+ rc_unlock(rc);
+ DBUG_RETURN(0); /* still in use - not an error */
+ }
+
+ if (fix_thd_pins(thd))
+ {
+ rc_unlock(rc);
+ DBUG_RETURN(1); /* no pins - cannot touch the hash */
+ }
+
+ /* XXX if (rc->id.type->make_key) key= rc->id.type->make_key(&rc->id, &keylen); else */
+ {
+ key= &rc->id;
+ keylen= sizeof(rc->id);
+ }
+
+ /*
+ To free the element correctly we need to:
+ 1. take its lock (already done).
+ 2. set the state to FREE
+ 3. release the lock
+ 4. remove from the hash
+
+ I *think* it's safe to release the lock while the element is still
+ in the hash. If not, the corrected procedure should be
+ 3. pin; 4. remove; 5. release; 6. unpin - and it'll need pin[3].
+ */
+ rc->state=FREE;
+ rc_unlock(rc);
+ DBUG_RETURN(lf_hash_delete(&reshash, thd->pins, key, keylen) == -1); /* 1 only on -1 */
+}
+
+
+/**
+  Record that thd stopped waiting: decrement waiter_count, clear waiting_for,
+  free the resource if appropriate. thd->waiting_for must be locked!
+  @return WT_DEADLOCK if thd was killed or the cleanup failed, else WT_OK
+*/
+static int stop_waiting_locked(WT_THD *thd)
+{
+  WT_RESOURCE *rc= thd->waiting_for;
+  int free_failed;
+  DBUG_ENTER("stop_waiting_locked");
+  DBUG_ASSERT(rc->waiter_count);
+  DBUG_ASSERT(rc->state == ACTIVE);
+  rc->waiter_count--;
+  thd->waiting_for= 0;
+  free_failed= unlock_lock_and_free_resource(thd, rc);
+  if (thd->killed || free_failed)
+    DBUG_RETURN(WT_DEADLOCK);
+  DBUG_RETURN(WT_OK);
+}
+
+/**
+  Record that thd has stopped waiting (for callers not holding the lock)
+
+  Does nothing if thd is not waiting. Otherwise takes a write lock on
+  thd->waiting_for and hands over to stop_waiting_locked().
+  @return the stop_waiting_locked() result, or WT_OK if thd wasn't waiting
+*/
+static int stop_waiting(WT_THD *thd)
+{
+  WT_RESOURCE *rc= thd->waiting_for;
+  int ret= WT_OK;
+  DBUG_ENTER("stop_waiting");
+
+  if (rc != 0)
+  {
+    /* waiter_count is guaranteed non-zero, so nobody's freeing rc now */
+    rc_wrlock(rc);
+    ret= stop_waiting_locked(thd);
+  }
+  DBUG_RETURN(ret);
+}
+
+/**
+ notify the system that a thread needs to wait for another thread
+
+ called by a *waiter* to declare what resource it will wait for.
+ can be called many times, if many blockers own a blocking resource.
+ but must always be called with the same resource id - a thread cannot
+ wait for more than one resource at a time.
+
+ As a new edge is added to the wait-for graph, a deadlock detection is
+ performed for this new edge. Returns 0 or WT_DEADLOCK (also used for OOM).
+*/
+int wt_thd_will_wait_for(WT_THD *thd, WT_THD *blocker, WT_RESOURCE_ID *resid)
+{
+ uint i;
+ WT_RESOURCE *rc;
+ DBUG_ENTER("wt_thd_will_wait_for");
+
+ LF_REQUIRE_PINS(3);
+
+ DBUG_PRINT("wt", ("enter: thd=%s, blocker=%s, resid=%llu",
+ thd->name, blocker->name, resid->value));
+
+ if (fix_thd_pins(thd))
+ DBUG_RETURN(WT_DEADLOCK);
+
+ if (thd->waiting_for == 0)
+ {
+ uint keylen;
+ const void *key;
+ /* XXX if (restype->make_key) key= restype->make_key(resid, &keylen); else */
+ {
+ key= resid;
+ keylen= sizeof(*resid);
+ }
+
+ DBUG_PRINT("wt", ("first blocker"));
+
+retry:
+ while ((rc= lf_hash_search(&reshash, thd->pins, key, keylen)) == 0)
+ {
+ WT_RESOURCE tmp;
+
+ DBUG_PRINT("wt", ("failed to find rc in hash, inserting"));
+ bzero(&tmp, sizeof(tmp));
+ tmp.id= *resid;
+ tmp.state= ACTIVE;
+
+ if (lf_hash_insert(&reshash, thd->pins, &tmp) == -1) /* if OOM */
+ DBUG_RETURN(WT_DEADLOCK);
+ /*
+ Two cases: either lf_hash_insert() failed - because another thread
+ has just inserted a resource with the same id - and we need to retry.
+ Or lf_hash_insert() succeeded, and then we need to repeat
+ lf_hash_search() to find a real address of the newly inserted element.
+ That is, we don't care what lf_hash_insert() has returned.
+ And we need to repeat the loop anyway.
+ */
+ }
+ if (rc == MY_ERRPTR)
+ DBUG_RETURN(WT_DEADLOCK);
+
+ DBUG_PRINT("wt", ("found in hash rc=%p", rc));
+
+ rc_wrlock(rc);
+ if (rc->state != ACTIVE)
+ {
+ DBUG_PRINT("wt", ("but it's not active, retrying"));
+ /* Somebody has freed the element while we weren't looking */
+ rc_unlock(rc);
+ lf_hash_search_unpin(thd->pins);
+ goto retry;
+ }
+
+ lf_hash_search_unpin(thd->pins); /* the element cannot go away anymore */
+ thd->waiting_for= rc;
+ rc->waiter_count++;
+ thd->killed= 0; /* a new wait starts with a clean "killed" flag */
+ }
+ else
+ {
+ DBUG_ASSERT(thd->waiting_for->id.type == resid->type);
+ DBUG_ASSERT(resid->type->compare(&thd->waiting_for->id, resid) == 0);
+ DBUG_PRINT("wt", ("adding another blocker"));
+
+ /*
+ we can safely access the resource here, it's in the hash as it has
+ at least one owner, and non-zero waiter_count
+ */
+ rc= thd->waiting_for;
+ rc_wrlock(rc);
+ DBUG_ASSERT(rc->waiter_count);
+ DBUG_ASSERT(rc->state == ACTIVE);
+
+ if (thd->killed)
+ {
+ stop_waiting_locked(thd);
+ DBUG_RETURN(WT_DEADLOCK);
+ }
+ }
+ for (i=0; i < rc->owners.elements; i++) /* is blocker a known owner already? */
+ if (*dynamic_element(&rc->owners, i, WT_THD**) == blocker)
+ break;
+ if (i >= rc->owners.elements) /* no - register it on both sides */
+ {
+ if (push_dynamic(&blocker->my_resources, (void*)&rc))
+ {
+ stop_waiting_locked(thd);
+ DBUG_RETURN(WT_DEADLOCK); /* deadlock and OOM use the same error code */
+ }
+ if (push_dynamic(&rc->owners, (void*)&blocker))
+ {
+ pop_dynamic(&blocker->my_resources); /* undo the first push */
+ stop_waiting_locked(thd);
+ DBUG_RETURN(WT_DEADLOCK);
+ }
+ }
+ rc_unlock(rc);
+
+ if (deadlock(thd, blocker, 1, *thd->deadlock_search_depth_short))
+ {
+ stop_waiting(thd);
+ DBUG_RETURN(WT_DEADLOCK);
+ }
+ DBUG_RETURN(0);
+}
+
+/**
+ called by a *waiter* to start waiting
+
+ It's supposed to be a drop-in replacement for
+ pthread_cond_timedwait(), and it takes mutex as an argument.
+*/
+int wt_thd_cond_timedwait(WT_THD *thd, pthread_mutex_t *mutex)
+{
+ int ret= WT_TIMEOUT;
+ struct timespec timeout;
+ ulonglong before, after, starttime;
+ WT_RESOURCE *rc= thd->waiting_for;
+ DBUG_ENTER("wt_thd_cond_timedwait");
+ DBUG_PRINT("wt", ("enter: thd=%s, rc=%p", thd->name, rc));
+
+#ifndef DBUG_OFF
+ if (rc->mutex)
+ DBUG_ASSERT(rc->mutex == mutex);
+ else
+ rc->mutex= mutex;
+ safe_mutex_assert_owner(mutex);
+#endif
+
+ before= starttime= my_getsystime();
+
+#ifdef __WIN__
+ /*
+ only for the sake of Windows we distinguish between
+ 'before' and 'starttime'
+ */
+ GetSystemTimeAsFileTime((PFILETIME)&starttime);
+#endif
+
+ rc_wrlock(rc);
+ if (rc->owners.elements == 0 || thd->killed) /* nothing to wait for */
+ ret= WT_OK;
+ rc_unlock(rc);
+
+ set_timespec_time_nsec(timeout, starttime, (*thd->timeout_short)*ULL(1000));
+ if (ret == WT_TIMEOUT) /* NOTE(review): relies on WT_TIMEOUT == ETIMEDOUT - confirm */
+ ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout);
+ if (ret == WT_TIMEOUT) /* short wait expired: do the long-depth search */
+ {
+ if (deadlock(thd, thd, 0, *thd->deadlock_search_depth_long))
+ ret= WT_DEADLOCK;
+ else if (*thd->timeout_long > *thd->timeout_short)
+ {
+ set_timespec_time_nsec(timeout, starttime, (*thd->timeout_long)*ULL(1000));
+ if (!thd->killed)
+ ret= pthread_cond_timedwait(&rc->cond, mutex, &timeout);
+ }
+ }
+ after= my_getsystime();
+ if (stop_waiting(thd) == WT_DEADLOCK) /* if we're killed */
+ ret= WT_DEADLOCK;
+ increment_wait_stats(after-before, ret);
+ if (ret == WT_OK)
+ increment_success_stats();
+ DBUG_RETURN(ret);
+}
+
+/**
+ called by a *blocker* when it releases a resource
+
+ it's conceptually similar to pthread_cond_broadcast, and must be done
+ under the same mutex as wt_thd_cond_timedwait().
+
+ @param resid a resource to release. 0 to release all resources
+*/
+
+void wt_thd_release(WT_THD *thd, WT_RESOURCE_ID *resid)
+{
+ uint i;
+ DBUG_ENTER("wt_thd_release");
+
+ for (i=0; i < thd->my_resources.elements; i++)
+ {
+ uint j;
+ WT_RESOURCE *rc= *dynamic_element(&thd->my_resources, i, WT_RESOURCE**);
+ if (!resid || (resid->type->compare(&rc->id, resid) == 0))
+ {
+ rc_wrlock(rc);
+ /*
+ nobody's trying to free the resource now,
+ as its owners[] array is not empty (at least thd must be there)
+ */
+ DBUG_ASSERT(rc->state == ACTIVE);
+ for (j=0; j < rc->owners.elements; j++) /* find thd among the owners */
+ if (*dynamic_element(&rc->owners, j, WT_THD**) == thd)
+ break;
+ DBUG_ASSERT(j < rc->owners.elements);
+ delete_dynamic_element(&rc->owners, j);
+ if (rc->owners.elements == 0) /* last owner gone: wake up the waiters */
+ {
+ pthread_cond_broadcast(&rc->cond);
+#ifndef DBUG_OFF
+ if (rc->mutex)
+ safe_mutex_assert_owner(rc->mutex);
+#endif
+ }
+ unlock_lock_and_free_resource(thd, rc);
+ if (resid) /* a single resource was requested and it's done */
+ {
+ delete_dynamic_element(&thd->my_resources, i);
+ DBUG_VOID_RETURN;
+ }
+ }
+ }
+ if (!resid) /* everything was released: forget the whole list at once */
+ reset_dynamic(&thd->my_resources);
+ DBUG_VOID_RETURN;
+}
+
=== modified file 'server-tools/instance-manager/thread_registry.cc'
--- a/server-tools/instance-manager/thread_registry.cc 2007-08-13 13:11:25 +0000
+++ b/server-tools/instance-manager/thread_registry.cc 2008-08-29 04:41:19 +0000
@@ -21,6 +21,7 @@
#include <thr_alarm.h>
#include <signal.h>
#include "log.h"
+#include <my_sys.h>
#ifndef __WIN__
/* Kick-off signal handler */
=== modified file 'sql-common/client.c'
--- a/sql-common/client.c 2008-03-29 08:02:54 +0000
+++ b/sql-common/client.c 2008-08-08 11:11:27 +0000
@@ -996,7 +996,7 @@ static int add_init_command(struct st_my
{
options->init_commands= (DYNAMIC_ARRAY*)my_malloc(sizeof(DYNAMIC_ARRAY),
MYF(MY_WME));
- init_dynamic_array(options->init_commands,sizeof(char*),0,5 CALLER_INFO);
+ init_dynamic_array(options->init_commands,sizeof(char*),5,5 CALLER_INFO);
}
if (!(tmp= my_strdup(cmd,MYF(MY_WME))) ||
=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc 2008-06-03 19:35:39 +0000
+++ b/sql/mysqld.cc 2008-08-28 12:43:44 +0000
@@ -26,6 +26,7 @@
#include "mysqld_suffix.h"
#include "mysys_err.h"
#include "events.h"
+#include <waiting_threads.h>
#include "../storage/myisam/ha_myisam.h"
@@ -1240,6 +1241,7 @@ void clean_up(bool print_message)
if (tc_log)
tc_log->close();
xid_cache_free();
+ wt_end();
delete_elements(&key_caches, (void (*)(const char*, uchar*)) free_key_cache);
multi_keycache_free();
free_status_vars();
@@ -3665,6 +3667,8 @@ static int init_server_components()
if (table_cache_init() | table_def_init() | hostname_cache_init())
unireg_abort(1);
+ wt_init();
+
query_cache_result_size_limit(query_cache_limit);
query_cache_set_min_res_unit(query_cache_min_res_unit);
query_cache_init();
@@ -3705,6 +3709,9 @@ static int init_server_components()
/* set up the hook before initializing plugins which may use it */
error_handler_hook= my_message_sql;
+ proc_info_hook= (const char *(*)(void *, const char *, const char *,
+ const char *, const unsigned int))
+ set_thd_proc_info;
if (xid_cache_init())
{
@@ -5566,7 +5573,11 @@ enum options_mysqld
OPT_MIN_EXAMINED_ROW_LIMIT,
OPT_LOG_SLOW_SLAVE_STATEMENTS,
OPT_DEBUG_CRC, OPT_DEBUG_ON, OPT_OLD_MODE,
- OPT_SLAVE_EXEC_MODE
+ OPT_SLAVE_EXEC_MODE,
+ OPT_DEADLOCK_SEARCH_DEPTH_SHORT,
+ OPT_DEADLOCK_SEARCH_DEPTH_LONG,
+ OPT_DEADLOCK_TIMEOUT_SHORT,
+ OPT_DEADLOCK_TIMEOUT_LONG
};
@@ -5689,6 +5700,26 @@ struct my_option my_long_options[] =
NO_ARG, 0, 0, 0, 0, 0, 0},
{"datadir", 'h', "Path to the database root.", (uchar**) &mysql_data_home,
(uchar**) &mysql_data_home, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"deadlock-search-depth-short", OPT_DEADLOCK_SEARCH_DEPTH_SHORT,
+ "Short search depth for the two-step deadlock detection",
+ (uchar**) &global_system_variables.wt_deadlock_search_depth_short,
+ (uchar**) &max_system_variables.wt_deadlock_search_depth_short,
+ 0, GET_ULONG, REQUIRED_ARG, 4, 0, 32, 0, 0, 0},
+ {"deadlock-search-depth-long", OPT_DEADLOCK_SEARCH_DEPTH_LONG,
+ "Long search depth for the two-step deadlock detection",
+ (uchar**) &global_system_variables.wt_deadlock_search_depth_long,
+ (uchar**) &max_system_variables.wt_deadlock_search_depth_long,
+ 0, GET_ULONG, REQUIRED_ARG, 15, 0, 33, 0, 0, 0},
+ {"deadlock-timeout-short", OPT_DEADLOCK_TIMEOUT_SHORT,
+ "Short timeout for the two-step deadlock detection (in microseconds)",
+ (uchar**) &global_system_variables.wt_timeout_short,
+ (uchar**) &max_system_variables.wt_timeout_short,
+ 0, GET_ULONG, REQUIRED_ARG, 100, 0, ULONG_MAX, 0, 0, 0},
+ {"deadlock-timeout-long", OPT_DEADLOCK_TIMEOUT_LONG,
+ "Long timeout for the two-step deadlock detection (in microseconds)",
+ (uchar**) &global_system_variables.wt_timeout_long,
+ (uchar**) &max_system_variables.wt_timeout_long,
+ 0, GET_ULONG, REQUIRED_ARG, 10000, 0, ULONG_MAX, 0, 0, 0},
#ifndef DBUG_OFF
{"debug", '#', "Debug log.", (uchar**) &default_dbug_option,
(uchar**) &default_dbug_option, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
=== modified file 'sql/set_var.cc'
--- a/sql/set_var.cc 2008-04-28 16:24:05 +0000
+++ b/sql/set_var.cc 2008-08-08 11:11:27 +0000
@@ -59,7 +59,7 @@
#include <thr_alarm.h>
#include <myisam.h>
#include <my_dir.h>
-
+#include <waiting_threads.h>
#include "events.h"
/* WITH_NDBCLUSTER_STORAGE_ENGINE */
@@ -227,6 +227,19 @@ static sys_var_long_ptr sys_concurrent_i
static sys_var_long_ptr sys_connect_timeout(&vars, "connect_timeout",
&connect_timeout);
static sys_var_const_str sys_datadir(&vars, "datadir", mysql_real_data_home);
+
+static sys_var_thd_ulong sys_deadlock_search_depth_short(&vars,
+ "deadlock_search_depth_short",
+ &SV::wt_deadlock_search_depth_short);
+static sys_var_thd_ulong sys_deadlock_search_depth_long(&vars,
+ "deadlock_search_depth_long",
+ &SV::wt_deadlock_search_depth_long);
+static sys_var_thd_ulong sys_deadlock_timeout_short(&vars,
+ "deadlock_timeout_short",
+ &SV::wt_timeout_short);
+static sys_var_thd_ulong sys_deadlock_timeout_long(&vars,
+ "deadlock_timeout_long",
+ &SV::wt_timeout_long);
#ifndef DBUG_OFF
static sys_var_thd_dbug sys_dbug(&vars, "debug");
#endif
=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc 2008-07-09 13:09:30 +0000
+++ b/sql/sql_class.cc 2008-08-28 12:43:44 +0000
@@ -248,13 +248,16 @@ int thd_tablespace_op(const THD *thd)
extern "C"
-const char *set_thd_proc_info(THD *thd, const char *info,
- const char *calling_function,
- const char *calling_file,
+const char *set_thd_proc_info(THD *thd, const char *info,
+ const char *calling_function,
+ const char *calling_file,
const unsigned int calling_line)
{
+ if (!thd)
+ thd= current_thd;
+
const char *old_info= thd->proc_info;
- DBUG_PRINT("proc_info", ("%s:%d %s", calling_file, calling_line,
+ DBUG_PRINT("proc_info", ("%s:%d %s", calling_file, calling_line,
(info != NULL) ? info : "(null)"));
#if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER)
thd->profiling.status_change(info, calling_function, calling_file, calling_line);
@@ -583,6 +586,10 @@ THD::THD()
peer_port= 0; // For SHOW PROCESSLIST
transaction.m_pending_rows_event= 0;
transaction.on= 1;
+ wt_thd_lazy_init(&transaction.wt, &variables.wt_deadlock_search_depth_short,
+ &variables.wt_timeout_short,
+ &variables.wt_deadlock_search_depth_long,
+ &variables.wt_timeout_long);
#ifdef SIGNAL_WITH_VIO_CLOSE
active_vio = 0;
#endif
@@ -829,6 +836,7 @@ void THD::cleanup(void)
lock=locked_tables; locked_tables=0;
close_thread_tables(this);
}
+ wt_thd_destroy(&transaction.wt);
mysql_ha_cleanup(this);
delete_dynamic(&user_var_events);
hash_free(&user_vars);
@@ -836,7 +844,7 @@ void THD::cleanup(void)
my_free((char*) variables.time_format, MYF(MY_ALLOW_ZERO_PTR));
my_free((char*) variables.date_format, MYF(MY_ALLOW_ZERO_PTR));
my_free((char*) variables.datetime_format, MYF(MY_ALLOW_ZERO_PTR));
-
+
sp_cache_clear(&sp_proc_cache);
sp_cache_clear(&sp_func_cache);
@@ -874,12 +882,12 @@ THD::~THD()
#endif
stmt_map.reset(); /* close all prepared statements */
DBUG_ASSERT(lock_info.n_cursors == 0);
- if (!cleanup_done)
- cleanup();
-
ha_close_connection(this);
plugin_thdvar_cleanup(this);
+ if (!cleanup_done)
+ cleanup();
+
DBUG_PRINT("info", ("freeing security context"));
main_security_ctx.destroy();
safeFree(db);
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2008-04-28 16:24:05 +0000
+++ b/sql/sql_class.h 2008-08-28 12:43:44 +0000
@@ -22,6 +22,7 @@
#include "log.h"
#include "rpl_tblmap.h"
+#include <waiting_threads.h>
class Relay_log_info;
@@ -352,6 +353,9 @@ struct system_variables
DATE_TIME_FORMAT *time_format;
my_bool sysdate_is_now;
+ /* deadlock detection */
+ ulong wt_timeout_short, wt_deadlock_search_depth_short;
+ ulong wt_timeout_long, wt_deadlock_search_depth_long;
};
@@ -1327,6 +1331,7 @@ public:
THD_TRANS stmt; // Trans for current statement
bool on; // see ha_enable_transaction()
XID_STATE xid_state;
+ WT_THD wt;
Rows_log_event *m_pending_rows_event;
/*
=== modified file 'sql/sql_profile.cc'
--- a/sql/sql_profile.cc 2008-02-19 20:47:15 +0000
+++ b/sql/sql_profile.cc 2008-08-27 12:15:06 +0000
@@ -38,9 +38,6 @@
#define MAX_QUERY_LENGTH 300
-/* Reserved for systems that can't record the function name in source. */
-const char * const _unknown_func_ = "<unknown>";
-
/**
Connects Information_Schema and Profiling.
*/
=== modified file 'sql/sql_profile.h'
--- a/sql/sql_profile.h 2007-12-14 13:57:37 +0000
+++ b/sql/sql_profile.h 2008-08-27 12:15:06 +0000
@@ -16,27 +16,6 @@
#ifndef _SQL_PROFILE_H
#define _SQL_PROFILE_H
-#if __STDC_VERSION__ < 199901L
-# if __GNUC__ >= 2
-# define __func__ __FUNCTION__
-# else
-# define __func__ _unknown_func_
-extern const char * const _unknown_func_;
-# endif
-#elif defined(_MSC_VER)
-# if _MSC_VER < 1300
-# define __func__ _unknown_func_
-extern const char * const _unknown_func_;
-# else
-# define __func__ __FUNCTION__
-# endif
-#elif defined(__BORLANDC__)
-# define __func__ __FUNC__
-#else
-# define __func__ _unknown_func_
-extern const char * const _unknown_func_;
-#endif
-
extern ST_FIELD_INFO query_profile_statistics_info[];
int fill_query_profile_statistics_info(THD *thd, TABLE_LIST *tables, Item *cond);
int make_profile_table_for_show(THD *thd, ST_SCHEMA_TABLE *schema_table);
=== modified file 'storage/maria/ha_maria.cc'
--- a/storage/maria/ha_maria.cc 2008-08-25 12:13:31 +0000
+++ b/storage/maria/ha_maria.cc 2008-08-28 18:52:23 +0000
@@ -2289,10 +2289,7 @@ int ha_maria::external_lock(THD *thd, in
/* Start of new statement */
if (!trn) /* no transaction yet - open it now */
{
- trn= trnman_new_trn(& thd->mysys_var->mutex,
- & thd->mysys_var->suspend,
- thd->thread_stack + STACK_DIRECTION *
- (my_thread_stack_size - STACK_MIN_SIZE));
+ trn= trnman_new_trn(& thd->transaction.wt);
if (unlikely(!trn))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
THD_TRN= trn;
@@ -2371,9 +2368,8 @@ int ha_maria::external_lock(THD *thd, in
This is a bit excessive, ACID requires this only if there are some
changes to commit (rollback shouldn't be tested).
*/
-#ifdef WAITING_FOR_PATCH_FROM_SANJA
- DBUG_ASSERT(!thd->main_da.is_sent);
-#endif
+ DBUG_ASSERT(!thd->main_da.is_sent ||
+ thd->killed == THD::KILL_CONNECTION);
/* autocommit ? rollback a transaction */
#ifdef MARIA_CANNOT_ROLLBACK
if (ma_commit(trn))
@@ -2484,10 +2480,7 @@ int ha_maria::implicit_commit(THD *thd,
tables may be under LOCK TABLES, and so they will start the next
statement assuming they have a trn (see ha_maria::start_stmt()).
*/
- trn= trnman_new_trn(& thd->mysys_var->mutex,
- & thd->mysys_var->suspend,
- thd->thread_stack + STACK_DIRECTION *
- (my_thread_stack_size - STACK_MIN_SIZE));
+ trn= trnman_new_trn(& thd->transaction.wt);
/* This is just a commit, tables stay locked if they were: */
trnman_reset_locked_tables(trn, locked_tables);
THD_TRN= trn;
=== modified file 'storage/maria/ma_blockrec.c'
--- a/storage/maria/ma_blockrec.c 2008-08-11 14:40:32 +0000
+++ b/storage/maria/ma_blockrec.c 2008-08-28 18:52:23 +0000
@@ -3094,7 +3094,7 @@ static my_bool write_block_record(MARIA_
info->rec_buff);
log_pos= store_page_range(log_pos, head_block+1, block_size,
(ulong) block_length, &extents);
- log_array_pos->str= (char*) info->rec_buff;
+ log_array_pos->str= info->rec_buff;
log_array_pos->length= block_length;
log_entry_length+= block_length;
log_array_pos++;
@@ -5408,7 +5408,7 @@ static size_t fill_insert_undo_parts(MAR
log_parts++;
/* Stored bitmap over packed (zero length or all-zero fields) */
- log_parts->str= (char *) info->cur_row.empty_bits;
+ log_parts->str= info->cur_row.empty_bits;
log_parts->length= share->base.pack_bytes;
row_length+= log_parts->length;
log_parts++;
@@ -5416,7 +5416,7 @@ static size_t fill_insert_undo_parts(MAR
if (share->base.max_field_lengths)
{
/* Store length of all not empty char, varchar and blob fields */
- log_parts->str= (char *) field_lengths - 2;
+ log_parts->str= field_lengths - 2;
log_parts->length= info->cur_row.field_lengths_length+2;
int2store(log_parts->str, info->cur_row.field_lengths_length);
row_length+= log_parts->length;
@@ -5428,8 +5428,8 @@ static size_t fill_insert_undo_parts(MAR
/*
Store total blob length to make buffer allocation easier during UNDO
*/
- log_parts->str= (char *) info->length_buff;
- log_parts->length= (uint) (ma_store_length((uchar *) log_parts->str,
+ log_parts->str= info->length_buff;
+ log_parts->length= (uint) (ma_store_length((uchar *) log_parts->str,
info->cur_row.blob_length) -
(uchar*) log_parts->str);
row_length+= log_parts->length;
@@ -5442,7 +5442,7 @@ static size_t fill_insert_undo_parts(MAR
column < end_column;
column++)
{
- log_parts->str= (char*) record + column->offset;
+ log_parts->str= record + column->offset;
log_parts->length= column->length;
row_length+= log_parts->length;
log_parts++;
@@ -5493,7 +5493,7 @@ static size_t fill_insert_undo_parts(MAR
default:
DBUG_ASSERT(0);
}
- log_parts->str= (char*) column_pos;
+ log_parts->str= column_pos;
log_parts->length= column_length;
row_length+= log_parts->length;
log_parts++;
@@ -5512,8 +5512,8 @@ static size_t fill_insert_undo_parts(MAR
*/
if (blob_length)
{
- char *blob_pos;
- memcpy_fixed((uchar*) &blob_pos, record + column->offset + size_length,
+ uchar *blob_pos;
+ memcpy_fixed(&blob_pos, record + column->offset + size_length,
sizeof(blob_pos));
log_parts->str= blob_pos;
log_parts->length= blob_length;
@@ -5612,7 +5612,7 @@ static size_t fill_update_undo_parts(MAR
{
field_data= ma_store_length(field_data,
(uint) (column - share->columndef));
- log_parts->str= (char*) oldrec + column->offset;
+ log_parts->str= oldrec + column->offset;
log_parts->length= column->length;
row_length+= column->length;
log_parts++;
@@ -5719,7 +5719,7 @@ static size_t fill_update_undo_parts(MAR
(ulong) (column - share->columndef));
field_data= ma_store_length(field_data, (ulong) old_column_length);
- log_parts->str= (char*) old_column_pos;
+ log_parts->str= old_column_pos;
log_parts->length= old_column_length;
row_length+= old_column_length;
log_parts++;
@@ -5730,10 +5730,9 @@ static size_t fill_update_undo_parts(MAR
/* Store length of field length data before the field/field_lengths */
field_lengths= (uint) (field_data - start_field_data);
- start_log_parts->str= ((char*)
- (start_field_data -
+ start_log_parts->str= ((start_field_data -
ma_calc_length_for_store_length(field_lengths)));
- ma_store_length((uchar *) start_log_parts->str, field_lengths);
+ ma_store_length((uchar*)start_log_parts->str, field_lengths);
start_log_parts->length= (size_t) (field_data - start_log_parts->str);
row_length+= start_log_parts->length;
DBUG_RETURN(row_length);
=== modified file 'storage/maria/ma_checkpoint.c'
--- a/storage/maria/ma_checkpoint.c 2008-08-25 11:49:47 +0000
+++ b/storage/maria/ma_checkpoint.c 2008-08-28 18:52:23 +0000
@@ -222,7 +222,7 @@ static int really_execute_checkpoint(voi
*/
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 5];
log_array[TRANSLOG_INTERNAL_PARTS + 0].str=
- checkpoint_start_log_horizon_char;
+ (uchar*) checkpoint_start_log_horizon_char;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= total_rec_length=
sizeof(checkpoint_start_log_horizon_char);
for (i= 0; i < (sizeof(record_pieces)/sizeof(record_pieces[0])); i++)
=== modified file 'storage/maria/ma_commit.c'
--- a/storage/maria/ma_commit.c 2008-05-29 15:33:33 +0000
+++ b/storage/maria/ma_commit.c 2008-08-07 20:57:25 +0000
@@ -106,11 +106,7 @@ int maria_begin(MARIA_HA *info)
if (info->s->now_transactional)
{
- TRN *trn;
- struct st_my_thread_var *mysys_var= my_thread_var;
- trn= trnman_new_trn(&mysys_var->mutex,
- &mysys_var->suspend,
- (char*) &mysys_var + STACK_DIRECTION *1024*128);
+ TRN *trn= trnman_new_trn(0);
if (unlikely(!trn))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
=== modified file 'storage/maria/ma_create.c'
--- a/storage/maria/ma_create.c 2008-08-25 11:49:47 +0000
+++ b/storage/maria/ma_create.c 2008-08-28 18:52:23 +0000
@@ -1008,20 +1008,19 @@ int maria_create(const char *name, enum
log_data[0]= test(flags & HA_DONT_TOUCH_DATA);
int2store(log_data + 1, kfile_size_before_extension);
int2store(log_data + 1 + 2, share.base.keystart);
- log_array[TRANSLOG_INTERNAL_PARTS + 0].str= name;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (uchar *)name;
/* we store the end-zero, for Recovery to just pass it to my_create() */
- log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
- strlen(log_array[TRANSLOG_INTERNAL_PARTS + 0].str) + 1;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= strlen(name) + 1;
log_array[TRANSLOG_INTERNAL_PARTS + 1].str= log_data;
/* symlink description is also needed for re-creation by Recovery: */
- log_array[TRANSLOG_INTERNAL_PARTS + 2].str=
- (ci->data_file_name ? ci->data_file_name : empty_string);
- log_array[TRANSLOG_INTERNAL_PARTS + 2].length=
- strlen(log_array[TRANSLOG_INTERNAL_PARTS + 2].str) + 1;
- log_array[TRANSLOG_INTERNAL_PARTS + 3].str=
- (ci->index_file_name ? ci->index_file_name : empty_string);
- log_array[TRANSLOG_INTERNAL_PARTS + 3].length=
- strlen(log_array[TRANSLOG_INTERNAL_PARTS + 3].str) + 1;
+ {
+ const char *s= ci->data_file_name ? ci->data_file_name : empty_string;
+ log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (uchar*)s;
+ log_array[TRANSLOG_INTERNAL_PARTS + 2].length= strlen(s) + 1;
+ s= ci->index_file_name ? ci->index_file_name : empty_string;
+ log_array[TRANSLOG_INTERNAL_PARTS + 3].str= (uchar*)s;
+ log_array[TRANSLOG_INTERNAL_PARTS + 3].length= strlen(s) + 1;
+ }
for (k= TRANSLOG_INTERNAL_PARTS;
k < (sizeof(log_array)/sizeof(log_array[0])); k++)
total_rec_length+= (translog_size_t) log_array[k].length;
=== modified file 'storage/maria/ma_delete_table.c'
--- a/storage/maria/ma_delete_table.c 2008-04-03 13:40:25 +0000
+++ b/storage/maria/ma_delete_table.c 2008-08-06 14:03:27 +0000
@@ -85,7 +85,7 @@ int maria_delete_table(const char *name)
*/
LSN lsn;
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
- log_array[TRANSLOG_INTERNAL_PARTS + 0].str= name;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (uchar*)name;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= strlen(name) + 1;
if (unlikely(translog_write_record(&lsn, LOGREC_REDO_DROP_TABLE,
&dummy_transaction_object, NULL,
=== modified file 'storage/maria/ma_loghandler.c'
--- a/storage/maria/ma_loghandler.c 2008-08-25 11:49:47 +0000
+++ b/storage/maria/ma_loghandler.c 2008-08-28 18:52:23 +0000
@@ -5172,9 +5172,9 @@ static void translog_relative_LSN_encod
{
LEX_CUSTRING *part;
uint lsns_len= lsns * LSN_STORE_SIZE;
- char buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
- char *buffer= buffer_src;
- const char *cbuffer;
+ uchar buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
+ uchar *buffer= buffer_src;
+ const uchar *cbuffer;
DBUG_ENTER("translog_relative_LSN_encode");
@@ -5249,7 +5249,7 @@ static void translog_relative_LSN_encod
DBUG_PRINT("info", ("new length of LSNs: %lu economy: %d",
(ulong)part->length, economy));
parts->total_record_length-= economy;
- part->str= (char*)dst_ptr;
+ part->str= dst_ptr;
}
DBUG_VOID_RETURN;
}
@@ -5959,7 +5959,7 @@ static my_bool translog_write_fixed_reco
DBUG_ASSERT(parts->current != 0); /* first part is left for header */
part= parts->parts + (--parts->current);
parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
- part->str= (char*)chunk1_header;
+ part->str= chunk1_header;
*chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
int2store(chunk1_header + 1, short_trid);
=== modified file 'storage/maria/ma_rename.c'
--- a/storage/maria/ma_rename.c 2008-06-26 05:18:28 +0000
+++ b/storage/maria/ma_rename.c 2008-08-06 14:03:27 +0000
@@ -69,9 +69,9 @@ int maria_rename(const char *old_name, c
LSN lsn;
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
uint old_name_len= strlen(old_name)+1, new_name_len= strlen(new_name)+1;
- log_array[TRANSLOG_INTERNAL_PARTS + 0].str= old_name;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (uchar*)old_name;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= old_name_len;
- log_array[TRANSLOG_INTERNAL_PARTS + 1].str= new_name;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (uchar*)new_name;
log_array[TRANSLOG_INTERNAL_PARTS + 1].length= new_name_len;
/*
For this record to be of any use for Recovery, we need the upper
=== modified file 'storage/maria/ma_state.c'
--- a/storage/maria/ma_state.c 2008-08-18 22:21:22 +0000
+++ b/storage/maria/ma_state.c 2008-08-28 18:52:23 +0000
@@ -91,7 +91,7 @@ my_bool _ma_setup_live_state(MARIA_HA *i
It's enough to compare trids here (instead of calling
tranman_can_read_from) as history->trid is a commit_trid
*/
- while (trn->trid < history->trid)
+ while (trn->trid < history->trid && history->trid != ~(TrID)0)
history= history->next;
pthread_mutex_unlock(&share->intern_lock);
/* The current item can't be deleted as it's the first one visible for us */
=== modified file 'storage/maria/ma_static.c'
--- a/storage/maria/ma_static.c 2008-06-26 05:18:28 +0000
+++ b/storage/maria/ma_static.c 2008-08-06 19:30:05 +0000
@@ -64,6 +64,9 @@ HASH maria_stored_state;
*/
TRN dummy_transaction_object;
+/* a WT_RESOURCE_TYPE for transactions waiting on a unique key conflict */
+WT_RESOURCE_TYPE ma_rc_dup_unique={ wt_resource_id_memcmp, 0};
+
/* Enough for comparing if number is zero */
uchar maria_zero_string[]= {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
=== modified file 'storage/maria/ma_write.c'
--- a/storage/maria/ma_write.c 2008-08-26 12:34:57 +0000
+++ b/storage/maria/ma_write.c 2008-09-01 19:43:11 +0000
@@ -180,15 +180,78 @@ int maria_write(MARIA_HA *info, uchar *r
}
else
{
- if (keyinfo->ck_insert(info,
- (*keyinfo->make_key)(info, &int_key, i,
- buff, record, filepos,
- info->trn->trid)))
+ while (keyinfo->ck_insert(info,
+ (*keyinfo->make_key)(info, &int_key, i,
+ buff, record, filepos,
+ info->trn->trid)))
{
- if (local_lock_tree)
- rw_unlock(&keyinfo->root_lock);
+ TRN *blocker;
DBUG_PRINT("error",("Got error: %d on write",my_errno));
- goto err;
+ /*
+ explicit check to filter out temp tables, they aren't
+ transactional and don't have a proper TRN so the code
+ below doesn't work for them.
+ Also, filter out non-thread maria use.
+ Nothing is unlocked here: the code this loop replaces released
+ root_lock only when local_lock_tree was set.
+ */
+ if (!local_lock_tree)
+ goto err;
+ /*
+ The conflicting key was inserted by this very transaction, so
+ there is nobody to wait for. local_lock_tree is set on this
+ path, and every other exit from this loop releases root_lock
+ before jumping to err - do the same here.
+ */
+ if (info->dup_key_trid == info->trn->trid)
+ {
+ rw_unlock(&keyinfo->root_lock);
+ goto err;
+ }
+ blocker= trnman_trid_to_trn(info->trn, info->dup_key_trid);
+ /*
+ if blocker TRN was not found, it means that the conflicting
+ transaction was committed a long time ago. It could not be
+ aborted, as it would have to wait on the key tree lock
+ to remove the conflicting key it has inserted.
+ */
+ if (!blocker || blocker->commit_trid != ~(TrID)0)
+ { /* committed */
+ if (blocker)
+ pthread_mutex_unlock(& blocker->state_lock);
+ rw_unlock(&keyinfo->root_lock);
+ goto err;
+ }
+ rw_unlock(&keyinfo->root_lock);
+ {
+ /* running. now we wait */
+ WT_RESOURCE_ID rc;
+ int res;
+
+ rc.type= &ma_rc_dup_unique;
+ rc.value= (intptr)blocker; /* TODO savepoint id when we'll have them */
+ res= wt_thd_will_wait_for(info->trn->wt, blocker->wt, & rc);
+ if (res != WT_OK)
+ {
+ pthread_mutex_unlock(& blocker->state_lock);
+ my_errno= HA_ERR_LOCK_DEADLOCK;
+ goto err;
+ }
+ {
+ const char *old_proc_info= proc_info_hook(0,
+ "waiting for a resource", __func__, __FILE__, __LINE__);
+
+ res= wt_thd_cond_timedwait(info->trn->wt, & blocker->state_lock);
+
+ proc_info_hook(0, old_proc_info, __func__, __FILE__, __LINE__);
+ }
+ pthread_mutex_unlock(& blocker->state_lock);
+ if (res != WT_OK)
+ {
+ my_errno= res == WT_TIMEOUT ? HA_ERR_LOCK_WAIT_TIMEOUT
+ : HA_ERR_LOCK_DEADLOCK;
+ goto err;
+ }
+ }
+ rw_wrlock(&keyinfo->root_lock);
}
}
@@ -597,9 +650,22 @@ static int w_search(register MARIA_HA *i
else /* not HA_FULLTEXT, normal HA_NOSAME key */
{
DBUG_PRINT("warning", ("Duplicate key"));
+ /*
+ TODO
+ When the index supports true versioning - with multiple
+ identical values in the UNIQUE index, invisible to each other -
+ the following should be changed to "continue inserting keys, at the
+ end (of the row or statement) wait". Until it's done we cannot properly
+ support deadlock timeouts.
+ */
+ /*
+ the transaction that inserted the conflicting key may still be in
+ progress. remember its trid so that maria_write() can wait for it.
+ */
+ info->dup_key_trid= _ma_trid_from_key(&tmp_key);
info->dup_key_pos= dup_key_pos;
my_afree((uchar*) temp_buff);
- my_errno=HA_ERR_FOUND_DUPP_KEY;
+ my_errno= HA_ERR_FOUND_DUPP_KEY;
DBUG_RETURN(-1);
}
}
@@ -1826,7 +1892,7 @@ my_bool _ma_log_change(MARIA_HA *info, m
log_pos[0]= KEY_OP_CHECK;
int2store(log_pos+1, page_length);
int4store(log_pos+3, crc);
- log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= (char *) log_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= log_pos;
log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= 7;
extra_length+= 7;
translog_parts++;
=== modified file 'storage/maria/maria_def.h'
--- a/storage/maria/maria_def.h 2008-08-26 12:34:57 +0000
+++ b/storage/maria/maria_def.h 2008-08-28 18:52:23 +0000
@@ -29,6 +29,7 @@
#include "ma_loghandler.h"
#include "ma_control_file.h"
#include "ma_state.h"
+#include <waiting_threads.h>
/* For testing recovery */
#ifdef TO_BE_REMOVED
@@ -461,7 +462,7 @@ typedef struct st_maria_block_scan
struct st_maria_handler
{
MARIA_SHARE *s; /* Shared between open:s */
- struct st_transaction *trn; /* Pointer to active transaction */
+ struct st_ma_transaction *trn; /* Pointer to active transaction */
MARIA_STATUS_INFO *state, state_save;
MARIA_STATUS_INFO *state_start; /* State at start of transaction */
MARIA_ROW cur_row; /* The active row that we just read */
@@ -492,13 +493,14 @@ struct st_maria_handler
uint32 int_keytree_version; /* -""- */
int (*read_record)(MARIA_HA *, uchar*, MARIA_RECORD_POS);
invalidator_by_filename invalidator; /* query cache invalidator */
- ulonglong last_auto_increment; /* auto value at start of statement */
+ ulonglong last_auto_increment; /* auto value at start of statement */
ulong this_unique; /* uniq filenumber or thread */
ulong last_unique; /* last unique number */
ulong this_loop; /* counter for this open */
ulong last_loop; /* last used counter */
MARIA_RECORD_POS save_lastpos;
MARIA_RECORD_POS dup_key_pos;
+ TrID dup_key_trid;
my_off_t pos; /* Intern variable */
my_off_t last_keypage; /* Last key page read */
my_off_t last_search_keypage; /* Last keypage when searching */
@@ -782,7 +784,6 @@ typedef struct st_pinned_page
my_bool changed;
} MARIA_PINNED_PAGE;
-
/* Prototypes for intern functions */
extern int _ma_read_dynamic_record(MARIA_HA *, uchar *, MARIA_RECORD_POS);
extern int _ma_read_rnd_dynamic_record(MARIA_HA *, uchar *, MARIA_RECORD_POS,
=== modified file 'storage/maria/trnman.c'
--- a/storage/maria/trnman.c 2008-07-05 11:03:21 +0000
+++ b/storage/maria/trnman.c 2008-09-01 19:43:11 +0000
@@ -44,9 +44,9 @@ static pthread_mutex_t LOCK_trn_list;
static TRN *pool;
/* a hash for committed transactions that maps trid to a TRN structure */
-static LF_HASH trid_to_committed_trn;
+static LF_HASH trid_to_trn;
-/* an array that maps short_trid of an active transaction to a TRN structure */
+/* an array that maps short_id of an active transaction to a TRN structure */
static TRN **short_trid_to_active_trn;
/* locks for short_trid_to_active_trn and pool */
@@ -81,6 +81,17 @@ void trnman_reset_locked_tables(TRN *trn
trn->locked_tables= locked_tables;
}
+static void wt_thd_release_self(TRN *trn)
+{ /* calls wt_thd_release() for the ma_rc_dup_unique resource keyed by this TRN */
+ if (trn->wt) /* nothing to do when no WT_THD is attached */
+ {
+ WT_RESOURCE_ID rc;
+ rc.type= &ma_rc_dup_unique;
+ rc.value= (intptr)trn; /* the resource value is the TRN address itself */
+ wt_thd_release(trn->wt, & rc);
+ trn->wt= 0; /* detach, so that a repeated call is a no-op */
+ }
+}
static my_bool
default_trnman_end_trans_hook(TRN *trn __attribute__ ((unused)),
@@ -92,24 +103,6 @@ default_trnman_end_trans_hook(TRN *trn _
}
-/*
- NOTE
- Just as short_id doubles as loid, this function doubles as
- short_trid_to_LOCK_OWNER. See the compile-time assert below.
-*/
-
-#ifdef NOT_USED
-static TRN *short_trid_to_TRN(uint16 short_trid)
-{
- TRN *trn;
- compile_time_assert(offsetof(TRN, locks) == 0);
- my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
- trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]);
- my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
- return (TRN *)trn;
-}
-#endif
-
static uchar *trn_get_hash_key(const uchar *trn, size_t *len,
my_bool unused __attribute__ ((unused)))
{
@@ -136,7 +129,7 @@ int trnman_init(TrID initial_trid)
MYF(MY_WME|MY_ZEROFILL));
if (unlikely(!short_trid_to_active_trn))
DBUG_RETURN(1);
- short_trid_to_active_trn--; /* min short_trid is 1 */
+ short_trid_to_active_trn--; /* min short_id is 1 */
/*
Initialize lists.
@@ -165,17 +158,13 @@ int trnman_init(TrID initial_trid)
pool= 0;
global_trid_generator= initial_trid;
- lf_hash_init(&trid_to_committed_trn, sizeof(TRN*), LF_HASH_UNIQUE,
+ lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE,
0, 0, trn_get_hash_key, 0);
DBUG_PRINT("info", ("pthread_mutex_init LOCK_trn_list"));
pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST);
my_atomic_rwlock_init(&LOCK_short_trid_to_trn);
my_atomic_rwlock_init(&LOCK_pool);
-#ifdef NOT_USED
- lockman_init(&maria_lockman, (loid_to_lo_func *)&short_trid_to_TRN, 10000);
-#endif
-
DBUG_RETURN(0);
}
@@ -190,7 +179,7 @@ void trnman_destroy()
if (short_trid_to_active_trn == NULL) /* trnman already destroyed */
DBUG_VOID_RETURN;
- DBUG_ASSERT(trid_to_committed_trn.count == 0);
+ DBUG_ASSERT(trid_to_trn.count == 0);
DBUG_ASSERT(trnman_active_transactions == 0);
DBUG_ASSERT(trnman_committed_transactions == 0);
DBUG_ASSERT(active_list_max.prev == &active_list_min);
@@ -201,20 +190,17 @@ void trnman_destroy()
{
TRN *trn= pool;
pool= pool->next;
- DBUG_ASSERT(trn->locks.mutex == 0);
- DBUG_ASSERT(trn->locks.cond == 0);
+ pthread_mutex_destroy(&trn->state_lock);
my_free((void *)trn, MYF(0));
}
- lf_hash_destroy(&trid_to_committed_trn);
+ lf_hash_destroy(&trid_to_trn);
DBUG_PRINT("info", ("pthread_mutex_destroy LOCK_trn_list"));
pthread_mutex_destroy(&LOCK_trn_list);
my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn);
my_atomic_rwlock_destroy(&LOCK_pool);
my_free((void *)(short_trid_to_active_trn+1), MYF(0));
short_trid_to_active_trn= NULL;
-#ifdef NOT_USED
- lockman_destroy(&maria_lockman);
-#endif
+
DBUG_VOID_RETURN;
}
@@ -233,11 +219,13 @@ static TrID new_trid()
DBUG_RETURN(++global_trid_generator);
}
-static void set_short_trid(TRN *trn)
+static uint get_short_trid(TRN *trn)
{
int i= (int) ((global_trid_generator + (intptr)trn) * 312089 %
SHORT_TRID_MAX + 1);
- for ( ; !trn->short_id ; i= 1)
+ uint res=0;
+
+ for ( ; !res ; i= 1)
{
my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn);
for ( ; i <= SHORT_TRID_MAX; i++) /* the range is [1..SHORT_TRID_MAX] */
@@ -246,12 +234,13 @@ static void set_short_trid(TRN *trn)
if (short_trid_to_active_trn[i] == NULL &&
my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn))
{
- trn->short_id= i;
+ res= i;
break;
}
}
my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn);
}
+ return res;
}
/*
@@ -260,9 +249,9 @@ static void set_short_trid(TRN *trn)
mutex and cond will be used for lock waits
*/
-TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond,
- void *stack_end)
+TRN *trnman_new_trn(WT_THD *wt)
{
+ int res;
TRN *trn;
DBUG_ENTER("trnman_new_trn");
@@ -270,7 +259,7 @@ TRN *trnman_new_trn(pthread_mutex_t *mut
we have a mutex, to do simple things under it - allocate a TRN,
increment trnman_active_transactions, set trn->min_read_from.
- Note that all the above is fast. generating short_trid may be slow,
+ Note that all the above is fast. generating short_id may be slow,
as it involves scanning a large array - so it's done outside of the
mutex.
*/
@@ -307,8 +296,10 @@ TRN *trnman_new_trn(pthread_mutex_t *mut
return 0;
}
trnman_allocated_transactions++;
+ pthread_mutex_init(&trn->state_lock, MY_MUTEX_INIT_FAST);
}
- trn->pins= lf_hash_get_pins(&trid_to_committed_trn, stack_end);
+ trn->wt= wt;
+ trn->pins= lf_hash_get_pins(&trid_to_trn);
if (!trn->pins)
{
trnman_free_trn(trn);
@@ -320,7 +311,6 @@ TRN *trnman_new_trn(pthread_mutex_t *mut
trn->min_read_from= active_list_min.next->trid;
trn->trid= new_trid();
- trn->short_id= 0;
trn->next= &active_list_max;
trn->prev= active_list_max.prev;
@@ -337,25 +327,27 @@ TRN *trnman_new_trn(pthread_mutex_t *mut
trn->min_read_from= trn->trid + 1;
}
- trn->commit_trid= 0;
+ trn->commit_trid= ~(TrID)0;
trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0;
trn->used_tables= 0;
- trn->locks.mutex= mutex;
- trn->locks.cond= cond;
- trn->locks.waiting_for= 0;
- trn->locks.all_locks= 0;
-#ifdef NOT_USED
- trn->locks.pins= lf_alloc_get_pins(&maria_lockman.alloc);
-#endif
-
trn->locked_tables= 0;
/*
only after the following function TRN is considered initialized,
so it must be done the last
*/
- set_short_trid(trn);
+ pthread_mutex_lock(&trn->state_lock);
+ trn->short_id= get_short_trid(trn);
+ pthread_mutex_unlock(&trn->state_lock);
+
+ res= lf_hash_insert(&trid_to_trn, trn->pins, &trn);
+ DBUG_ASSERT(res <= 0);
+ if (res)
+ {
+ trnman_end_trn(trn, 0);
+ return 0;
+ }
DBUG_PRINT("exit", ("trn: x%lx trid: 0x%lu",
(ulong) trn, (ulong) trn->trid));
@@ -391,6 +383,7 @@ my_bool trnman_end_trn(TRN *trn, my_bool
/* if a rollback, all UNDO records should have been executed */
DBUG_ASSERT(commit || trn->undo_lsn == 0);
DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));
+
pthread_mutex_lock(&LOCK_trn_list);
/* remove from active list */
@@ -423,58 +416,35 @@ my_bool trnman_end_trn(TRN *trn, my_bool
}
}
+ pthread_mutex_lock(&trn->state_lock);
+ trn->commit_trid= global_trid_generator;
+ wt_thd_release_self(trn);
+ pthread_mutex_unlock(&trn->state_lock);
+
/*
if transaction is committed and it was not the only active transaction -
- add it to the committed list (which is used for read-from relation)
+ add it to the committed list
*/
if (commit && active_list_min.next != &active_list_max)
{
- trn->commit_trid= global_trid_generator;
trn->next= &committed_list_max;
trn->prev= committed_list_max.prev;
trnman_committed_transactions++;
-
- res= lf_hash_insert(&trid_to_committed_trn, pins, &trn);
- /*
- By going on with life is res<0, we let other threads block on
- our rows (because they will never see us committed in
- trid_to_committed_trn) until they timeout. Though correct, this is not a
- good situation:
- - if connection reconnects and wants to check if its rows have been
- committed, it will not be able to do that (it will just lock on them) so
- connection stays permanently in doubt
- - internal structures trid_to_committed_trn and committed_list are
- desynchronized.
- So we should take Maria down immediately, the two problems being
- automatically solved at restart.
- */
- DBUG_ASSERT(res <= 0);
+ committed_list_max.prev= trn->prev->next= trn;
}
- if (res)
+ else
{
- /*
- res == 1 means the condition in the if() above was false.
- res == -1 means lf_hash_insert failed
- */
trn->next= free_me;
free_me= trn;
}
- else
- {
- committed_list_max.prev= trn->prev->next= trn;
- }
if ((*trnman_end_trans_hook)(trn, commit,
active_list_min.next != &active_list_max))
res= -1;
trnman_active_transactions--;
+
pthread_mutex_unlock(&LOCK_trn_list);
/* the rest is done outside of a critical section */
-#ifdef NOT_USED
- lockman_release_locks(&maria_lockman, &trn->locks);
-#endif
- trn->locks.mutex= 0;
- trn->locks.cond= 0;
my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn);
my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->short_id], 0);
my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn);
@@ -491,18 +461,13 @@ my_bool trnman_end_trn(TRN *trn, my_bool
TRN *t= free_me;
free_me= free_me->next;
- /*
- ignore OOM here. it's harmless, and there's nothing we could do, anyway
- */
- (void)lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID));
+ /* ignore OOM. it's harmless, and we can do nothing here anyway */
+ (void)lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID));
trnman_free_trn(t);
}
lf_hash_put_pins(pins);
-#ifdef NOT_USED
- lf_pinbox_put_pins(trn->locks.pins);
-#endif
DBUG_RETURN(res < 0);
}
@@ -526,6 +491,11 @@ void trnman_free_trn(TRN *trn)
*/
union { TRN *trn; void *v; } tmp;
+
+ pthread_mutex_lock(&trn->state_lock);
+ trn->short_id= 0;
+ pthread_mutex_unlock(&trn->state_lock);
+
tmp.trn= pool;
my_atomic_rwlock_wrlock(&LOCK_pool);
@@ -580,9 +550,9 @@ int trnman_can_read_from(TRN *trn, TrID
return trid == trn->trid;
}
- found= lf_hash_search(&trid_to_committed_trn, trn->pins, &trid, sizeof(trid));
+ found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
if (found == NULL)
- return 0; /* not in the hash of committed transactions = cannot read */
+ return 0; /* not in the hash of transactions = cannot read */
if (found == MY_ERRPTR)
return -1;
@@ -591,6 +561,33 @@ int trnman_can_read_from(TRN *trn, TrID
return can;
}
+/*
+  Find the TRN registered in trid_to_trn under the given trid.
+
+  trn  - the calling transaction; its pins are used for the hash search
+         and its min_read_from is used as a lower bound for trid
+  trid - the transaction id to look up
+
+  Returns 0 if trid is below trn->min_read_from, if nothing is found (or
+  the search returned MY_ERRPTR), or if the TRN found has short_id == 0.
+  Otherwise returns the TRN with its state_lock mutex held - the caller
+  has to unlock it.
+*/
+TRN *trnman_trid_to_trn(TRN *trn, TrID trid)
+{
+  TRN **found, *res;
+  LF_REQUIRE_PINS(3);
+
+  if (trid < trn->min_read_from)
+    return 0; /* it's committed eons ago */
+
+  found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
+  if (found == NULL || found == MY_ERRPTR)
+    return 0; /* no luck */
+
+  /*
+    we've found something. 'found' points into the hash element, which is
+    only pinned until lf_hash_search_unpin(); copy the TRN pointer now and
+    never dereference 'found' after the unpin.
+  */
+  res= *found;
+  pthread_mutex_lock(&res->state_lock);
+
+  if (res->short_id == 0)
+  {
+    pthread_mutex_unlock(&res->state_lock);
+    lf_hash_search_unpin(trn->pins);
+    return 0; /* but it was a ghost */
+  }
+  lf_hash_search_unpin(trn->pins);
+
+  /* Gotcha! */
+  return res; /* note that TRN is returned locked !!! */
+}
+
/* TODO: the stubs below are waiting for savepoints to be implemented */
void trnman_new_statement(TRN *trn __attribute__ ((unused)))
@@ -768,7 +765,8 @@ TRN *trnman_recreate_trn_from_recovery(u
TrID old_trid_generator= global_trid_generator;
TRN *trn;
DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
- if (unlikely((trn= trnman_new_trn(NULL, NULL, NULL)) == NULL))
+ global_trid_generator= longid-1; /* force a correct trid in the new trn */
+ if (unlikely((trn= trnman_new_trn(NULL)) == NULL))
return NULL;
/* deallocate excessive allocations of trnman_new_trn() */
global_trid_generator= old_trid_generator;
@@ -776,7 +774,6 @@ TRN *trnman_recreate_trn_from_recovery(u
short_trid_to_active_trn[trn->short_id]= 0;
DBUG_ASSERT(short_trid_to_active_trn[shortid] == NULL);
short_trid_to_active_trn[shortid]= trn;
- trn->trid= longid;
trn->short_id= shortid;
return trn;
}
=== modified file 'storage/maria/trnman.h'
--- a/storage/maria/trnman.h 2008-05-29 15:33:33 +0000
+++ b/storage/maria/trnman.h 2008-08-07 20:57:25 +0000
@@ -19,7 +19,6 @@
C_MODE_START
#include <lf.h>
-#include "lockman.h"
#include "trnman_public.h"
#include "ma_loghandler_lsn.h"
@@ -28,32 +27,38 @@ C_MODE_START
is created. Transaction can always be identified by its trid,
even after transaction has ended.
- short_trid - 2-byte transaction identifier, identifies a running
+ short_id - 2-byte transaction identifier, identifies a running
transaction, is reassigned when transaction ends.
-*/
-/*
- short transaction id is at the same time its identifier
- for a lock manager - its lock owner identifier (loid)
-*/
+ when short_id is 0, TRN is not initialized, for all practical purposes
+ it could be considered unused.
-#define short_id locks.loid
+ when commit_trid is ~(TrID)0 the transaction is running, otherwise it's
+ committed.
-struct st_transaction
+ state_lock mutex protects the state of a TRN, that is whether a TRN
+ is committed/running/unused. Meaning that modifications of short_id and
+ commit_trid happen under this mutex.
+*/
+
+struct st_ma_transaction
{
- LOCK_OWNER locks; /* must be the first! see short_trid_to_TRN() */
LF_PINS *pins;
+ WT_THD *wt;
+ pthread_mutex_t state_lock;
void *used_tables; /* Tables used by transaction */
TRN *next, *prev;
TrID trid, min_read_from, commit_trid;
LSN rec_lsn, undo_lsn;
LSN_WITH_FLAGS first_undo_lsn;
uint locked_tables;
- /* Note! if locks.loid is 0, trn is NOT initialized */
+ uint16 short_id;
};
#define TRANSACTION_LOGGED_LONG_ID ULL(0x8000000000000000)
+extern WT_RESOURCE_TYPE ma_rc_dup_unique;
+
C_MODE_END
#endif
=== modified file 'storage/maria/trnman_public.h'
--- a/storage/maria/trnman_public.h 2008-07-05 11:03:21 +0000
+++ b/storage/maria/trnman_public.h 2008-08-07 20:57:25 +0000
@@ -24,10 +24,11 @@
#define _trnman_public_h
#include "ma_loghandler_lsn.h"
+#include <waiting_threads.h>
C_MODE_START
typedef uint64 TrID; /* our TrID is 6 bytes */
-typedef struct st_transaction TRN;
+typedef struct st_ma_transaction TRN;
#define SHORT_TRID_MAX 65535
@@ -38,13 +39,14 @@ extern my_bool (*trnman_end_trans_hook)(
int trnman_init(TrID);
void trnman_destroy(void);
-TRN *trnman_new_trn(pthread_mutex_t *, pthread_cond_t *, void *);
+TRN *trnman_new_trn(WT_THD *wt);
my_bool trnman_end_trn(TRN *trn, my_bool commit);
#define trnman_commit_trn(T) trnman_end_trn(T, TRUE)
#define trnman_abort_trn(T) trnman_end_trn(T, FALSE)
#define trnman_rollback_trn(T) trnman_end_trn(T, FALSE)
void trnman_free_trn(TRN *trn);
int trnman_can_read_from(TRN *trn, TrID trid);
+TRN *trnman_trid_to_trn(TRN *trn, TrID trid);
void trnman_new_statement(TRN *trn);
void trnman_rollback_statement(TRN *trn);
my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
=== modified file 'storage/maria/unittest/ma_test_loghandler-t.c'
--- a/storage/maria/unittest/ma_test_loghandler-t.c 2008-04-10 02:26:36 +0000
+++ b/storage/maria/unittest/ma_test_loghandler-t.c 2008-08-04 16:57:41 +0000
@@ -227,7 +227,7 @@ int main(int argc __attribute__((unused)
long_tr_id[5]= 0xff;
int4store(long_tr_id, 0);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
trn->short_id= 0;
trn->first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
@@ -250,7 +250,7 @@ int main(int argc __attribute__((unused)
if (i % 2)
{
lsn_store(lsn_buff, lsn_base);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
/* check auto-count feature */
parts[TRANSLOG_INTERNAL_PARTS + 1].str= NULL;
@@ -268,9 +268,9 @@ int main(int argc __attribute__((unused)
lsn_store(lsn_buff, lsn_base);
if ((rec_len= rand_buffer_size()) < 12)
rec_len= 12;
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
- parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= long_buffer;
parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
/* check record length auto-counting */
if (translog_write_record(&lsn,
@@ -290,7 +290,7 @@ int main(int argc __attribute__((unused)
{
lsn_store(lsn_buff, lsn_base);
lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 23;
if (translog_write_record(&lsn,
LOGREC_FIXED_RECORD_2LSN_EXAMPLE,
@@ -308,9 +308,9 @@ int main(int argc __attribute__((unused)
lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
if ((rec_len= rand_buffer_size()) < 19)
rec_len= 19;
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 14;
- parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= long_buffer;
parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
if (translog_write_record(&lsn,
LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE,
@@ -327,7 +327,7 @@ int main(int argc __attribute__((unused)
ok(1, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
}
int4store(long_tr_id, i);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
if (translog_write_record(&lsn,
LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -346,7 +346,7 @@ int main(int argc __attribute__((unused)
if ((rec_len= rand_buffer_size()) < 9)
rec_len= 9;
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_buffer;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= rec_len;
if (translog_write_record(&lsn,
LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,
=== modified file 'storage/maria/unittest/ma_test_loghandler_first_lsn-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c 2008-04-10 02:26:36 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c 2008-08-06 14:03:27 +0000
@@ -113,7 +113,7 @@ int main(int argc __attribute__((unused)
int4store(long_tr_id, 0);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
if (translog_write_record(&lsn,
LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
=== modified file 'storage/maria/unittest/ma_test_loghandler_max_lsn-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c 2008-04-10 02:26:36 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c 2008-08-06 14:03:27 +0000
@@ -104,7 +104,7 @@ int main(int argc __attribute__((unused)
/* write more then 1 file */
int4store(long_tr_id, 0);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
for(i= 0; i < LOG_FILE_SIZE/6; i++)
{
=== modified file 'storage/maria/unittest/ma_test_loghandler_multigroup-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c 2008-06-02 20:53:25 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c 2008-08-04 16:57:41 +0000
@@ -293,7 +293,7 @@ int main(int argc __attribute__((unused)
long_tr_id[5]= 0xff;
int4store(long_tr_id, 0);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
trn->short_id= 0;
trn->first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
@@ -301,7 +301,7 @@ int main(int argc __attribute__((unused)
trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, parts,
NULL, NULL))
{
- fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ fprintf(stderr, "Can't write record #%u\n", 0);
translog_destroy();
ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
exit(1);
@@ -314,7 +314,7 @@ int main(int argc __attribute__((unused)
if (i % 2)
{
lsn_store(lsn_buff, lsn_base);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
trn->short_id= i % 0xFFFF;
if (translog_write_record(&lsn,
@@ -322,8 +322,7 @@ int main(int argc __attribute__((unused)
LSN_STORE_SIZE, TRANSLOG_INTERNAL_PARTS + 1,
parts, NULL, NULL))
{
- fprintf(stderr, "1 Can't write reference before record #%lu\n",
- (ulong) i);
+ fprintf(stderr, "1 Can't write reference before record #%u\n", i);
translog_destroy();
ok(0, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE");
exit(1);
@@ -331,9 +330,9 @@ int main(int argc __attribute__((unused)
ok(1, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE");
lsn_store(lsn_buff, lsn_base);
rec_len= get_len();
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE;
- parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= long_buffer;
parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
trn->short_id= i % 0xFFFF;
if (translog_write_record(&lsn,
@@ -342,8 +341,7 @@ int main(int argc __attribute__((unused)
TRANSLOG_INTERNAL_PARTS + 2,
parts, NULL, NULL))
{
- fprintf(stderr, "1 Can't write var reference before record #%lu\n",
- (ulong) i);
+ fprintf(stderr, "1 Can't write var reference before record #%u\n", i);
translog_destroy();
ok(0, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE");
exit(1);
@@ -354,7 +352,7 @@ int main(int argc __attribute__((unused)
{
lsn_store(lsn_buff, lsn_base);
lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
- parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= lsn_buff;
parts[TRANSLOG_INTERNAL_PARTS + 1].length= 23;
trn->short_id= i % 0xFFFF;
if (translog_write_record(&lsn,
@@ -362,8 +360,7 @@ int main(int argc __attribute__((unused)
trn, NULL, 23, TRANSLOG_INTERNAL_PARTS + 1,
parts, NULL, NULL))
{
- fprintf(stderr, "0 Can't write reference before record #%lu\n",
- (ulong) i);
+ fprintf(stderr, "0 Can't write reference before record #%u\n", i);
translog_destroy();
ok(0, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE");
exit(1);
@@ -372,9 +369,9 @@ int main(int argc __attribute__((unused)
lsn_store(lsn_buff, lsn_base);
lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn);
rec_len= get_len();
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= lsn_buff;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE * 2;
- parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= long_buffer;
parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len;
trn->short_id= i % 0xFFFF;
if (translog_write_record(&lsn,
@@ -383,8 +380,7 @@ int main(int argc __attribute__((unused)
TRANSLOG_INTERNAL_PARTS + 2,
parts, NULL, NULL))
{
- fprintf(stderr, "0 Can't write var reference before record #%lu\n",
- (ulong) i);
+ fprintf(stderr, "0 Can't write var reference before record #%u\n", i);
translog_destroy();
ok(0, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
exit(1);
@@ -392,7 +388,7 @@ int main(int argc __attribute__((unused)
ok(1, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE");
}
int4store(long_tr_id, i);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
trn->short_id= i % 0xFFFF;
if (translog_write_record(&lsn,
@@ -400,7 +396,7 @@ int main(int argc __attribute__((unused)
trn, NULL, 6,
TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL))
{
- fprintf(stderr, "Can't write record #%lu\n", (ulong) i);
+ fprintf(stderr, "Can't write record #%u\n", i);
translog_destroy();
ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE");
exit(1);
@@ -410,7 +406,7 @@ int main(int argc __attribute__((unused)
lsn_base= lsn;
rec_len= get_len();
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_buffer;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= rec_len;
trn->short_id= i % 0xFFFF;
if (translog_write_record(&lsn,
@@ -418,7 +414,7 @@ int main(int argc __attribute__((unused)
trn, NULL, rec_len,
TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL))
{
- fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ fprintf(stderr, "Can't write variable record #%u\n", i);
translog_destroy();
ok(0, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE");
exit(1);
=== modified file 'storage/maria/unittest/ma_test_loghandler_multithread-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c 2008-06-30 09:13:08 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c 2008-08-30 21:32:27 +0000
@@ -167,7 +167,7 @@ void writer(int num)
int2store(long_tr_id, num);
int4store(long_tr_id + 2, i);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
if (translog_write_record(&lsn,
LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -183,7 +183,7 @@ void writer(int num)
return;
}
lsns1[num][i]= lsn;
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_buffer;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= len;
if (translog_write_record(&lsn,
LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,
@@ -368,7 +368,7 @@ int main(int argc __attribute__((unused)
0x11, 0x22, 0x33, 0x44, 0x55, 0x66
};
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
if (translog_write_record(&first_lsn,
=== modified file 'storage/maria/unittest/ma_test_loghandler_noflush-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_noflush-t.c 2008-04-10 02:26:36 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_noflush-t.c 2008-08-06 14:03:27 +0000
@@ -93,7 +93,7 @@ int main(int argc __attribute__((unused)
int4store(long_tr_id, 0);
long_tr_id[5]= 0xff;
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
if (translog_write_record(&first_lsn,
LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
=== modified file 'storage/maria/unittest/ma_test_loghandler_nologs-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_nologs-t.c 2008-04-10 02:26:36 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_nologs-t.c 2008-08-06 14:03:27 +0000
@@ -90,7 +90,7 @@ int main(int argc __attribute__((unused)
/* write more then 1 file */
int4store(long_tr_id, 0);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
if (translog_write_record(&lsn,
LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -98,7 +98,7 @@ int main(int argc __attribute__((unused)
TRANSLOG_INTERNAL_PARTS + 1,
parts, NULL, NULL))
{
- fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ fprintf(stderr, "Can't write record #0\n");
translog_destroy();
exit(1);
}
@@ -111,7 +111,7 @@ int main(int argc __attribute__((unused)
TRANSLOG_INTERNAL_PARTS + 1,
parts, NULL, NULL))
{
- fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ fprintf(stderr, "Can't write record #0\n");
translog_destroy();
exit(1);
}
@@ -164,7 +164,7 @@ int main(int argc __attribute__((unused)
ok(1, "Log init OK");
int4store(long_tr_id, 0);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
if (translog_write_record(&lsn,
LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -172,7 +172,7 @@ int main(int argc __attribute__((unused)
TRANSLOG_INTERNAL_PARTS + 1,
parts, NULL, NULL))
{
- fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ fprintf(stderr, "Can't write record #0\n");
translog_destroy();
exit(1);
}
=== modified file 'storage/maria/unittest/ma_test_loghandler_pagecache-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c 2008-04-10 02:26:36 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c 2008-08-06 14:03:27 +0000
@@ -131,7 +131,7 @@ int main(int argc __attribute__((unused)
}
my_close(file1.file, MYF(MY_WME));
int4store(long_tr_id, 0);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
if (translog_write_record(&lsn,
=== modified file 'storage/maria/unittest/ma_test_loghandler_purge-t.c'
--- a/storage/maria/unittest/ma_test_loghandler_purge-t.c 2008-04-10 02:26:36 +0000
+++ b/storage/maria/unittest/ma_test_loghandler_purge-t.c 2008-08-06 14:03:27 +0000
@@ -90,7 +90,7 @@ int main(int argc __attribute__((unused)
/* write more then 1 file */
int4store(long_tr_id, 0);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
if (translog_write_record(&lsn,
LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
@@ -136,7 +136,7 @@ int main(int argc __attribute__((unused)
ok(1, "First file is removed");
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_buffer;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= LONG_BUFFER_SIZE;
if (translog_write_record(&lsn,
LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE,
@@ -160,7 +160,7 @@ int main(int argc __attribute__((unused)
ok(1, "Second and third files are not removed");
int4store(long_tr_id, 0);
- parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= long_tr_id;
parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6;
if (translog_write_record(&lsn,
LOGREC_FIXED_RECORD_0LSN_EXAMPLE,
=== modified file 'storage/maria/unittest/trnman-t.c'
--- a/storage/maria/unittest/trnman-t.c 2008-06-04 09:39:54 +0000
+++ b/storage/maria/unittest/trnman-t.c 2008-08-30 21:32:27 +0000
@@ -38,15 +38,9 @@ pthread_handler_t test_trnman(void *arg)
{
uint x, y, i, n;
TRN *trn[MAX_ITER];
- pthread_mutex_t mutexes[MAX_ITER];
- pthread_cond_t conds[MAX_ITER];
int m= (*(int *)arg);
- for (i= 0; i < MAX_ITER; i++)
- {
- pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
- pthread_cond_init(&conds[i], 0);
- }
+ my_thread_init();
for (x= ((int)(intptr)(&m)); m > 0; )
{
@@ -54,7 +48,7 @@ pthread_handler_t test_trnman(void *arg)
m-= n= x % MAX_ITER;
for (i= 0; i < n; i++)
{
- trn[i]= trnman_new_trn(&mutexes[i], &conds[i], &m + STACK_SIZE);
+ trn[i]= trnman_new_trn(0);
if (!trn[i])
{
diag("trnman_new_trn() failed");
@@ -67,15 +61,12 @@ pthread_handler_t test_trnman(void *arg)
trnman_end_trn(trn[i], y & 1);
}
}
- for (i= 0; i < MAX_ITER; i++)
- {
- pthread_mutex_destroy(&mutexes[i]);
- pthread_cond_destroy(&conds[i]);
- }
pthread_mutex_lock(&rt_mutex);
rt_num_threads--;
pthread_mutex_unlock(&rt_mutex);
+ my_thread_end();
+
return 0;
}
#undef MAX_ITER
@@ -111,10 +102,10 @@ void run_test(const char *test, pthread_
}
#define ok_read_from(T1, T2, RES) \
- i= trnman_can_read_from(trn[T1], trid[T2]); \
+ i= trnman_can_read_from(trn[T1], trid[T2]); \
ok(i == RES, "trn" #T1 " %s read from trn" #T2, i ? "can" : "cannot")
#define start_transaction(T) \
- trn[T]= trnman_new_trn(&mutexes[T], &conds[T], &i + STACK_SIZE); \
+ trn[T]= trnman_new_trn(0); \
trid[T]= trn[T]->trid
#define commit(T) trnman_commit_trn(trn[T])
#define abort(T) trnman_abort_trn(trn[T])
@@ -124,16 +115,8 @@ void test_trnman_read_from()
{
TRN *trn[Ntrns];
TrID trid[Ntrns];
- pthread_mutex_t mutexes[Ntrns];
- pthread_cond_t conds[Ntrns];
int i;
- for (i= 0; i < Ntrns; i++)
- {
- pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST);
- pthread_cond_init(&conds[i], 0);
- }
-
start_transaction(0); /* start trn1 */
start_transaction(1); /* start trn2 */
ok_read_from(1, 0, 0);
@@ -149,17 +132,11 @@ void test_trnman_read_from()
ok_read_from(3, 1, 0);
commit(3); /* commit trn5 */
- for (i= 0; i < Ntrns; i++)
- {
- pthread_mutex_destroy(&mutexes[i]);
- pthread_cond_destroy(&conds[i]);
- }
}
int main(int argc __attribute__((unused)), char **argv)
{
MY_INIT(argv[0]);
- my_init();
plan(7);
=== modified file 'storage/myisam/mi_check.c'
--- a/storage/myisam/mi_check.c 2008-06-26 05:18:28 +0000
+++ b/storage/myisam/mi_check.c 2008-08-28 12:43:44 +0000
@@ -803,9 +803,9 @@ static int chk_index(HA_CHECK *param, MI
(flag=ha_key_cmp(keyinfo->seg,info->lastkey,key,key_length,
comp_flag, diff_pos)) >=0)
{
- DBUG_DUMP("old",(uchar*) info->lastkey, info->lastkey_length);
- DBUG_DUMP("new",(uchar*) key, key_length);
- DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos));
+ DBUG_DUMP("old",info->lastkey, info->lastkey_length);
+ DBUG_DUMP("new",key, key_length);
+ DBUG_DUMP("new_in_page",old_keypos,(uint) (keypos-old_keypos));
if (comp_flag & SEARCH_FIND && flag == 0)
mi_check_print_error(param,"Found duplicated key at page %s",llstr(page,llbuff));
@@ -874,8 +874,8 @@ static int chk_index(HA_CHECK *param, MI
DBUG_PRINT("test",("page: %s record: %s filelength: %s",
llstr(page,llbuff),llstr(record,llbuff2),
llstr(info->state->data_file_length,llbuff3)));
- DBUG_DUMP("key",(uchar*) key,key_length);
- DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos));
+ DBUG_DUMP("key",key,key_length);
+ DBUG_DUMP("new_in_page",old_keypos,(uint) (keypos-old_keypos));
goto err;
}
param->record_checksum+=(ha_checksum) record;
@@ -4026,7 +4026,7 @@ static int sort_insert_key(MI_SORT_PARAM
DBUG_RETURN(1);
}
a_length=2+nod_flag;
- key_block->end_pos= (char*) anc_buff+2;
+ key_block->end_pos= anc_buff+2;
lastkey=0; /* No previous key in block */
}
else
=== modified file 'storage/myisam/mi_page.c'
--- a/storage/myisam/mi_page.c 2008-03-29 08:02:54 +0000
+++ b/storage/myisam/mi_page.c 2008-08-28 12:43:44 +0000
@@ -49,7 +49,7 @@ uchar *_mi_fetch_keypage(register MI_INF
{
DBUG_PRINT("error",("page %lu had wrong page length: %u",
(ulong) page, page_size));
- DBUG_DUMP("page", (char*) tmp, keyinfo->block_length);
+ DBUG_DUMP("page",tmp, keyinfo->block_length);
info->last_keypage = HA_OFFSET_ERROR;
mi_print_error(info->s, HA_ERR_CRASHED);
my_errno = HA_ERR_CRASHED;
=== modified file 'storage/myisam/mi_search.c'
--- a/storage/myisam/mi_search.c 2008-08-24 13:29:34 +0000
+++ b/storage/myisam/mi_search.c 2008-08-28 18:52:23 +0000
@@ -816,7 +816,7 @@ uint _mi_get_pack_key(register MI_KEYDEF
DBUG_PRINT("error",
("Found too long null packed key: %u of %u at 0x%lx",
length, keyseg->length, (long) *page_pos));
- DBUG_DUMP("key",(char*) *page_pos,16);
+ DBUG_DUMP("key",*page_pos,16);
mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
return 0;
@@ -873,7 +873,7 @@ uint _mi_get_pack_key(register MI_KEYDEF
{
DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx",
length, keyseg->length, (long) *page_pos));
- DBUG_DUMP("key",(char*) *page_pos,16);
+ DBUG_DUMP("key",*page_pos,16);
mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
return 0; /* Error */
@@ -945,7 +945,7 @@ uint _mi_get_binary_pack_key(register MI
DBUG_PRINT("error",
("Found too long binary packed key: %u of %u at 0x%lx",
length, keyinfo->maxlength, (long) *page_pos));
- DBUG_DUMP("key",(char*) *page_pos,16);
+ DBUG_DUMP("key",*page_pos,16);
mi_print_error(keyinfo->share, HA_ERR_CRASHED);
my_errno=HA_ERR_CRASHED;
DBUG_RETURN(0); /* Wrong key */
=== modified file 'storage/myisammrg/myrg_create.c'
--- a/storage/myisammrg/myrg_create.c 2007-08-13 13:11:25 +0000
+++ b/storage/myisammrg/myrg_create.c 2008-08-28 12:43:44 +0000
@@ -46,7 +46,7 @@ int myrg_create(const char *name, const
fn_same(buff,name,4);
*(end=strend(buff))='\n';
end[1]=0;
- if (my_write(file,(char*) buff,(uint) (end-buff+1),
+ if (my_write(file,(uchar*) buff,(uint) (end-buff+1),
MYF(MY_WME | MY_NABP)))
goto err;
}
=== modified file 'unittest/mysys/CMakeLists.txt'
--- a/unittest/mysys/CMakeLists.txt 2008-01-10 12:21:53 +0000
+++ b/unittest/mysys/CMakeLists.txt 2008-09-01 17:27:49 +0000
@@ -26,3 +26,9 @@ TARGET_LINK_LIBRARIES(base64-t mytap mys
ADD_EXECUTABLE(my_atomic-t my_atomic-t.c)
TARGET_LINK_LIBRARIES(my_atomic-t mytap mysys dbug strings wsock32)
+
+ADD_EXECUTABLE(lf-t lf-t.c)
+TARGET_LINK_LIBRARIES(lf-t mytap mysys dbug strings wsock32)
+
+ADD_EXECUTABLE(waiting_threads-t waiting_threads-t.c)
+TARGET_LINK_LIBRARIES(waiting_threads-t mytap mysys dbug strings wsock32)
=== modified file 'unittest/mysys/Makefile.am'
--- a/unittest/mysys/Makefile.am 2008-01-10 12:21:53 +0000
+++ b/unittest/mysys/Makefile.am 2008-08-29 18:51:02 +0000
@@ -16,7 +16,9 @@
INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
-I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
-noinst_PROGRAMS = bitmap-t base64-t my_atomic-t
+noinst_HEADERS = thr_template.c
+
+noinst_PROGRAMS = bitmap-t base64-t my_atomic-t lf-t waiting_threads-t
LDADD = $(top_builddir)/unittest/mytap/libmytap.a \
$(top_builddir)/mysys/libmysys.a \
=== added file 'unittest/mysys/lf-t.c'
--- a/unittest/mysys/lf-t.c 1970-01-01 00:00:00 +0000
+++ b/unittest/mysys/lf-t.c 2008-07-29 14:10:24 +0000
@@ -0,0 +1,168 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "thr_template.c"
+
+#include <lf.h>
+
+int32 inserts= 0, N;
+LF_ALLOCATOR lf_allocator;
+LF_HASH lf_hash;
+
+/*
+  pinbox test - put back and re-acquire a set of pins in a loop
+
+  arg points to an int holding the number of iterations.
+  When done, decrements running_threads under mutex and signals cond
+  if this was the last running thread.
+*/
+pthread_handler_t test_lf_pinbox(void *arg)
+{
+  int m= *(int *)arg;
+  LF_PINS *pins;
+
+  my_thread_init();
+
+  pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+
+  for (; m ; m--)
+  {
+    lf_pinbox_put_pins(pins);
+    pins= lf_pinbox_get_pins(&lf_allocator.pinbox);
+  }
+  lf_pinbox_put_pins(pins);
+
+  /* report completion; the last thread to finish wakes up the waiter */
+  pthread_mutex_lock(&mutex);
+  if (!--running_threads) pthread_cond_signal(&cond);
+  pthread_mutex_unlock(&mutex);
+  my_thread_end();
+  return 0;
+}
+
+/*
+  Element type used by test_lf_alloc(): 'data' is the payload the test
+  writes and reads back. The offset of 'not_used' is what do_tests()
+  passes as the last argument of lf_alloc_init() - presumably the slot
+  the allocator uses to link free elements (TODO confirm in lf.h).
+*/
+typedef union {
+ int32 data;
+ void *not_used;
+} TLA;
+
+/*
+  allocator test - in a loop allocate two elements, store a pseudo-random
+  number in each, read it back, and free both elements.
+
+  arg points to an int; half of its value is the number of iterations.
+  y accumulates +data of the first element and -data of the second one,
+  so it must be 0 at the end; it is added to the global 'bad'.
+  The thread that brings N down to 0 prints allocator statistics.
+*/
+pthread_handler_t test_lf_alloc(void *arg)
+{
+  int m= (*(int *)arg)/2;
+  int32 x,y= 0;
+  LF_PINS *pins;
+
+  my_thread_init();
+
+  pins= lf_alloc_get_pins(&lf_allocator);
+
+  /* the address of a local is used as a per-thread seed */
+  for (x= ((int)(intptr)(&m)); m ; m--)
+  {
+    TLA *node1, *node2;
+    /*
+      multiply as unsigned: a signed int32 product may overflow, which is
+      undefined behaviour; the unsigned product wraps and yields the same
+      low 31 bits after masking
+    */
+    x= (int32)(((uint32)x*(uint32)m+0x87654321) & INT_MAX32);
+    node1= (TLA *)lf_alloc_new(pins);
+    node1->data= x;
+    y+= node1->data;
+    node1->data= 0;
+    node2= (TLA *)lf_alloc_new(pins);
+    node2->data= x;
+    y-= node2->data;
+    node2->data= 0;
+    lf_alloc_free(pins, node1);
+    lf_alloc_free(pins, node2);
+  }
+  lf_alloc_put_pins(pins);
+  pthread_mutex_lock(&mutex);
+  bad+= y;
+
+  if (--N == 0)
+  {
+    diag("%d mallocs, %d pins in stack",
+         lf_allocator.mallocs, lf_allocator.pinbox.pins_in_array);
+#ifdef MY_LF_EXTRA_DEBUG
+    bad|= lf_allocator.mallocs - lf_alloc_pool_count(&lf_allocator);
+#endif
+  }
+  if (!--running_threads) pthread_cond_signal(&cond);
+  pthread_mutex_unlock(&mutex);
+  my_thread_end();
+  return 0;
+}
+
+/* number of keys inserted and then deleted in one outer iteration */
+#define N_TLH 1000
+/*
+  hash test - in a loop insert N_TLH pseudo-random keys, then delete the
+  very same keys (the second inner loop replays the sequence from the
+  saved seed y).
+
+  arg points to an int; its value divided by 2*N_TLH is the number of
+  outer iterations. 'sum' is added to the global 'bad' and 'ins' to the
+  global 'inserts'; the thread that brings N down to 0 prints statistics
+  and ORs lf_hash.count into 'bad'.
+
+  NOTE(review): sum/ins are updated when lf_hash_insert() returns non-zero
+  and sum is decreased when lf_hash_delete() returns non-zero - confirm in
+  lf_hash.c which outcome a non-zero return denotes.
+*/
+pthread_handler_t test_lf_hash(void *arg)
+{
+  int m= (*(int *)arg)/(2*N_TLH);
+  int32 x,y,z,sum= 0, ins= 0;
+  LF_PINS *pins;
+
+  my_thread_init();
+
+  pins= lf_hash_get_pins(&lf_hash);
+
+  /* the address of a local is used as a per-thread seed */
+  for (x= ((int)(intptr)(&m)); m ; m--)
+  {
+    int i;
+    y= x;
+    for (i= 0; i < N_TLH; i++)
+    {
+      /*
+        multiply as unsigned: a signed int32 product may overflow, which
+        is undefined behaviour; the unsigned product wraps and yields the
+        same low 31 bits after masking
+      */
+      x= (int32)(((uint32)x*(uint32)(m+i)+0x87654321) & INT_MAX32);
+      z= (x<0) ? -x : x;
+      if (lf_hash_insert(&lf_hash, pins, &z))
+      {
+        sum+= z;
+        ins++;
+      }
+    }
+    for (i= 0; i < N_TLH; i++)
+    {
+      /* same step as above, so the same keys are generated again */
+      y= (int32)(((uint32)y*(uint32)(m+i)+0x87654321) & INT_MAX32);
+      z= (y<0) ? -y : y;
+      if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z)))
+        sum-= z;
+    }
+  }
+  lf_hash_put_pins(pins);
+  pthread_mutex_lock(&mutex);
+  bad+= sum;
+  inserts+= ins;
+
+  if (--N == 0)
+  {
+    diag("%d mallocs, %d pins in stack, %d hash size, %d inserts",
+         lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_array,
+         lf_hash.size, inserts);
+    bad|= lf_hash.count;
+  }
+  if (!--running_threads) pthread_cond_signal(&cond);
+  pthread_mutex_unlock(&mutex);
+  my_thread_end();
+  return 0;
+}
+
+
+/*
+  Runs the lock-free tests: plans 4 TAP checks, sets up the shared
+  allocator and hash, checks my_atomic_initialize(), then runs the pinbox,
+  allocator and hash tests concurrently and tears the structures down.
+  N is reset to THREADS before every concurrent test.
+*/
+void do_tests()
+{
+  plan(4);
+
+  lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used));
+  lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0,
+               &my_charset_bin);
+
+  bad= my_atomic_initialize();
+  ok(!bad, "my_atomic_initialize() returned %d", bad);
+
+  N= THREADS;
+  test_concurrently("lf_pinbox", test_lf_pinbox, N, CYCLES);
+
+  N= THREADS;
+  test_concurrently("lf_alloc", test_lf_alloc, N, CYCLES);
+
+  N= THREADS;
+  test_concurrently("lf_hash", test_lf_hash, N, CYCLES/10);
+
+  lf_hash_destroy(&lf_hash);
+  lf_alloc_destroy(&lf_allocator);
+}
+
=== modified file 'unittest/mysys/my_atomic-t.c'
--- a/unittest/mysys/my_atomic-t.c 2008-02-13 17:25:56 +0000
+++ b/unittest/mysys/my_atomic-t.c 2008-07-29 14:10:24 +0000
@@ -13,11 +13,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-#include <my_global.h>
-#include <my_sys.h>
-#include <my_atomic.h>
-#include <tap.h>
-#include <lf.h>
+#include "thr_template.c"
/* at least gcc 3.4.5 and 3.4.6 (but not 3.2.3) on RHEL */
#if __GNUC__ == 3 && __GNUC_MINOR__ == 4
@@ -26,20 +22,12 @@
#define GCC_BUG_WORKAROUND
#endif
-volatile uint32 a32,b32;
-volatile int32 c32, N;
+volatile uint32 b32;
+volatile int32 c32;
my_atomic_rwlock_t rwl;
-LF_ALLOCATOR lf_allocator;
-LF_HASH lf_hash;
-pthread_attr_t thr_attr;
-pthread_mutex_t mutex;
-pthread_cond_t cond;
-uint running_threads;
-size_t stacksize= 0;
-#define STACK_SIZE (((int)stacksize-2048)*STACK_DIRECTION)
/* add and sub a random number in a loop. Must get 0 at the end */
-pthread_handler_t test_atomic_add_handler(void *arg)
+pthread_handler_t test_atomic_add(void *arg)
{
int m= (*(int *)arg)/2;
GCC_BUG_WORKAROUND int32 x;
@@ -47,11 +35,11 @@ pthread_handler_t test_atomic_add_handle
{
x= (x*m+0x87654321) & INT_MAX32;
my_atomic_rwlock_wrlock(&rwl);
- my_atomic_add32(&a32, x);
+ my_atomic_add32(&bad, x);
my_atomic_rwlock_wrunlock(&rwl);
my_atomic_rwlock_wrlock(&rwl);
- my_atomic_add32(&a32, -x);
+ my_atomic_add32(&bad, -x);
my_atomic_rwlock_wrunlock(&rwl);
}
pthread_mutex_lock(&mutex);
@@ -62,13 +50,13 @@ pthread_handler_t test_atomic_add_handle
/*
1. generate thread number 0..N-1 from b32
- 2. add it to a32
+ 2. add it to bad
3. swap thread numbers in c32
4. (optionally) one more swap to avoid 0 as a result
- 5. subtract result from a32
- must get 0 in a32 at the end
+ 5. subtract result from bad
+ must get 0 in bad at the end
*/
-pthread_handler_t test_atomic_fas_handler(void *arg)
+pthread_handler_t test_atomic_fas(void *arg)
{
int m= *(int *)arg;
int32 x;
@@ -78,7 +66,7 @@ pthread_handler_t test_atomic_fas_handle
my_atomic_rwlock_wrunlock(&rwl);
my_atomic_rwlock_wrlock(&rwl);
- my_atomic_add32(&a32, x);
+ my_atomic_add32(&bad, x);
my_atomic_rwlock_wrunlock(&rwl);
for (; m ; m--)
@@ -96,7 +84,7 @@ pthread_handler_t test_atomic_fas_handle
}
my_atomic_rwlock_wrlock(&rwl);
- my_atomic_add32(&a32, -x);
+ my_atomic_add32(&bad, -x);
my_atomic_rwlock_wrunlock(&rwl);
pthread_mutex_lock(&mutex);
@@ -106,28 +94,28 @@ pthread_handler_t test_atomic_fas_handle
}
/*
- same as test_atomic_add_handler, but my_atomic_add32 is emulated with
+ same as test_atomic_add, but my_atomic_add32 is emulated with
my_atomic_cas32 - notice that the slowdown is proportional to the
number of CPUs
*/
-pthread_handler_t test_atomic_cas_handler(void *arg)
+pthread_handler_t test_atomic_cas(void *arg)
{
int m= (*(int *)arg)/2, ok= 0;
GCC_BUG_WORKAROUND int32 x, y;
for (x= ((int)(intptr)(&m)); m ; m--)
{
my_atomic_rwlock_wrlock(&rwl);
- y= my_atomic_load32(&a32);
+ y= my_atomic_load32(&bad);
my_atomic_rwlock_wrunlock(&rwl);
x= (x*m+0x87654321) & INT_MAX32;
do {
my_atomic_rwlock_wrlock(&rwl);
- ok= my_atomic_cas32(&a32, &y, (uint32)y+x);
+ ok= my_atomic_cas32(&bad, &y, (uint32)y+x);
my_atomic_rwlock_wrunlock(&rwl);
} while (!ok) ;
do {
my_atomic_rwlock_wrlock(&rwl);
- ok= my_atomic_cas32(&a32, &y, y-x);
+ ok= my_atomic_cas32(&bad, &y, y-x);
my_atomic_rwlock_wrunlock(&rwl);
} while (!ok) ;
}
@@ -137,212 +125,22 @@ pthread_handler_t test_atomic_cas_handle
return 0;
}
-
-/*
- pin allocator - alloc and release an element in a loop
-*/
-pthread_handler_t test_lf_pinbox(void *arg)
-{
- int m= *(int *)arg;
- int32 x= 0;
- LF_PINS *pins;
-
- pins= lf_pinbox_get_pins(&lf_allocator.pinbox, &m + STACK_SIZE);
-
- for (x= ((int)(intptr)(&m)); m ; m--)
- {
- lf_pinbox_put_pins(pins);
- pins= lf_pinbox_get_pins(&lf_allocator.pinbox, &m + STACK_SIZE);
- }
- lf_pinbox_put_pins(pins);
- pthread_mutex_lock(&mutex);
- if (!--running_threads) pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
- return 0;
-}
-
-typedef union {
- int32 data;
- void *not_used;
-} TLA;
-
-pthread_handler_t test_lf_alloc(void *arg)
-{
- int m= (*(int *)arg)/2;
- int32 x,y= 0;
- LF_PINS *pins;
-
- pins= lf_alloc_get_pins(&lf_allocator, &m + STACK_SIZE);
-
- for (x= ((int)(intptr)(&m)); m ; m--)
- {
- TLA *node1, *node2;
- x= (x*m+0x87654321) & INT_MAX32;
- node1= (TLA *)lf_alloc_new(pins);
- node1->data= x;
- y+= node1->data;
- node1->data= 0;
- node2= (TLA *)lf_alloc_new(pins);
- node2->data= x;
- y-= node2->data;
- node2->data= 0;
- lf_alloc_free(pins, node1);
- lf_alloc_free(pins, node2);
- }
- lf_alloc_put_pins(pins);
- my_atomic_rwlock_wrlock(&rwl);
- my_atomic_add32(&a32, y);
-
- if (my_atomic_add32(&N, -1) == 1)
- {
- diag("%d mallocs, %d pins in stack",
- lf_allocator.mallocs, lf_allocator.pinbox.pins_in_array);
-#ifdef MY_LF_EXTRA_DEBUG
- a32|= lf_allocator.mallocs - lf_alloc_pool_count(&lf_allocator);
-#endif
- }
- my_atomic_rwlock_wrunlock(&rwl);
- pthread_mutex_lock(&mutex);
- if (!--running_threads) pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
- return 0;
-}
-
-#define N_TLH 1000
-pthread_handler_t test_lf_hash(void *arg)
-{
- int m= (*(int *)arg)/(2*N_TLH);
- int32 x,y,z,sum= 0, ins= 0;
- LF_PINS *pins;
-
- pins= lf_hash_get_pins(&lf_hash, &m + STACK_SIZE);
-
- for (x= ((int)(intptr)(&m)); m ; m--)
- {
- int i;
- y= x;
- for (i= 0; i < N_TLH; i++)
- {
- x= (x*(m+i)+0x87654321) & INT_MAX32;
- z= (x<0) ? -x : x;
- if (lf_hash_insert(&lf_hash, pins, &z))
- {
- sum+= z;
- ins++;
- }
- }
- for (i= 0; i < N_TLH; i++)
- {
- y= (y*(m+i)+0x87654321) & INT_MAX32;
- z= (y<0) ? -y : y;
- if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z)))
- sum-= z;
- }
- }
- lf_hash_put_pins(pins);
- my_atomic_rwlock_wrlock(&rwl);
- my_atomic_add32(&a32, sum);
- my_atomic_add32(&b32, ins);
-
- if (my_atomic_add32(&N, -1) == 1)
- {
- diag("%d mallocs, %d pins in stack, %d hash size, %d inserts",
- lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_array,
- lf_hash.size, b32);
- a32|= lf_hash.count;
- }
- my_atomic_rwlock_wrunlock(&rwl);
- pthread_mutex_lock(&mutex);
- if (!--running_threads) pthread_cond_signal(&cond);
- pthread_mutex_unlock(&mutex);
- return 0;
-}
-
-
-void test_atomic(const char *test, pthread_handler handler, int n, int m)
-{
- pthread_t t;
- ulonglong now= my_getsystime();
-
- a32= 0;
- b32= 0;
- c32= 0;
-
- diag("Testing %s with %d threads, %d iterations... ", test, n, m);
- for (running_threads= n ; n ; n--)
- {
- if (pthread_create(&t, &thr_attr, handler, &m) != 0)
- {
- diag("Could not create thread");
- abort();
- }
- }
- pthread_mutex_lock(&mutex);
- while (running_threads)
- pthread_cond_wait(&cond, &mutex);
- pthread_mutex_unlock(&mutex);
-
- now= my_getsystime()-now;
- ok(a32 == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, a32);
-}
-
-int main()
+void do_tests()
{
- int err;
- MY_INIT("my_atomic-t.c");
-
- diag("N CPUs: %d, atomic ops: %s", my_getncpus(), MY_ATOMIC_MODE);
- err= my_atomic_initialize();
+ plan(4);
- plan(7);
- ok(err == 0, "my_atomic_initialize() returned %d", err);
+ bad= my_atomic_initialize();
+ ok(!bad, "my_atomic_initialize() returned %d", bad);
- pthread_mutex_init(&mutex, 0);
- pthread_cond_init(&cond, 0);
my_atomic_rwlock_init(&rwl);
- lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used));
- lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0,
- &my_charset_bin);
- pthread_attr_init(&thr_attr);
- pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
-#ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE
- pthread_attr_getstacksize(&thr_attr, &stacksize);
- if (stacksize == 0)
-#endif
- stacksize = PTHREAD_STACK_MIN;
-
-#ifdef MY_ATOMIC_MODE_RWLOCKS
-#if defined(HPUX11) || defined(__POWERPC__) /* showed to be very slow (scheduler-related) */
-#define CYCLES 300
-#else
-#define CYCLES 3000
-#endif
-#else
-#define CYCLES 300000
-#endif
-#define THREADS 100
+ b32= c32= 0;
+ test_concurrently("my_atomic_add32", test_atomic_add, THREADS, CYCLES);
+ b32= c32= 0;
+ test_concurrently("my_atomic_fas32", test_atomic_fas, THREADS, CYCLES);
+ b32= c32= 0;
+ test_concurrently("my_atomic_cas32", test_atomic_cas, THREADS, CYCLES);
- test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS,CYCLES);
- test_atomic("my_atomic_fas32", test_atomic_fas_handler, THREADS,CYCLES);
- test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS,CYCLES);
- test_atomic("lf_pinbox", test_lf_pinbox, THREADS,CYCLES);
- test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES);
- test_atomic("lf_hash", test_lf_hash, THREADS,CYCLES/10);
-
- lf_hash_destroy(&lf_hash);
- lf_alloc_destroy(&lf_allocator);
-
- /*
- workaround until we know why it crashes randomly on some machine
- (BUG#22320).
- */
- sleep(2);
- pthread_mutex_destroy(&mutex);
- pthread_cond_destroy(&cond);
- pthread_attr_destroy(&thr_attr);
my_atomic_rwlock_destroy(&rwl);
- my_end(0);
- return exit_status();
}
=== added file 'unittest/mysys/thr_template.c'
--- a/unittest/mysys/thr_template.c 1970-01-01 00:00:00 +0000
+++ b/unittest/mysys/thr_template.c 2008-08-29 19:50:04 +0000
@@ -0,0 +1,92 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <my_atomic.h>
+#include <tap.h>
+
+volatile uint32 bad; /* test verdict: zeroed by test_concurrently() before a run, which then passes only if it is still 0 */
+pthread_attr_t thr_attr; /* attributes for the worker threads; main() sets the detached state */
+pthread_mutex_t mutex; /* protects running_threads; used together with cond */
+pthread_cond_t cond; /* test_concurrently() waits on it until running_threads is 0 */
+uint running_threads; /* set by test_concurrently() to the number of threads it starts */
+
+void do_tests(); /* called by main() below; defined by the test file that #includes this template */
+
+void test_concurrently(const char *test, pthread_handler handler, int n, int m)
+{ /* Start n threads running handler (each is passed &m), wait for all of them, then emit one TAP result named after test */
+ pthread_t t; /* thread ids are not kept: the same variable is overwritten on every create */
+ ulonglong now= my_getsystime();
+
+ bad= 0; /* the result below passes only if this is still 0 after the run */
+
+ diag("Testing %s with %d threads, %d iterations... ", test, n, m);
+ for (running_threads= n ; n ; n--) /* the counter is set before the first thread exists */
+ {
+ if (pthread_create(&t, &thr_attr, handler, &m) != 0) /* all threads share the same &m */
+ {
+ diag("Could not create thread");
+ abort();
+ }
+ }
+ pthread_mutex_lock(&mutex);
+ while (running_threads) /* handlers decrement it under mutex and signal cond when it reaches 0 */
+ pthread_cond_wait(&cond, &mutex);
+ pthread_mutex_unlock(&mutex);
+
+ now= my_getsystime()-now;
+ ok(!bad, "tested %s in %g secs (%d)", test, ((double)now)/1e7, bad); /* the /1e7 presumably means my_getsystime() counts 100 ns units - TODO confirm */
+}
+
+int main(int argc __attribute__((unused)), char **argv)
+{ /* Shared main(): create the synchronisation objects, run do_tests(), destroy them and return the TAP exit status */
+ MY_INIT("thd_template");
+
+ if (argv[1] && *argv[1]) /* an optional, non-empty first argument is handed to DBUG_SET_INITIAL */
+ DBUG_SET_INITIAL(argv[1]);
+
+ pthread_mutex_init(&mutex, 0);
+ pthread_cond_init(&cond, 0);
+ pthread_attr_init(&thr_attr);
+ pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); /* threads are not joined; completion is tracked via running_threads */
+
+#ifdef MY_ATOMIC_MODE_RWLOCKS
+#if defined(HPUX11) || defined(__POWERPC__) /* showed to be very slow (scheduler-related) */
+#define CYCLES 300
+#else
+#define CYCLES 3000
+#endif
+#else
+#define CYCLES 3000
+#endif
+#define THREADS 30 /* CYCLES and THREADS are macros, so code that follows this file in the translation unit sees them too */
+
+ diag("N CPUs: %d, atomic ops: %s", my_getncpus(), MY_ATOMIC_MODE);
+
+ do_tests(); /* only declared in this file; the including test supplies the definition */
+
+ /*
+ workaround until we know why it crashes randomly on some machine
+ (BUG#22320).
+ */
+ sleep(2);
+ pthread_mutex_destroy(&mutex);
+ pthread_cond_destroy(&cond);
+ pthread_attr_destroy(&thr_attr);
+ my_end(0);
+ return exit_status();
+}
+
=== added file 'unittest/mysys/waiting_threads-t.c'
--- a/unittest/mysys/waiting_threads-t.c 1970-01-01 00:00:00 +0000
+++ b/unittest/mysys/waiting_threads-t.c 2008-09-01 19:43:11 +0000
@@ -0,0 +1,269 @@
+/* Copyright (C) 2006 MySQL AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#include "thr_template.c"
+#include <waiting_threads.h>
+#include <m_string.h>
+#include <locale.h>
+
+struct test_wt_thd { /* one slot per test thread */
+ WT_THD thd;
+ pthread_mutex_t lock; /* test_wt() holds it while naming this slot as a blocker and while releasing this slot's own resources */
+} thds[THREADS];
+
+uint i, cnt; /* i: loop index in do_tests() (test_wt() shadows it with a local); cnt: hands out thread ids in test_wt(), also reused as a loop index */
+pthread_mutex_t lock; /* guards cnt in test_wt(); also the mutex passed to wt_thd_cond_timedwait() */
+
+ulong wt_timeout_short=100, wt_deadlock_search_depth_short=4; /* passed by address to wt_thd_lazy_init(); do_tests() changes the values before the stress test */
+ulong wt_timeout_long=10000, wt_deadlock_search_depth_long=15;
+
+#define reset(ARRAY) bzero(ARRAY, sizeof(ARRAY)) /* zero-fills ARRAY; relies on sizeof, so ARRAY must be a real array, not a pointer */
+
+enum { LATEST, RANDOM, YOUNGEST, LOCKS } kill_strategy; /* how test_wt() maintains thd.weight: LATEST leaves it alone, RANDOM draws rnd() before each wait, YOUNGEST stores ~my_getsystime(), LOCKS increments it after a round with res == 0 and zeroes it otherwise */
+
+WT_RESOURCE_TYPE restype={ wt_resource_id_memcmp, 0}; /* resource type used by every WT_RESOURCE_ID in this test */
+
+#define rnd() ((uint)(my_rnd(&rand) * INT_MAX32)) /* expects a struct my_rnd_struct named rand in the caller's scope */
+
+/*
+  Stress test thread body: repeat *arg times - wait on a random number of
+  random other threads. It always succeeds (unless it crashes or hangs).
+*/
+pthread_handler_t test_wt(void *arg)
+{
+  int m, n, i, id, res;
+  struct my_rnd_struct rand; /* used implicitly by the rnd() macro */
+
+  my_thread_init();
+
+  pthread_mutex_lock(&lock);
+  id= cnt++; /* our index in thds[]; cnt is shared, hence the lock */
+  pthread_mutex_unlock(&lock);
+
+  my_rnd_init(&rand, (ulong)(intptr)&m, id);
+  if (kill_strategy == YOUNGEST)
+    thds[id].thd.weight= ~my_getsystime();
+  if (kill_strategy == LOCKS)
+    thds[id].thd.weight= 0;
+
+  for (m= *(int *)arg; m ; m--)
+  {
+    WT_RESOURCE_ID resid;
+    int blockers[THREADS/10], j, k; /* threads picked in the current round */
+
+    resid.value= id;
+    resid.type= &restype;
+
+    res= 0;
+
+    for (j= n= (rnd() % THREADS)/10; !res && j >= 0; j--) /* n+1 blockers */
+    {
+retry:
+      i= rnd() % (THREADS-1); /* pick a random thread ... */
+      if (i >= id) i++;       /* ... skipping our own index */
+
+      for (k=n; k > j; k--) /* only blockers[j+1..n] are filled; [j] is not assigned yet */
+        if (blockers[k] == i)
+          goto retry;
+      blockers[j]= i;
+
+      if (kill_strategy == RANDOM)
+        thds[id].thd.weight= rnd();
+
+      pthread_mutex_lock(& thds[i].lock);
+      res= wt_thd_will_wait_for(& thds[id].thd, & thds[i].thd, &resid);
+      pthread_mutex_unlock(& thds[i].lock);
+    }
+
+    if (!res) /* every wait was registered with a zero result: now really wait */
+    {
+      pthread_mutex_lock(&lock);
+      res= wt_thd_cond_timedwait(& thds[id].thd, &lock);
+      pthread_mutex_unlock(&lock);
+    }
+
+    if (res) /* non-zero result from either call: drop everything we hold */
+    {
+      pthread_mutex_lock(& thds[id].lock);
+      pthread_mutex_lock(&lock);
+      wt_thd_release_all(& thds[id].thd);
+      pthread_mutex_unlock(&lock);
+      pthread_mutex_unlock(& thds[id].lock);
+      if (kill_strategy == LOCKS)
+        thds[id].thd.weight= 0;
+      if (kill_strategy == YOUNGEST)
+        thds[id].thd.weight= ~my_getsystime();
+    }
+    else if (kill_strategy == LOCKS)
+      thds[id].thd.weight++;
+  }
+
+  pthread_mutex_lock(& thds[id].lock);
+  pthread_mutex_lock(&lock);
+  wt_thd_release_all(& thds[id].thd);
+  pthread_mutex_unlock(&lock);
+  pthread_mutex_unlock(& thds[id].lock);
+
+#ifndef DBUG_OFF
+  { /* debug builds only: tag the name of the finished thread */
+#define DEL "(deleted)"
+    char *x=malloc(strlen(thds[id].thd.name)+sizeof(DEL)+1);
+    strxmov(x, thds[id].thd.name, DEL, 0);
+    thds[id].thd.name=x; /* it's a memory leak, go on, shoot me */
+  }
+#endif
+
+  pthread_mutex_lock(&mutex);
+  if (!--running_threads) pthread_cond_signal(&cond); /* last one out wakes test_concurrently() */
+  pthread_mutex_unlock(&mutex);
+  DBUG_PRINT("wt", ("exiting"));
+  my_thread_end();
+  return 0;
+}
+
+void do_one_test()
+{ /* Run the stress test once with the current kill_strategy and print the collected statistics with diag() */
+ double sum, sum0;
+
+ reset(wt_cycle_stats);
+ reset(wt_wait_stats);
+ wt_success_stats=0;
+ cnt=0; /* test_wt() threads take their ids from cnt, so restart at 0 */
+ test_concurrently("waiting_threads", test_wt, THREADS, CYCLES);
+
+ sum=sum0=0; /* sum: total over all cycle lengths; sum0: running total for the cumulative percentage */
+ for (cnt=0; cnt < WT_CYCLE_STATS; cnt++)
+ sum+= wt_cycle_stats[0][cnt] + wt_cycle_stats[1][cnt];
+ for (cnt=0; cnt < WT_CYCLE_STATS; cnt++)
+ if (wt_cycle_stats[0][cnt] + wt_cycle_stats[1][cnt] > 0) /* only lengths that actually occurred are printed */
+ {
+ sum0+=wt_cycle_stats[0][cnt] + wt_cycle_stats[1][cnt];
+ diag("deadlock cycles of length %2u: %4u %4u %8.2f %%", cnt,
+ wt_cycle_stats[0][cnt], wt_cycle_stats[1][cnt], 1e2*sum0/sum);
+ }
+ diag("depth exceeded: %u %u", /* cnt == WT_CYCLE_STATS here; NOTE(review): assumes the arrays have one extra column - confirm in waiting_threads.h */
+ wt_cycle_stats[0][cnt], wt_cycle_stats[1][cnt]);
+ for (cnt=0; cnt < WT_WAIT_STATS; cnt++)
+ if (wt_wait_stats[cnt]>0)
+ diag("deadlock waits up to %7llu us: %5u",
+ wt_wait_table[cnt], wt_wait_stats[cnt]);
+ diag("timed out: %u", wt_wait_stats[cnt]); /* cnt == WT_WAIT_STATS here: the slot after the last wait bucket */
+ diag("successes: %u", wt_success_stats);
+}
+
+void do_tests() /* waiting_threads test: a manual scenario on thds[0..3], then one stress run per kill strategy */
+{
+  plan(12); /* 1 init + 3 waits + 1 timeout + 3 wait/deadlock + 4 kill strategies */
+  compile_time_assert(THREADS >= 4); /* the manual test below indexes thds[0..3] */
+
+  DBUG_PRINT("wt", ("================= initialization ==================="));
+
+  bad= my_atomic_initialize();
+  ok(!bad, "my_atomic_initialize() returned %d", bad);
+
+  pthread_mutex_init(&lock, 0);
+  wt_init();
+  for (cnt=0; cnt < THREADS; cnt++)
+  {
+    wt_thd_lazy_init(& thds[cnt].thd,
+                     & wt_deadlock_search_depth_short, & wt_timeout_short,
+                     & wt_deadlock_search_depth_long, & wt_timeout_long);
+    pthread_mutex_init(& thds[cnt].lock, 0);
+  }
+  {
+    WT_RESOURCE_ID resid[3];
+    for (i=0; i < 3; i++)
+    {
+      resid[i].value= i+1;
+      resid[i].type= &restype;
+    }
+
+    DBUG_PRINT("wt", ("================= manual test ==================="));
+
+#define ok_wait(X,Y, R) \
+    ok(wt_thd_will_wait_for(& thds[X].thd, & thds[Y].thd, &resid[R]) == 0, \
+      "thd[" #X "] will wait for thd[" #Y "]")
+#define ok_deadlock(X,Y,R) \
+    ok(wt_thd_will_wait_for(& thds[X].thd, & thds[Y].thd, &resid[R]) == WT_DEADLOCK, \
+      "thd[" #X "] will wait for thd[" #Y "] - deadlock")
+
+    ok_wait(0,1,0); /* thd 0 waits for thds 1, 2 and 3 on the same resource */
+    ok_wait(0,2,0);
+    ok_wait(0,3,0);
+
+    pthread_mutex_lock(&lock);
+    bad= wt_thd_cond_timedwait(& thds[0].thd, &lock); /* nobody releases, so a timeout is expected */
+    pthread_mutex_unlock(&lock);
+    ok(bad == ETIMEDOUT, "timeout test returned %d", bad);
+
+    ok_wait(0,1,0); /* 0 -> 1 -> 2, then 2 -> 0 must be reported as a deadlock */
+    ok_wait(1,2,1);
+    ok_deadlock(2,0,2);
+
+    pthread_mutex_lock(&lock);
+    wt_thd_cond_timedwait(& thds[0].thd, &lock); /* results deliberately not checked here */
+    wt_thd_cond_timedwait(& thds[1].thd, &lock);
+    wt_thd_release_all(& thds[0].thd);
+    wt_thd_release_all(& thds[1].thd);
+    wt_thd_release_all(& thds[2].thd);
+    wt_thd_release_all(& thds[3].thd);
+    pthread_mutex_unlock(&lock);
+
+    for (cnt=0; cnt < 3; cnt++) /* NOTE(review): thds[3] was released above but is not re-initialised here - confirm that is intended */
+    {
+      wt_thd_destroy(& thds[cnt].thd);
+      wt_thd_lazy_init(& thds[cnt].thd,
+                       & wt_deadlock_search_depth_short, & wt_timeout_short,
+                       & wt_deadlock_search_depth_long, & wt_timeout_long);
+    }
+  }
+
+  wt_deadlock_search_depth_short=6; /* stress-test settings; thds[] were given the addresses of these variables */
+  wt_timeout_short=1000;
+  wt_timeout_long= 100;
+  wt_deadlock_search_depth_long=16;
+  DBUG_PRINT("wt", ("================= stress test ==================="));
+
+  diag("timeout_short=%lu us, deadlock_search_depth_short=%lu",
+       wt_timeout_short, wt_deadlock_search_depth_short);
+  diag("timeout_long=%lu us, deadlock_search_depth_long=%lu",
+       wt_timeout_long, wt_deadlock_search_depth_long);
+
+#define test_kill_strategy(X) \
+  do { diag("kill strategy: " #X); \
+       kill_strategy=X; \
+       do_one_test(); } while (0)
+
+  test_kill_strategy(LATEST);
+  SKIP_BIG_TESTS(1) /* presumably accounts for one skipped result when big tests are off - TODO confirm in tap.h */
+  {
+    test_kill_strategy(RANDOM);
+  }
+  test_kill_strategy(YOUNGEST);
+  test_kill_strategy(LOCKS);
+
+  DBUG_PRINT("wt", ("================= cleanup ==================="));
+  pthread_mutex_lock(&lock);
+  for (cnt=0; cnt < THREADS; cnt++)
+  {
+    wt_thd_release_all(& thds[cnt].thd);
+    wt_thd_destroy(& thds[cnt].thd);
+    pthread_mutex_destroy(& thds[cnt].lock);
+  }
+  pthread_mutex_unlock(&lock);
+  wt_end();
+  pthread_mutex_destroy(&lock);
+}
+
| Thread |
|---|
| • bzr commit into MySQL/Maria:mysql-maria branch (monty:2668) | Michael Widenius | 5 Sep |