List:Commits« Previous MessageNext Message »
From:Vladislav Vaintroub Date:October 4 2010 12:27am
Subject:bzr commit into mysql-5.5-bugteam branch (vvaintroub:3220) Bug#56405
Bug#56585
View as plain text  
#At file:///H:/bzr-new/mysql-5.5-bugteam/ based on revid:alexander.nozdrin@stripped

 3220 Vladislav Vaintroub	2010-10-04
      A follow-up to the patch for bug #56405 "Deadlock in the MDL deadlock
      detector". This patch addresses performance regression in OLTP_RO/MyISAM
      test on Windows introduced by the fix for bug #56405. Thus it makes
      original patch acceptable as a solution for bug #56585 "Slowdown of
      readonly sysbench benchmarks (e.g point_select) on Windows 5.5".
      
      With this patch, MySQL will use native Windows condition variables and 
      reader-writer locks  if  they are supported by the OS.
      
      This speeds up MyISAM and the effect comes mostly from using native
      rwlocks. Native conditions improve scalability with higher number of 
      concurrent users in other situations, e.g for prlocks.
      
      Benchmark numbers for this patch as measured on Win2008R2 quad
      core machine are attached to the bug report.
      ( direct link http://bugs.mysql.com/file.php?id=15883 )
      
      Note that currently we require at least Windows7/WS2008R2 for 
      reader-writer locks, even though native rwlock is available also on Vista.
      Reason is that "trylock" APIs are missing on Vista, and trylock is used in
      the server (in a single place in query cache).
      
      While this patch could have been written differently, to enable the native
      rwlock optimization also on Vista/WS2008 (e.g using native locks everywhere
      but portable implementation in query cache), this would come at the 
      expense of the code clarity, as it would introduce a new  "try-able" rwlock
      type, to handle Vista case.
      
      Another way to improve performance for the special case 
      (OLTP_RO/MYISAM/Vista) would be to eliminate "trylock" usage from server,
       but this is outside of the scope here.
      
      
      Native conditions variables are used beginning with Vista though the effect
      of using condition variables alone is not measurable in this benchmark.
      But when used together with native rwlocks on Win7, native conditions improve 
      performance in high-concurrency OLTP_RO/MyISAM (128 and more sysbench 
      users).

    modified:
      include/my_pthread.h
      mysys/my_wincond.c
      mysys/my_winthread.c
      mysys/thr_rwlock.c
=== modified file 'include/my_pthread.h'
--- a/include/my_pthread.h	2010-09-29 12:09:07 +0000
+++ b/include/my_pthread.h	2010-10-04 00:27:24 +0000
@@ -48,19 +48,30 @@ typedef struct st_pthread_link {
   struct st_pthread_link *next;
 } pthread_link;
 
-typedef struct {
-  uint32 waiting;
-  CRITICAL_SECTION lock_waiting;
- 
-  enum {
-    SIGNAL= 0,
-    BROADCAST= 1,
-    MAX_EVENTS= 2
-  } EVENTS;
-
-  HANDLE events[MAX_EVENTS];
-  HANDLE broadcast_block_event;
+  /**
+    Implementation of Windows condition variables.
+    We use native conditions on Vista and later, and fallback to own 
+    implementation on earlier OS version.
+  */
+typedef union
+{
+  /* Native condition (used on Vista and later) */
+  CONDITION_VARIABLE native_cond;
 
+  /* Own implementation (used on XP) */
+  struct
+  { 
+    uint32 waiting;
+    CRITICAL_SECTION lock_waiting;
+    enum 
+    {
+      SIGNAL= 0,
+      BROADCAST= 1,
+      MAX_EVENTS= 2
+    } EVENTS;
+    HANDLE events[MAX_EVENTS];
+    HANDLE broadcast_block_event;
+  };
 } pthread_cond_t;
 
 
@@ -679,6 +690,47 @@ extern int rw_pr_destroy(rw_pr_lock_t *)
 
 
 #ifdef NEED_MY_RW_LOCK
+
+#ifdef _WIN32
+
+/**
+  Implementation of Windows rwlock.
+
+  We use native (slim) rwlocks on Win7 and later, and fallback to  portable
+  implementation on earlier Windows.
+
+  slim rwlock are also available on Vista/WS2008, but we do not use it
+  ("trylock" APIs are missing on Vista)
+*/
+typedef union
+{
+  /* Native rwlock (is_srwlock == TRUE) */
+  struct 
+  {
+    SRWLOCK srwlock;             /* native reader writer lock */
+    BOOL have_exclusive_srwlock; /* used for unlock */
+  };
+
+  /*
+    Portable implementation (is_srwlock == FALSE)
+    Fields are identical with Unix my_rw_lock_t fields.
+  */
+  struct 
+  {
+    pthread_mutex_t lock;       /* lock for structure		*/
+    pthread_cond_t  readers;    /* waiting readers		*/
+    pthread_cond_t  writers;    /* waiting writers		*/
+    int state;                  /* -1:writer,0:free,>0:readers	*/
+    int waiters;                /* number of waiting writers	*/
+#ifdef SAFE_MUTEX
+    pthread_t  write_thread;
+#endif
+  };
+} my_rw_lock_t;
+
+
+#else /* _WIN32 */
+
 /*
   On systems which don't support native read/write locks we have
   to use own implementation.
@@ -694,6 +746,8 @@ typedef struct st_my_rw_lock_t {
 #endif
 } my_rw_lock_t;
 
+#endif /*! _WIN32 */
+
 extern int my_rw_init(my_rw_lock_t *);
 extern int my_rw_destroy(my_rw_lock_t *);
 extern int my_rw_rdlock(my_rw_lock_t *);

=== modified file 'mysys/my_wincond.c'
--- a/mysys/my_wincond.c	2009-11-23 17:08:37 +0000
+++ b/mysys/my_wincond.c	2010-10-04 00:27:24 +0000
@@ -24,7 +24,108 @@
 #include <process.h>
 #include <sys/timeb.h>
 
-int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
+
+/*
+  Windows native condition variables. We use runtime loading / function 
+  pointers, because they are not available on XP 
+*/
+
+/* Prototypes and function pointers for condition variable functions */
+typedef VOID (WINAPI * InitializeConditionVariableProc) 
+  (PCONDITION_VARIABLE ConditionVariable);
+
+typedef BOOL (WINAPI * SleepConditionVariableCSProc)
+  (PCONDITION_VARIABLE ConditionVariable,
+  PCRITICAL_SECTION CriticalSection, 
+  DWORD dwMilliseconds);
+
+typedef VOID (WINAPI * WakeAllConditionVariableProc)
+ (PCONDITION_VARIABLE ConditionVariable);
+
+typedef VOID (WINAPI * WakeConditionVariableProc)
+  (PCONDITION_VARIABLE ConditionVariable);
+
+static InitializeConditionVariableProc my_InitializeConditionVariable;
+static SleepConditionVariableCSProc my_SleepConditionVariableCS;
+static WakeAllConditionVariableProc my_WakeAllConditionVariable;
+static WakeConditionVariableProc my_WakeConditionVariable;
+
+
+/**
+ Indicates if we have native condition variables,
+ initialized first time pthread_cond_init is called.
+*/
+
+static BOOL have_native_conditions= FALSE;
+
+
+/**
+  Check if native conditions can be used, load function pointers 
+*/
+
+static void check_native_cond_availability(void)
+{
+  HMODULE module= GetModuleHandle("kernel32");
+
+  my_InitializeConditionVariable= (InitializeConditionVariableProc)
+    GetProcAddress(module, "InitializeConditionVariable");
+  my_SleepConditionVariableCS= (SleepConditionVariableCSProc)
+    GetProcAddress(module, "SleepConditionVariableCS");
+  my_WakeAllConditionVariable= (WakeAllConditionVariableProc)
+    GetProcAddress(module, "WakeAllConditionVariable");
+  my_WakeConditionVariable= (WakeConditionVariableProc)
+    GetProcAddress(module, "WakeConditionVariable");
+
+  if (my_InitializeConditionVariable)
+    have_native_conditions= TRUE;
+}
+
+
+
+/**
+  Convert abstime to milliseconds
+*/
+
+static DWORD get_milliseconds(const struct timespec *abstime)
+{
+  long long millis; 
+  union ft64 now;
+
+  if (abstime == NULL)
+   return INFINITE;
+
+  GetSystemTimeAsFileTime(&now.ft);
+
+  /*
+    Calculate time left to abstime
+    - subtract start time from current time(values are in 100ns units)
+    - convert to millisec by dividing with 10000
+  */
+  millis= (abstime->tv.i64 - now.i64) / 10000;
+  
+  /* Don't allow the timeout to be negative */
+  if (millis < 0)
+    return 0;
+
+  /*
+    Make sure the calculated timeout does not exceed original timeout
+    value which could cause "wait for ever" if system time changes
+  */
+  if (millis > abstime->max_timeout_msec)
+    millis= abstime->max_timeout_msec;
+  
+  if (millis > UINT_MAX)
+    millis= UINT_MAX;
+
+  return (DWORD)millis;
+}
+
+
+/*
+  Old (pre-vista) implementation using events
+*/
+
+static int legacy_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
 {
   cond->waiting= 0;
   InitializeCriticalSection(&cond->lock_waiting);
@@ -53,7 +154,8 @@ int pthread_cond_init(pthread_cond_t *co
   return 0;
 }
 
-int pthread_cond_destroy(pthread_cond_t *cond)
+
+static int legacy_cond_destroy(pthread_cond_t *cond)
 {
   DeleteCriticalSection(&cond->lock_waiting);
 
@@ -65,48 +167,13 @@ int pthread_cond_destroy(pthread_cond_t 
 }
 
 
-int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
-{
-  return pthread_cond_timedwait(cond,mutex,NULL);
-}
-
-
-int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
+static int legacy_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
                            struct timespec *abstime)
 {
   int result;
-  long timeout; 
-  union ft64 now;
-
-  if( abstime != NULL )
-  {
-    GetSystemTimeAsFileTime(&now.ft);
-
-    /*
-      Calculate time left to abstime
-      - subtract start time from current time(values are in 100ns units)
-      - convert to millisec by dividing with 10000
-    */
-    timeout= (long)((abstime->tv.i64 - now.i64) / 10000);
-    
-    /* Don't allow the timeout to be negative */
-    if (timeout < 0)
-      timeout= 0L;
-
-    /*
-      Make sure the calucated timeout does not exceed original timeout
-      value which could cause "wait for ever" if system time changes
-    */
-    if (timeout > abstime->max_timeout_msec)
-      timeout= abstime->max_timeout_msec;
-
-  }
-  else
-  {
-    /* No time specified; don't expire */
-    timeout= INFINITE;
-  }
+  DWORD timeout; 
 
+  timeout= get_milliseconds(abstime);
   /* 
     Block access if previous broadcast hasn't finished.
     This is just for safety and should normally not
@@ -142,7 +209,7 @@ int pthread_cond_timedwait(pthread_cond_
   return result == WAIT_TIMEOUT ? ETIMEDOUT : 0;
 }
 
-int pthread_cond_signal(pthread_cond_t *cond)
+static int legacy_cond_signal(pthread_cond_t *cond)
 {
   EnterCriticalSection(&cond->lock_waiting);
   
@@ -155,7 +222,7 @@ int pthread_cond_signal(pthread_cond_t *
 }
 
 
-int pthread_cond_broadcast(pthread_cond_t *cond)
+static int legacy_cond_broadcast(pthread_cond_t *cond)
 {
   EnterCriticalSection(&cond->lock_waiting);
   /*
@@ -177,6 +244,87 @@ int pthread_cond_broadcast(pthread_cond_
 }
 
 
+/* 
+ Posix API functions. Just choose between native and legacy implementation.
+*/
+
+int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
+{
+  /*
+    Once initialization is used here rather than in my_init(), to 
+    1) avoid  my_init() pitfalls- undefined order in which initialization should
+    run
+    2) be potentially useful C++ (in static constructors that run before main())
+    3) just to simplify the API.
+    Also, the overhead is of my_pthread_once is very small.
+  */
+  static my_pthread_once_t once_control= MY_PTHREAD_ONCE_INIT;
+  my_pthread_once(&once_control, check_native_cond_availability);
+
+  if (have_native_conditions)
+  {
+    my_InitializeConditionVariable(&cond->native_cond);
+    return 0;
+  }
+  else 
+    return legacy_cond_init(cond, attr);
+}
+
+
+int pthread_cond_destroy(pthread_cond_t *cond)
+{
+  if (have_native_conditions)
+    return 0; /* no destroy function */
+  else
+    return legacy_cond_destroy(cond);
+}
+
+
+int pthread_cond_broadcast(pthread_cond_t *cond)
+{
+  if (have_native_conditions)
+  {
+    my_WakeAllConditionVariable(&cond->native_cond);
+    return 0;
+  }
+  else
+    return legacy_cond_broadcast(cond);
+}
+
+
+int pthread_cond_signal(pthread_cond_t *cond)
+{
+  if (have_native_conditions)
+  {
+    my_WakeConditionVariable(&cond->native_cond);
+    return 0;
+  }
+  else
+    return legacy_cond_signal(cond);
+}
+
+
+int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
+  struct timespec *abstime)
+{
+  if (have_native_conditions)
+  {
+    DWORD timeout= get_milliseconds(abstime);
+    if (!my_SleepConditionVariableCS(&cond->native_cond, mutex, timeout))
+      return ETIMEDOUT;
+    return 0;  
+  }
+  else
+    return legacy_cond_timedwait(cond, mutex, abstime);
+}
+
+
+int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
+{
+  return pthread_cond_timedwait(cond, mutex, NULL);
+}
+
+
 int pthread_attr_init(pthread_attr_t *connect_att)
 {
   connect_att->dwStackSize	= 0;

=== modified file 'mysys/my_winthread.c'
--- a/mysys/my_winthread.c	2009-12-19 13:11:48 +0000
+++ b/mysys/my_winthread.c	2010-10-04 00:27:24 +0000
@@ -155,8 +155,19 @@ int pthread_cancel(pthread_t thread)
 int my_pthread_once(my_pthread_once_t *once_control, 
     void (*init_routine)(void))
 {
-  LONG state= InterlockedCompareExchange(once_control, MY_PTHREAD_ONCE_INPROGRESS,
-                                          MY_PTHREAD_ONCE_INIT);
+  LONG state;
+
+  /*
+    Do "dirty" read to find out if initialization is already done, to
+    save an interlocked operation in common case. Memory barriers are ensured by 
+    Visual C++ volatile implementation.
+  */
+  if (*once_control == MY_PTHREAD_ONCE_DONE)
+    return 0;
+
+  state= InterlockedCompareExchange(once_control, MY_PTHREAD_ONCE_INPROGRESS,
+                                        MY_PTHREAD_ONCE_INIT);
+
   switch(state)
   {
   case MY_PTHREAD_ONCE_INIT:

=== modified file 'mysys/thr_rwlock.c'
--- a/mysys/thr_rwlock.c	2010-09-29 12:09:07 +0000
+++ b/mysys/thr_rwlock.c	2010-10-04 00:27:24 +0000
@@ -20,6 +20,118 @@
 #if defined(NEED_MY_RW_LOCK)
 #include <errno.h>
 
+#ifdef _WIN32
+
+static BOOL have_srwlock = TRUE;
+/* Prototypes and function pointers for windows  functions */
+typedef VOID (WINAPI* srw_func) (PSRWLOCK SRWLock);
+typedef BOOL (WINAPI* srw_bool_func) (PSRWLOCK SRWLock);
+
+static srw_func my_InitializeSRWLock;
+static srw_func my_AcquireSRWLockExclusive;
+static srw_func my_ReleaseSRWLockExclusive;
+static srw_func my_AcquireSRWLockShared;
+static srw_func my_ReleaseSRWLockShared;
+
+static srw_bool_func my_TryAcquireSRWLockExclusive;
+static srw_bool_func my_TryAcquireSRWLockShared;
+
+/**
+  Check for presence of Windows slim reader writer lock function.
+  Load function pointers.
+*/
+static void check_srwlock_availability(void)
+{
+  HMODULE module= GetModuleHandle("kernel32");
+
+  my_InitializeSRWLock= (srw_func) GetProcAddress(module,
+    "InitializeSRWLock");
+  my_AcquireSRWLockExclusive= (srw_func) GetProcAddress(module,
+    "AcquireSRWLockExclusive");
+  my_AcquireSRWLockShared= (srw_func) GetProcAddress(module,
+    "AcquireSRWLockShared");
+  my_ReleaseSRWLockExclusive= (srw_func) GetProcAddress(module,
+    "ReleaseSRWLockExclusive");
+  my_ReleaseSRWLockShared= (srw_func) GetProcAddress(module,
+    "ReleaseSRWLockShared");
+  my_TryAcquireSRWLockExclusive=  (srw_bool_func) GetProcAddress(module,
+    "TryAcquireSRWLockExclusive");
+  my_TryAcquireSRWLockShared=  (srw_bool_func) GetProcAddress(module,
+    "TryAcquireSRWLockShared");
+
+  /*
+    We currently require TryAcquireSRWLockExclusive. This API is missing on 
+    Vista, this means SRWLock are only used starting with Win7.
+
+    If "trylock" usage for rwlocks is eliminated from server codebase (it is used 
+    in a single place currently, in query cache), then SRWLock can be enabled on 
+    Vista too. In this case  condition below needs to be changed to  e.g check 
+    for my_InitializeSRWLock.
+  */
+
+  if (my_TryAcquireSRWLockExclusive)
+    have_srwlock= TRUE;
+
+}
+
+
+static int srw_init(my_rw_lock_t *rwp)
+{
+  my_InitializeSRWLock(&rwp->srwlock);
+  rwp->have_exclusive_srwlock = FALSE;
+  return 0;
+}
+
+
+static int srw_rdlock(my_rw_lock_t *rwp)
+{
+  my_AcquireSRWLockShared(&rwp->srwlock);
+  return 0;
+}
+
+
+static int srw_tryrdlock(my_rw_lock_t *rwp)
+{
+
+  if (!my_TryAcquireSRWLockShared(&rwp->srwlock))
+    return EBUSY;
+  return 0;
+}
+
+
+static int srw_wrlock(my_rw_lock_t *rwp)
+{
+  my_AcquireSRWLockExclusive(&rwp->srwlock);
+  rwp->have_exclusive_srwlock= TRUE;
+  return 0;
+}
+
+
+static int srw_trywrlock(my_rw_lock_t *rwp)
+{
+  if (!my_TryAcquireSRWLockExclusive(&rwp->srwlock))
+    return EBUSY;
+  rwp->have_exclusive_srwlock= TRUE;
+  return 0;
+}
+
+
+static int srw_unlock(my_rw_lock_t *rwp)
+{
+  if (rwp->have_exclusive_srwlock)
+  {
+    rwp->have_exclusive_srwlock= FALSE;
+    my_ReleaseSRWLockExclusive(&rwp->srwlock);
+  }
+  else
+  {
+    my_ReleaseSRWLockShared(&rwp->srwlock);
+  }
+  return 0;
+}
+
+#endif /*_WIN32 */
+
 /*
   Source base from Sun Microsystems SPILT, simplified for MySQL use
   -- Joshua Chamas
@@ -63,6 +175,22 @@ int my_rw_init(my_rw_lock_t *rwp)
 {
   pthread_condattr_t	cond_attr;
 
+#ifdef _WIN32
+  /*
+    Once initialization is used here rather than in my_init(), in order to
+    - avoid  my_init() pitfalls- (undefined order in which initialization should
+    run)
+    - be potentially useful C++ (static constructors) 
+    - just to simplify  the API. 
+    Also, the overhead is of my_pthread_once is very small.
+  */
+  static my_pthread_once_t once_control= MY_PTHREAD_ONCE_INIT;
+  my_pthread_once(&once_control, check_srwlock_availability);
+
+  if (have_srwlock)
+    return srw_init(rwp);
+#endif
+
   pthread_mutex_init( &rwp->lock, MY_MUTEX_INIT_FAST);
   pthread_condattr_init( &cond_attr );
   pthread_cond_init( &rwp->readers, &cond_attr );
@@ -81,6 +209,10 @@ int my_rw_init(my_rw_lock_t *rwp)
 
 int my_rw_destroy(my_rw_lock_t *rwp)
 {
+#ifdef _WIN32
+  if (have_srwlock)
+    return 0; /* no destroy function */
+#endif
   DBUG_ASSERT(rwp->state == 0);
   pthread_mutex_destroy( &rwp->lock );
   pthread_cond_destroy( &rwp->readers );
@@ -91,6 +223,11 @@ int my_rw_destroy(my_rw_lock_t *rwp)
 
 int my_rw_rdlock(my_rw_lock_t *rwp)
 {
+#ifdef _WIN32
+  if (have_srwlock)
+    return srw_rdlock(rwp);
+#endif
+
   pthread_mutex_lock(&rwp->lock);
 
   /* active or queued writers */
@@ -105,6 +242,12 @@ int my_rw_rdlock(my_rw_lock_t *rwp)
 int my_rw_tryrdlock(my_rw_lock_t *rwp)
 {
   int res;
+
+#ifdef _WIN32
+  if (have_srwlock)
+    return srw_tryrdlock(rwp);
+#endif
+
   pthread_mutex_lock(&rwp->lock);
   if ((rwp->state < 0 ) || rwp->waiters)
     res= EBUSY;					/* Can't get lock */
@@ -120,6 +263,11 @@ int my_rw_tryrdlock(my_rw_lock_t *rwp)
 
 int my_rw_wrlock(my_rw_lock_t *rwp)
 {
+#ifdef _WIN32
+  if (have_srwlock)
+    return srw_wrlock(rwp);
+#endif
+
   pthread_mutex_lock(&rwp->lock);
   rwp->waiters++;				/* another writer queued */
 
@@ -140,6 +288,12 @@ int my_rw_wrlock(my_rw_lock_t *rwp)
 int my_rw_trywrlock(my_rw_lock_t *rwp)
 {
   int res;
+
+#ifdef _WIN32
+  if (have_srwlock)
+    return srw_trywrlock(rwp);
+#endif
+
   pthread_mutex_lock(&rwp->lock);
   if (rwp->state)
     res= EBUSY;					/* Can't get lock */    
@@ -158,6 +312,11 @@ int my_rw_trywrlock(my_rw_lock_t *rwp)
 
 int my_rw_unlock(my_rw_lock_t *rwp)
 {
+#ifdef _WIN32
+  if (have_srwlock)
+    return srw_unlock(rwp);
+#endif
+
   DBUG_PRINT("rw_unlock",
 	     ("state: %d waiters: %d", rwp->state, rwp->waiters));
   pthread_mutex_lock(&rwp->lock);


Attachment: [text/bzr-bundle] bzr/vvaintroub@mysql.com-20101004002724-4lu3224ct87sijnm.bundle
Thread
bzr commit into mysql-5.5-bugteam branch (vvaintroub:3220) Bug#56405Bug#56585Vladislav Vaintroub4 Oct
Re: bzr commit into mysql-5.5-bugteam branch (vvaintroub:3220)Bug#56405 Bug#56585Dmitry Lenev4 Oct
  • RE: bzr commit into mysql-5.5-bugteam branch (vvaintroub:3220) Bug#56405 Bug#56585Vladislav Vaintroub4 Oct