List: Commits « Previous Message | Next Message »
From: Vladislav Vaintroub Date: May 10 2010 3:20pm
Subject:bzr commit into mysql-trunk-innodb branch (vvaintroub:3068) Bug#52102
View as plain text  
#At file:///H:/bzr/mysql-trunk-innodb-winperf/ based on revid:marko.makela@stripped

 3068 Vladislav Vaintroub	2010-05-10
      Bug#52102: performance drop in Innodb (Windows).
      
      
      Fix innodb synchronization primitives on Windows,
      - use critical section for os_mutex_t,
      - use condition variables if available for os_event_t,
      - fix incorrect implementation of os_fast_mutex_trylock.
      
      Also, fix compile problems on 64 bit Windows.

    modified:
      storage/innobase/CMakeLists.txt
      storage/innobase/include/os0file.h
      storage/innobase/include/os0sync.h
      storage/innobase/include/os0sync.ic
      storage/innobase/include/srv0srv.h
      storage/innobase/os/os0file.c
      storage/innobase/os/os0sync.c
      storage/innobase/os/os0thread.c
      storage/innobase/srv/srv0srv.c
      storage/innobase/srv/srv0start.c
=== modified file 'storage/innobase/CMakeLists.txt'
--- a/storage/innobase/CMakeLists.txt	2010-04-22 08:59:35 +0000
+++ b/storage/innobase/CMakeLists.txt	2010-05-10 15:20:10 +0000
@@ -240,10 +240,6 @@ SET(INNOBASE_SOURCES	btr/btr0btr.c btr/b
 			usr/usr0sess.c
 			ut/ut0byte.c ut/ut0dbg.c ut/ut0list.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c
 			ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c)
-# Windows atomics do not perform well. Disable Windows atomics by default.
-# See bug#52102 for details.
-#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
-ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
 
 IF(WITH_INNODB)
   # Legacy option

=== modified file 'storage/innobase/include/os0file.h'
--- a/storage/innobase/include/os0file.h	2010-03-29 07:34:42 +0000
+++ b/storage/innobase/include/os0file.h	2010-05-10 15:20:10 +0000
@@ -177,6 +177,10 @@ log. */
 #define OS_WIN95	2	/*!< Microsoft Windows 95 */
 #define OS_WINNT	3	/*!< Microsoft Windows NT 3.x */
 #define OS_WIN2000	4	/*!< Microsoft Windows 2000 */
+#define OS_WINXP	5	/*!< Microsoft Windows XP */
+#define OS_WINVISTA	6	/*!< Microsoft Windows Vista */
+#define OS_WIN7		7	/*!< Microsoft Windows 7 */
+
 
 extern ulint	os_n_file_reads;
 extern ulint	os_n_file_writes;

=== modified file 'storage/innobase/include/os0sync.h'
--- a/storage/innobase/include/os0sync.h	2009-09-28 17:34:23 +0000
+++ b/storage/innobase/include/os0sync.h	2010-05-10 15:20:10 +0000
@@ -37,29 +37,19 @@ Created 9/6/1995 Heikki Tuuri
 #include "univ.i"
 #include "ut0lst.h"
 
-#ifdef __WIN__
-
+#ifdef _WIN32
+/** Native event (slow)*/
+typedef HANDLE	os_native_event_t;
 /** Native mutex */
-#define os_fast_mutex_t CRITICAL_SECTION
-
-/** Native event */
-typedef HANDLE		os_native_event_t;
-
-/** Operating system event */
-typedef struct os_event_struct	os_event_struct_t;
-/** Operating system event handle */
-typedef os_event_struct_t*	os_event_t;
-
-/** An asynchronous signal sent between threads */
-struct os_event_struct {
-	os_native_event_t		  handle;
-					/*!< Windows event */
-	UT_LIST_NODE_T(os_event_struct_t) os_event_list;
-					/*!< list of all created events */
-};
+typedef CRITICAL_SECTION os_fast_mutex_t;
+/** Native condition variable */
+typedef CONDITION_VARIABLE os_cond_t;
 #else
 /** Native mutex */
 typedef pthread_mutex_t	os_fast_mutex_t;
+/** Native condition variable */
+typedef pthread_cond_t	os_cond_t;
+#endif
 
 /** Operating system event */
 typedef struct os_event_struct	os_event_struct_t;
@@ -68,6 +58,9 @@ typedef os_event_struct_t*	os_event_t;
 
 /** An asynchronous signal sent between threads */
 struct os_event_struct {
+#ifdef _WIN32
+	HANDLE handle;	/*!< kernel event object, slow, used on older Windows */
+#endif
 	os_fast_mutex_t	os_mutex;	/*!< this mutex protects the next
 					fields */
 	ibool		is_set;		/*!< this is TRUE when the event is
@@ -76,12 +69,14 @@ struct os_event_struct {
 					this event */
 	ib_int64_t	signal_count;	/*!< this is incremented each time
 					the event becomes signaled */
-	pthread_cond_t	cond_var;	/*!< condition variable is used in
+	os_cond_t	cond_var;	/*!< condition variable is used in
 					waiting for the event */
 	UT_LIST_NODE_T(os_event_struct_t) os_event_list;
 					/*!< list of all created events */
 };
-#endif
+
+
+
 
 /** Operating system mutex */
 typedef struct os_mutex_struct	os_mutex_str_t;
@@ -198,21 +193,7 @@ os_event_wait_time(
 	os_event_t	event,	/*!< in: event to wait */
 	ulint		time);	/*!< in: timeout in microseconds, or
 				OS_SYNC_INFINITE_TIME */
-#ifdef __WIN__
-/**********************************************************//**
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled.
-@return	index of the event which was signaled */
-UNIV_INTERN
-ulint
-os_event_wait_multiple(
-/*===================*/
-	ulint			n,	/*!< in: number of events in the
-					array */
-	os_native_event_t*	native_event_array);
-					/*!< in: pointer to an array of event
-					handles */
-#endif
+
 /*********************************************************//**
 Creates an operating system mutex semaphore. Because these are slow, the
 mutex semaphore of InnoDB itself (mutex_t) should be used where possible.

=== modified file 'storage/innobase/include/os0sync.ic'
--- a/storage/innobase/include/os0sync.ic	2009-05-27 09:52:16 +0000
+++ b/storage/innobase/include/os0sync.ic	2010-05-10 15:20:10 +0000
@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
 #endif
 
 /**********************************************************//**
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock!
+Acquires ownership of a fast mutex.
 @return	0 if success, != 0 if was reserved by another thread */
 UNIV_INLINE
 ulint
@@ -38,9 +37,9 @@ os_fast_mutex_trylock(
 	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to acquire */
 {
 #ifdef __WIN__
-	EnterCriticalSection(fast_mutex);
-
-	return(0);
+	if (TryEnterCriticalSection(fast_mutex))
+		return 0;
+	return(1);
 #else
 	/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
 	so that it returns 0 on success. In the operating system

=== modified file 'storage/innobase/include/srv0srv.h'
--- a/storage/innobase/include/srv0srv.h	2010-04-22 22:15:56 +0000
+++ b/storage/innobase/include/srv0srv.h	2010-05-10 15:20:10 +0000
@@ -112,6 +112,9 @@ OS (provided we compiled Innobase with i
 use simulated aio we build below with threads.
 Currently we support native aio on windows and linux */
 extern my_bool	srv_use_native_aio;
+#ifdef __WIN__
+extern my_bool	os_sync_use_native_conditions;
+#endif
 extern ulint	srv_n_data_files;
 extern char**	srv_data_file_names;
 extern ulint*	srv_data_file_sizes;
@@ -272,10 +275,10 @@ log buffer and have to flush it */
 extern ulint srv_log_waits;
 
 /* the number of purge threads to use from the worker pool (currently 0 or 1) */
-extern ulint srv_n_purge_threads;
+extern ulong srv_n_purge_threads;
 
 /* the number of records to purge in one batch */
-extern ulint srv_purge_batch_size;
+extern ulong srv_purge_batch_size;
 
 /* variable that counts amount of data read in total (in bytes) */
 extern ulint srv_data_read;

=== modified file 'storage/innobase/os/os0file.c'
--- a/storage/innobase/os/os0file.c	2010-03-29 07:34:42 +0000
+++ b/storage/innobase/os/os0file.c	2010-05-10 15:20:10 +0000
@@ -143,6 +143,7 @@ the completed IO request and calls compl
 
 **********************************************************************/
 
+
 /** Flag: enable debug printout for asynchronous i/o */
 UNIV_INTERN ibool	os_aio_print_debug	= FALSE;
 
@@ -183,7 +184,7 @@ struct os_aio_slot_struct{
 					which pending aio operation was
 					completed */
 #ifdef WIN_ASYNC_IO
-	os_event_t	event;		/*!< event object we need in the
+	HANDLE	handle;		/*!< handle object we need in the
 					OVERLAPPED struct */
 	OVERLAPPED	control;	/*!< Windows control block for the
 					aio request */
@@ -225,7 +226,7 @@ struct os_aio_array_struct{
 				aio array outside the ibuf segment */
 	os_aio_slot_t*	slots;	/*!< Pointer to the slots in the array */
 #ifdef __WIN__
-	os_native_event_t* native_events;
+	HANDLE* handles;
 				/*!< Pointer to an array of OS native
 				event handles where we copied the
 				handles from slots, in the same
@@ -322,10 +323,16 @@ os_get_os_version(void)
 	} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
 		return(OS_WIN95);
 	} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
-		if (os_info.dwMajorVersion <= 4) {
-			return(OS_WINNT);
-		} else {
-			return(OS_WIN2000);
+		switch(os_info.dwMajorVersion){
+		case 3:
+		case 4:
+			return OS_WINNT;
+		case 5:
+			return (os_info.dwMinorVersion == 0)?OS_WIN2000 : OS_WINXP;
+		case 6:
+			return (os_info.dwMinorVersion == 0)?OS_WINVISTA : OS_WIN7;
+		default:
+			return OS_WIN7;
 		}
 	} else {
 		ut_error;
@@ -3229,7 +3236,7 @@ os_aio_array_create(
 	array->cur_seg		= 0;
 	array->slots		= ut_malloc(n * sizeof(os_aio_slot_t));
 #ifdef __WIN__
-	array->native_events	= ut_malloc(n * sizeof(os_native_event_t));
+	array->handles	= ut_malloc(n * sizeof(HANDLE));
 #endif
 
 #if defined(LINUX_NATIVE_AIO)
@@ -3270,13 +3277,13 @@ skip_native_aio:
 		slot->pos = i;
 		slot->reserved = FALSE;
 #ifdef WIN_ASYNC_IO
-		slot->event = os_event_create(NULL);
+		slot->handle= CreateEvent(NULL,TRUE, FALSE, NULL);
 
 		over = &(slot->control);
 
-		over->hEvent = slot->event->handle;
+		over->hEvent = slot->handle;
 
-		*((array->native_events) + i) = over->hEvent;
+		*((array->handles) + i) = over->hEvent;
 
 #elif defined(LINUX_NATIVE_AIO)
 
@@ -3302,12 +3309,12 @@ os_aio_array_free(
 
 	for (i = 0; i < array->n_slots; i++) {
 		os_aio_slot_t*	slot = os_aio_array_get_nth_slot(array, i);
-		os_event_free(slot->event);
+		CloseHandle(slot->handle);
 	}
 #endif /* WIN_ASYNC_IO */
 
 #ifdef __WIN__
-	ut_free(array->native_events);
+	ut_free(array->handles);
 #endif /* __WIN__ */
 	os_mutex_free(array->mutex);
 	os_event_free(array->not_full);
@@ -3453,7 +3460,7 @@ os_aio_array_wake_win_aio_at_shutdown(
 
 	for (i = 0; i < array->n_slots; i++) {
 
-		os_event_set((array->slots + i)->event);
+		SetEvent(array->slots[i].handle);
 	}
 }
 #endif
@@ -3692,7 +3699,7 @@ found:
 	control = &(slot->control);
 	control->Offset = (DWORD)offset;
 	control->OffsetHigh = (DWORD)offset_high;
-	os_event_reset(slot->event);
+	ResetEvent(slot->handle);
 
 #elif defined(LINUX_NATIVE_AIO)
 
@@ -3764,7 +3771,7 @@ os_aio_array_free_slot(
 
 #ifdef WIN_ASYNC_IO
 
-	os_event_reset(slot->event);
+	ResetEvent(slot->handle);
 
 #elif defined(LINUX_NATIVE_AIO)
 
@@ -4198,13 +4205,15 @@ os_aio_windows_handle(
 	n = array->n_slots / array->n_segments;
 
 	if (array == os_aio_sync_array) {
-		os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
+		WaitForSingleObject(os_aio_array_get_nth_slot(array, pos)->handle,INFINITE);
 		i = pos;
 	} else {
 		srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
-		i = os_event_wait_multiple(n,
-					   (array->native_events)
-					   + segment * n);
+		i = WaitForMultipleObjects((DWORD) n, array->handles  + segment * n,  FALSE, INFINITE); 
+	}
+
+	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+		os_thread_exit(NULL);
 	}
 
 	os_mutex_enter(array->mutex);

=== modified file 'storage/innobase/os/os0sync.c'
--- a/storage/innobase/os/os0sync.c	2009-11-04 06:02:00 +0000
+++ b/storage/innobase/os/os0sync.c	2010-05-10 15:20:10 +0000
@@ -76,6 +76,187 @@ event embedded inside a mutex, on free, 
 This version of the free event function doesn't acquire the global lock */
 static void os_event_free_internal(os_event_t	event);
 
+/* Condition variables routines.
+
+On Unix, they map directly to pthread_cond_xxx functions,
+on Windows, to Windows condition functions.
+
+Why use condition variables on Windows?
+We use condition variables for events on Windows if possible, even though
+os_event resembles the Windows kernel event object well API-wise. The reason
+is performance: kernel objects are heavyweight, and WaitForSingleObject() is a
+performance killer that causes the calling thread to context switch. Besides,
+InnoDB preallocates a large number (often millions) of os_events. With kernel
+event objects this takes a big chunk out of the non-paged pool, which is better
+used for tasks like IO than for storing idle event objects.
+*/
+
+static void os_cond_init(os_cond_t *cond);
+static void os_cond_wait(os_cond_t *cond, os_fast_mutex_t *mutex);
+static void os_cond_broadcast(os_cond_t *cond);
+static void os_cond_signal(os_cond_t *cond);
+static void os_cond_destroy(os_cond_t *cond);
+
+
+#ifdef __WIN__
+
+
+/* Windows native condition variables. We use runtime loading / function 
+pointers, because they are not available on XP */
+
+UNIV_INTERN ibool	os_sync_use_native_conditions = FALSE;
+
+/* Prototypes and function pointers for condition variable functions */
+typedef VOID (WINAPI * InitializeConditionVariableProc) 
+	(PCONDITION_VARIABLE ConditionVariable);
+static InitializeConditionVariableProc my_InitializeConditionVariable;
+
+typedef BOOL (WINAPI * SleepConditionVariableCSProc)
+	(PCONDITION_VARIABLE ConditionVariable,
+	PCRITICAL_SECTION CriticalSection, 
+	DWORD dwMilliseconds);
+static SleepConditionVariableCSProc my_SleepConditionVariableCS;
+
+typedef VOID (WINAPI * WakeAllConditionVariableProc)
+	(PCONDITION_VARIABLE ConditionVariable);
+static WakeAllConditionVariableProc my_WakeAllConditionVariable;
+
+typedef VOID (WINAPI * WakeConditionVariableProc)
+	(PCONDITION_VARIABLE ConditionVariable);
+static WakeConditionVariableProc my_WakeConditionVariable;
+#endif
+
+
+
+/*********************************************************//**
+Initialize condition variable */
+UNIV_INLINE 
+void 
+os_cond_init(
+/*=========*/
+	os_cond_t *cond)
+{
+	ut_a(cond);
+
+#ifdef __WIN__
+	ut_a(my_InitializeConditionVariable != NULL);
+	my_InitializeConditionVariable(cond);
+#else
+	ut_a(pthread_cond_init(cond, NULL) == 0);
+#endif
+}
+
+/*********************************************************//**
+Wait on condition variable */
+static
+void
+os_cond_wait(
+/*=========*/
+	os_cond_t *cond, 
+	os_fast_mutex_t *mutex)
+{
+	ut_a(cond);
+	ut_a(mutex);
+
+#ifdef __WIN__
+	ut_a(my_SleepConditionVariableCS != NULL);
+	ut_a(my_SleepConditionVariableCS(cond, mutex, INFINITE));
+#else
+	ut_a(pthread_cond_wait(cond, mutex) == 0);
+#endif
+}
+
+/*********************************************************//**
+Wakes all threads waiting for condition variable */
+static
+void
+os_cond_broadcast(
+/*==============*/
+	os_cond_t *cond
+)
+{
+	ut_a(cond);
+
+#ifdef __WIN__
+	ut_a(my_WakeAllConditionVariable != NULL);
+	my_WakeAllConditionVariable(cond);
+#else
+	ut_a(pthread_cond_broadcast(cond) == 0);
+#endif
+}
+
+/*********************************************************//**
+Wakes one thread waiting for condition variable */
+static
+void
+os_cond_signal(
+/*==========*/
+	os_cond_t *cond
+)
+{
+	ut_a(cond);
+
+#ifdef __WIN__
+	ut_a(my_WakeConditionVariable != NULL);
+	my_WakeConditionVariable(cond);
+#else
+	ut_a(pthread_cond_signal(cond) == 0);
+#endif
+}
+
+/*********************************************************//**
+Destroys condition variable */
+static
+void
+os_cond_destroy(
+/*============*/
+	os_cond_t *cond
+)
+{
+#ifdef __WIN__
+	/* Do nothing */
+#else
+	ut_a(pthread_cond_destroy(cond) == 0);
+#endif
+}
+
+
+/*********************************************************//**
+On Windows (Vista and later), load function pointers for condition variable
+handling. Those functions are not available in prior versions, so we have to
+use them via runtime loading, as long as we support XP. */
+static
+void
+os_conditions_init(
+/*===============*/
+)
+{
+#ifdef __WIN__
+	HMODULE hDll;
+
+	if (!os_sync_use_native_conditions)
+		return;
+
+	hDll = GetModuleHandle("kernel32");
+
+	my_InitializeConditionVariable = (InitializeConditionVariableProc)
+		GetProcAddress(hDll, "InitializeConditionVariable");
+	my_SleepConditionVariableCS = (SleepConditionVariableCSProc)
+		GetProcAddress(hDll, "SleepConditionVariableCS");
+	my_WakeAllConditionVariable = (WakeAllConditionVariableProc)
+		GetProcAddress(hDll, "WakeAllConditionVariable");
+	my_WakeConditionVariable = (WakeConditionVariableProc)
+		GetProcAddress(hDll, "WakeConditionVariable");
+
+	/* When using native condition variables, function pointers must be ok */
+	ut_a(my_InitializeConditionVariable);
+	ut_a(my_SleepConditionVariableCS);
+	ut_a(my_WakeAllConditionVariable);
+	ut_a(my_WakeConditionVariable);
+#endif
+}
+
+
 /*********************************************************//**
 Initializes global event and OS 'slow' mutex lists. */
 UNIV_INTERN
@@ -89,6 +270,8 @@ os_sync_init(void)
 	os_sync_mutex = NULL;
 	os_sync_mutex_inited = FALSE;
 
+	os_conditions_init();
+
 	os_sync_mutex = os_mutex_create(NULL);
 
 	os_sync_mutex_inited = TRUE;
@@ -143,23 +326,26 @@ os_event_create(
 	const char*	name)	/*!< in: the name of the event, if NULL
 				the event is created without a name */
 {
+	os_event_t	event;
+
 #ifdef __WIN__
-	os_event_t event;
+	if(!os_sync_use_native_conditions) {
 
-	event = ut_malloc(sizeof(struct os_event_struct));
+		event = ut_malloc(sizeof(struct os_event_struct));
+
+		event->handle = CreateEvent(NULL, /* No security attributes */
+						TRUE, /* Manual reset */
+						FALSE, /* Initial state nonsignaled */
+						(LPCTSTR) name);
+		if (!event->handle) {
+			fprintf(stderr,
+				"InnoDB: Could not create a Windows event semaphore;"
+				" Windows error %lu\n",
+				(ulong) GetLastError());
+		}
+	} else { /* Windows with condition variables */
+#endif
 
-	event->handle = CreateEvent(NULL, /* No security attributes */
-				    TRUE, /* Manual reset */
-				    FALSE, /* Initial state nonsignaled */
-				    (LPCTSTR) name);
-	if (!event->handle) {
-		fprintf(stderr,
-			"InnoDB: Could not create a Windows event semaphore;"
-			" Windows error %lu\n",
-			(ulong) GetLastError());
-	}
-#else /* Unix */
-	os_event_t	event;
 
 	UT_NOT_USED(name);
 
@@ -167,7 +353,7 @@ os_event_create(
 
 	os_fast_mutex_init(&(event->os_mutex));
 
-	ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
+	os_cond_init(&(event->cond_var));
 
 	event->is_set = FALSE;
 
@@ -178,7 +364,10 @@ os_event_create(
 	distinguish between the two cases we initialize signal_count
 	to 1 here. */
 	event->signal_count = 1;
-#endif /* __WIN__ */
+
+#ifdef __WIN__
+	}
+#endif
 
 	/* The os_sync_mutex can be NULL because during startup an event
 	can be created [ because it's embedded in the mutex/rwlock ] before
@@ -208,10 +397,16 @@ os_event_set(
 /*=========*/
 	os_event_t	event)	/*!< in: event to set */
 {
-#ifdef __WIN__
+	
 	ut_a(event);
-	ut_a(SetEvent(event->handle));
-#else
+
+#ifdef __WIN__
+	if (!os_sync_use_native_conditions) {
+		ut_a(SetEvent(event->handle));
+		return;
+	}
+#endif
+
 	ut_a(event);
 
 	os_fast_mutex_lock(&(event->os_mutex));
@@ -221,11 +416,10 @@ os_event_set(
 	} else {
 		event->is_set = TRUE;
 		event->signal_count += 1;
-		ut_a(0 == pthread_cond_broadcast(&(event->cond_var)));
+		os_cond_broadcast(&(event->cond_var));
 	}
 
 	os_fast_mutex_unlock(&(event->os_mutex));
-#endif
 }
 
 /**********************************************************//**
@@ -244,12 +438,14 @@ os_event_reset(
 {
 	ib_int64_t	ret = 0;
 
-#ifdef __WIN__
 	ut_a(event);
 
-	ut_a(ResetEvent(event->handle));
-#else
-	ut_a(event);
+#ifdef __WIN__
+	if(!os_sync_use_native_conditions) {
+		ut_a(ResetEvent(event->handle));
+		return 0;
+	}
+#endif
 
 	os_fast_mutex_lock(&(event->os_mutex));
 
@@ -261,7 +457,6 @@ os_event_reset(
 	ret = event->signal_count;
 
 	os_fast_mutex_unlock(&(event->os_mutex));
-#endif
 	return(ret);
 }
 
@@ -274,16 +469,20 @@ os_event_free_internal(
 	os_event_t	event)	/*!< in: event to free */
 {
 #ifdef __WIN__
-	ut_a(event);
-
-	ut_a(CloseHandle(event->handle));
-#else
+	if(!os_sync_use_native_conditions) {
+		ut_a(event);
+		ut_a(CloseHandle(event->handle));
+	} else {
+#endif
 	ut_a(event);
 
 	/* This is to avoid freeing the mutex twice */
 	os_fast_mutex_free(&(event->os_mutex));
 
-	ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
+	os_cond_destroy(&(event->cond_var));
+
+#ifdef __WIN__
+	}
 #endif
 	/* Remove from the list of events */
 
@@ -303,15 +502,18 @@ os_event_free(
 	os_event_t	event)	/*!< in: event to free */
 
 {
-#ifdef __WIN__
 	ut_a(event);
+#ifdef __WIN__
+	if(!os_sync_use_native_conditions){
+		ut_a(CloseHandle(event->handle));
+	} else { /*Windows with condition variables */
+#endif
 
-	ut_a(CloseHandle(event->handle));
-#else
-	ut_a(event);
 
 	os_fast_mutex_free(&(event->os_mutex));
-	ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
+	os_cond_destroy(&(event->cond_var));
+#ifdef __WIN__
+	}
 #endif
 	/* Remove from the list of events */
 
@@ -355,23 +557,27 @@ os_event_wait_low(
 					returned by previous call of
 					os_event_reset(). */
 {
+	ib_int64_t old_signal_count;
+
 #ifdef __WIN__
-	DWORD	err;
+	if(!os_sync_use_native_conditions) {
+		DWORD	err;
 
-	ut_a(event);
+		ut_a(event);
 
-	UT_NOT_USED(reset_sig_count);
+		UT_NOT_USED(reset_sig_count);
 
-	/* Specify an infinite time limit for waiting */
-	err = WaitForSingleObject(event->handle, INFINITE);
+		/* Specify an infinite time limit for waiting */
+		err = WaitForSingleObject(event->handle, INFINITE);
 
-	ut_a(err == WAIT_OBJECT_0);
+		ut_a(err == WAIT_OBJECT_0);
 
-	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
-		os_thread_exit(NULL);
+		if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+			os_thread_exit(NULL);
+		}
+		return;
 	}
-#else
-	ib_int64_t	old_signal_count;
+#endif
 
 	os_fast_mutex_lock(&(event->os_mutex));
 
@@ -396,13 +602,12 @@ os_event_wait_low(
 			return;
 		}
 
-		pthread_cond_wait(&(event->cond_var), &(event->os_mutex));
+		os_cond_wait(&(event->cond_var), &(event->os_mutex));
 
 		/* Solaris manual said that spurious wakeups may occur: we
 		have to check if the event really has been signaled after
 		we came here to wait */
 	}
-#endif
 }
 
 /**********************************************************//**
@@ -418,27 +623,29 @@ os_event_wait_time(
 				OS_SYNC_INFINITE_TIME */
 {
 #ifdef __WIN__
-	DWORD	err;
-
-	ut_a(event);
-
-	if (time != OS_SYNC_INFINITE_TIME) {
-		err = WaitForSingleObject(event->handle, (DWORD) time / 1000);
-	} else {
-		err = WaitForSingleObject(event->handle, INFINITE);
-	}
-
-	if (err == WAIT_OBJECT_0) {
-
-		return(0);
-	} else if (err == WAIT_TIMEOUT) {
-
-		return(OS_SYNC_TIME_EXCEEDED);
-	} else {
-		ut_error;
-		return(1000000); /* dummy value to eliminate compiler warn. */
+	if( !os_sync_use_native_conditions) {
+		DWORD	err;
+	
+		ut_a(event);
+	
+		if (time != OS_SYNC_INFINITE_TIME) {
+			err = WaitForSingleObject(event->handle, (DWORD) time / 1000);
+		} else {
+			err = WaitForSingleObject(event->handle, INFINITE);
+		}
+	
+		if (err == WAIT_OBJECT_0) {
+	
+			return(0);
+		} else if (err == WAIT_TIMEOUT) {
+	
+			return(OS_SYNC_TIME_EXCEEDED);
+		} else {
+			ut_error;
+			return(1000000); /* dummy value to eliminate compiler warn. */
+		}
 	}
-#else
+#endif
 	UT_NOT_USED(time);
 
 	/* In Posix this is just an ordinary, infinite wait */
@@ -446,43 +653,10 @@ os_event_wait_time(
 	os_event_wait(event);
 
 	return(0);
-#endif
-}
 
-#ifdef __WIN__
-/**********************************************************//**
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled.
-@return	index of the event which was signaled */
-UNIV_INTERN
-ulint
-os_event_wait_multiple(
-/*===================*/
-	ulint			n,	/*!< in: number of events in the
-					array */
-	os_native_event_t*	native_event_array)
-					/*!< in: pointer to an array of event
-					handles */
-{
-	DWORD	index;
-
-	ut_a(native_event_array);
-	ut_a(n > 0);
-
-	index = WaitForMultipleObjects((DWORD) n, native_event_array,
-				       FALSE,	   /* Wait for any 1 event */
-				       INFINITE); /* Infinite wait time
-						  limit */
-	ut_a(index >= WAIT_OBJECT_0);	/* NOTE: Pointless comparison */
-	ut_a(index < WAIT_OBJECT_0 + n);
+}
 
-	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
-		os_thread_exit(NULL);
-	}
 
-	return(index - WAIT_OBJECT_0);
-}
-#endif
 
 /*********************************************************//**
 Creates an operating system mutex semaphore. Because these are slow, the
@@ -495,15 +669,6 @@ os_mutex_create(
 	const char*	name)	/*!< in: the name of the mutex, if NULL
 				the mutex is created without a name */
 {
-#ifdef __WIN__
-	HANDLE		mutex;
-	os_mutex_t	mutex_str;
-
-	mutex = CreateMutex(NULL,	/* No security attributes */
-			    FALSE,		/* Initial state: no owner */
-			    (LPCTSTR) name);
-	ut_a(mutex);
-#else
 	os_fast_mutex_t*	mutex;
 	os_mutex_t		mutex_str;
 
@@ -512,7 +677,6 @@ os_mutex_create(
 	mutex = ut_malloc(sizeof(os_fast_mutex_t));
 
 	os_fast_mutex_init(mutex);
-#endif
 	mutex_str = ut_malloc(sizeof(os_mutex_str_t));
 
 	mutex_str->handle = mutex;
@@ -543,25 +707,11 @@ os_mutex_enter(
 /*===========*/
 	os_mutex_t	mutex)	/*!< in: mutex to acquire */
 {
-#ifdef __WIN__
-	DWORD	err;
-
-	ut_a(mutex);
-
-	/* Specify infinite time limit for waiting */
-	err = WaitForSingleObject(mutex->handle, INFINITE);
-
-	ut_a(err == WAIT_OBJECT_0);
-
-	(mutex->count)++;
-	ut_a(mutex->count == 1);
-#else
 	os_fast_mutex_lock(mutex->handle);
 
 	(mutex->count)++;
 
 	ut_a(mutex->count == 1);
-#endif
 }
 
 /**********************************************************//**
@@ -577,11 +727,7 @@ os_mutex_exit(
 	ut_a(mutex->count == 1);
 
 	(mutex->count)--;
-#ifdef __WIN__
-	ut_a(ReleaseMutex(mutex->handle));
-#else
 	os_fast_mutex_unlock(mutex->handle);
-#endif
 }
 
 /**********************************************************//**
@@ -610,15 +756,9 @@ os_mutex_free(
 		os_mutex_exit(os_sync_mutex);
 	}
 
-#ifdef __WIN__
-	ut_a(CloseHandle(mutex->handle));
-
-	ut_free(mutex);
-#else
 	os_fast_mutex_free(mutex->handle);
 	ut_free(mutex->handle);
 	ut_free(mutex);
-#endif
 }
 
 /*********************************************************//**

=== modified file 'storage/innobase/os/os0thread.c'
--- a/storage/innobase/os/os0thread.c	2010-03-29 07:34:42 +0000
+++ b/storage/innobase/os/os0thread.c	2010-05-10 15:20:10 +0000
@@ -252,7 +252,7 @@ os_thread_yield(void)
 /*=================*/
 {
 #if defined(__WIN__)
-	Sleep(0);
+	SwitchToThread();
 #elif (defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H))
 	sched_yield();
 #elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG)

=== modified file 'storage/innobase/srv/srv0srv.c'
--- a/storage/innobase/srv/srv0srv.c	2010-04-30 11:38:12 +0000
+++ b/storage/innobase/srv/srv0srv.c	2010-05-10 15:20:10 +0000
@@ -244,10 +244,10 @@ that during a time of heavy update/inser
 UNIV_INTERN ulong	srv_max_buf_pool_modified_pct	= 75;
 
 /* the number of purge threads to use from the worker pool (currently 0 or 1).*/
-UNIV_INTERN ulint srv_n_purge_threads = 0;
+UNIV_INTERN ulong srv_n_purge_threads = 0;
 
 /* the number of records to purge in one batch */
-UNIV_INTERN ulint srv_purge_batch_size = 20;
+UNIV_INTERN ulong srv_purge_batch_size = 20;
 
 /* variable counts amount of data read in total (in bytes) */
 UNIV_INTERN ulint srv_data_read = 0;

=== modified file 'storage/innobase/srv/srv0start.c'
--- a/storage/innobase/srv/srv0start.c	2010-04-22 21:51:43 +0000
+++ b/storage/innobase/srv/srv0start.c	2010-05-10 15:20:10 +0000
@@ -1147,7 +1147,6 @@ innobase_start_or_create_for_mysql(void)
 
 	srv_is_being_started = TRUE;
 	srv_startup_is_before_trx_rollback_phase = TRUE;
-
 #ifdef __WIN__
 	switch (os_get_os_version()) {
 	case OS_WIN95:
@@ -1159,13 +1158,22 @@ innobase_start_or_create_for_mysql(void)
 		to corrupt the data files. */
 
 		srv_use_native_aio = FALSE;
+		os_sync_use_native_conditions = FALSE;
+		break;
+
+	case OS_WIN2000:
+	case OS_WINXP:
+		/* On 2000 and XP, async IO is available, but no condition variables. */
+		srv_use_native_aio = TRUE;
+		os_sync_use_native_conditions = FALSE;
 		break;
+
 	default:
-		/* On Win 2000 and XP use async i/o */
+		/* Vista and later have both async IO and condition variables */
 		srv_use_native_aio = TRUE;
+		os_sync_use_native_conditions = TRUE;
 		break;
 	}
-
 #elif defined(LINUX_NATIVE_AIO)
 
 	if (srv_use_native_aio) {


Attachment: [text/bzr-bundle] bzr/vvaintroub@mysql.com-20100510152010-yf1b68iottbrjmot.bundle
Thread
bzr commit into mysql-trunk-innodb branch (vvaintroub:3068) Bug#52102 - Vladislav Vaintroub - 10 May