List:Internals« Previous MessageNext Message »
From:Jan Lindstrom Date:October 7 2005 6:54am
Subject:bk commit into 5.1 tree (jan:1.1912)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jan. When jan does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.1912 05/10/07 09:54:15 jan@stripped +7 -0
  Added support for Linux native async I/O. Created by Christoffer
  Hall-Frederiksen and used here with permission.

  storage/innobase/srv/srv0start.c
    1.87 05/10/07 09:53:37 jan@stripped +15 -2
    Start event handler for Linux native aio if used.

  storage/innobase/srv/srv0srv.c
    1.94 05/10/07 09:53:37 jan@stripped +1 -0
    Add required include file.

  storage/innobase/os/os0file.c
    1.111 05/10/07 09:53:37 jan@stripped +349 -5
    Added support for Linux native async I/O. Created by Christoffer
    Hall-Frederiksen and used here with permission.

  storage/innobase/include/univ.i
    1.45 05/10/07 09:53:37 jan@stripped +5 -0
    Added libaio.h.

  storage/innobase/include/os0file.h
    1.40 05/10/07 09:53:37 jan@stripped +42 -0
    Prototypes for Linux native aio handlers.

  storage/innobase/fil/fil0fil.c
    1.60 05/10/07 09:53:36 jan@stripped +3 -0
    Handle Linux aio events.

  config/ac-macros/ha_innodb.m4
    1.4 05/10/07 09:53:36 jan@stripped +29 -2
    Added test for Linux native aio. Support for Linux native aio is 
    compiled if you configure with option --with-linux-native-aio.
    This implementation is based on libaio. At the moment this is tested
    with libaio 0.3.99-3.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jan
# Host:	hundin.mysql.fi
# Root:	/home/jan/aio

--- 1.3/config/ac-macros/ha_innodb.m4	2005-04-27 05:04:31 +03:00
+++ 1.4/config/ac-macros/ha_innodb.m4	2005-10-07 09:53:36 +03:00
@@ -19,9 +19,10 @@
     yes )
       AC_MSG_RESULT([Using Innodb])
       AC_DEFINE([HAVE_INNOBASE_DB], [1], [Using Innobase DB])
+      innodb_system_libs=""
+      MYSQL_CHECK_LINUX_NATIVE_AIO
       have_innodb="yes"
       innodb_includes="-I\$(top_builddir)/innobase/include"
-      innodb_system_libs=""
 dnl Some libs are listed several times, in order for gcc to sort out
 dnl circular references.
       innodb_libs="\
@@ -60,7 +61,7 @@
  \$(top_builddir)/storage/innobase/os/libos.a\
  \$(top_builddir)/storage/innobase/ut/libut.a"
 
-      AC_CHECK_LIB(rt, aio_read, [innodb_system_libs="-lrt"])
+      AC_CHECK_LIB(rt, aio_read, [innodb_system_libs="$innodb_system_libs -lrt"])
       ;;
     * )
       AC_MSG_RESULT([Not using Innodb])
@@ -70,6 +71,32 @@
   AC_SUBST(innodb_includes)
   AC_SUBST(innodb_libs)
   AC_SUBST(innodb_system_libs)
+])
+
+dnl ---------------------------------------------------------------------------
+dnl Macro: MYSQL_CHECK_LINUX_NATIVE_AIO
+dnl Defines LINUX_NATIVE_AIO and links -laio only for --with-linux-native-aio
+dnl ---------------------------------------------------------------------------
+
+AC_DEFUN([MYSQL_CHECK_LINUX_NATIVE_AIO], [
+  AC_ARG_WITH(linux_native_aio,
+    [  --with-linux-native-aio   Use linux native aio.],
+    [with_linux_native_aio="$withval"],
+    [with_linux_native_aio=no]
+  )
+  dnl withval is no for --without-linux-native-aio, so only yes enables it
+  case "$with_linux_native_aio" in
+    yes )
+      AC_MSG_RESULT([Using linux native aio])
+      AC_DEFINE(LINUX_NATIVE_AIO, [1], [Linux native async I/O support])
+      AC_CHECK_HEADERS([libaio.h],, AC_MSG_ERROR([libaio needed for Linux native async I/O]))
+      AC_CHECK_LIB(aio, io_queue_init, [innodb_system_libs="$innodb_system_libs -laio"],
+        AC_MSG_ERROR([libaio does not support required io_queue_init]) )
+      ;;
+    * )
+      AC_MSG_RESULT([Not using linux native aio])
+      ;;
+  esac
 ])
 
 dnl ---------------------------------------------------------------------------

--- 1.59/storage/innobase/fil/fil0fil.c	2005-07-12 20:23:29 +03:00
+++ 1.60/storage/innobase/fil/fil0fil.c	2005-10-07 09:53:36 +03:00
@@ -4036,6 +4036,9 @@
 					    &message, &type);
 #elif defined(POSIX_ASYNC_IO)
 		ret = os_aio_posix_handle(segment, &fil_node, &message);
+#elif defined(LINUX_NATIVE_AIO)
+		ret = os_aio_linuxaio_handle(segment, 0, (void **)&fil_node, 
+							&message, &type);
 #else
 		ret = 0; /* Eliminate compiler warning */
 		ut_error;

--- 1.39/storage/innobase/include/os0file.h	2005-08-06 00:50:39 +03:00
+++ 1.40/storage/innobase/include/os0file.h	2005-10-07 09:53:37 +03:00
@@ -712,4 +712,46 @@
 	const char*     path,		/* in:  pathname of the file */
 	os_file_stat_t* stat_info);	/* information of a file in a directory */
 
+#ifdef LINUX_NATIVE_AIO
+/**************************************************************************
+This function is only used in Linux native asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing! */
+
+ibool
+os_aio_linuxaio_handle(
+/*===================*/
+				/* out: TRUE if the aio operation succeeded */
+	ulint	segment,	/* in: the number of the segment in the aio
+				arrays to wait for; segment 0 is the ibuf
+				i/o thread, segment 1 the log i/o thread,
+				then follow the non-ibuf read threads, and as
+				the last are the non-ibuf write threads; if
+				this is ULINT_UNDEFINED, then it means that
+				sync aio is used, and this parameter is
+				ignored */
+	ulint	pos,		/* in: this parameter is used only in sync
+				aio: wait for the aio slot at this position */
+	void**	message1,	/* out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2,	/* out: see message1 */
+	ulint*	type);		/* out: OS_FILE_WRITE or ..._READ */
+
+/**************************************************************************
+This function is only used in Linux native asynchronous i/o. Event handler
+thread function for Linux native asynchronous i/o. */
+
+void*
+io_linuxaio_event_handler (
+/*=======================*/
+	void*	arg);	/* in: thread number passed as a pointer value */
+
+#endif /* LINUX_NATIVE_AIO */
+
 #endif 

--- 1.44/storage/innobase/include/univ.i	2005-09-06 10:11:51 +03:00
+++ 1.45/storage/innobase/include/univ.i	2005-10-07 09:53:37 +03:00
@@ -46,6 +46,11 @@
 #include <sched.h>
 #endif
 
+#ifdef HAVE_LIBAIO_H
+#include <libaio.h>
+#endif
+
+
 /* When compiling for Itanium IA64, undefine the flag below to prevent use
 of the 32-bit x86 assembler in mutex operations. */
 

--- 1.110/storage/innobase/os/os0file.c	2005-08-06 00:50:39 +03:00
+++ 1.111/storage/innobase/os/os0file.c	2005-10-07 09:53:37 +03:00
@@ -30,6 +30,14 @@
 
 #endif
 
+#ifdef LINUX_NATIVE_AIO
+/* The implementation of linux native async I/O  is highly experimental! ;)
+ * Christoffer Hall-Frederiksen
+ */
+#include<libaio.h>
+
+#endif
+
 /* This specifies the file permissions InnoDB uses when it creates files in
 Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
 my_umask */
@@ -64,9 +72,21 @@
 
 ibool	os_aio_print_debug	= FALSE;
 
+#ifdef LINUX_NATIVE_AIO
+io_context_t os_aio_ctx = NULL;	/* libaio context: set up by io_queue_init(), used by io_submit() and io_getevents() */
+
+ulint		aio_events;	/* NOTE(review): not referenced anywhere in this patch */
+mutex_t		aio_events_mux;	/* created with mutex_create(); NOTE(review): no lock/unlock visible in this patch */
+#define LINUX_AIO_NUM_EVENTS (32)	/* size of the event buffer passed to io_getevents() */
+#endif
+
+
 /* The aio array slot structure */
 typedef struct os_aio_slot_struct	os_aio_slot_t;
 
+/* The aio array structure */
+typedef struct os_aio_array_struct	os_aio_array_t;
+
 struct os_aio_slot_struct{
 	ibool		is_read;	/* TRUE if a read operation */
 	ulint		pos;		/* index of the slot in the aio
@@ -100,12 +120,18 @@
 #elif defined(POSIX_ASYNC_IO)
 	struct aiocb	control;	/* Posix control block for aio
 					request */
+#elif defined(LINUX_NATIVE_AIO)
+	struct iocb	control;	/* Linux native control block 
+					   for aio */
+	os_event_t	event;		/* Event signaled when the request
+					   completes */
+	os_aio_array_t	*array;		/* Pointer back to array, needed 
+					   for completion */
+	int		res;		/* Async I/O return codes */
+	int		res2;
 #endif
 };
 
-/* The aio array structure */
-typedef struct os_aio_array_struct	os_aio_array_t;
-
 struct os_aio_array_struct{
 	os_mutex_t	mutex;	  /* the mutex protecting the aio array */
 	os_event_t	not_full; /* The event which is set to the signaled
@@ -129,6 +155,11 @@
 				  slots, in the same order. This can be used
 				  in WaitForMultipleObjects; used only in
 				  Windows */
+#elif defined(LINUX_NATIVE_AIO)
+	os_event_t	event;	   /* Event is set when at least one request
+				     from the array completes */
+	ulint		completed; /* Last pos in array that has been 
+				      completed. */
 #endif
 };
 
@@ -2721,6 +2752,9 @@
 	array->slots		= ut_malloc(n * sizeof(os_aio_slot_t));
 #ifdef __WIN__
 	array->native_events	= ut_malloc(n * sizeof(os_native_event_t));
+#elif defined(LINUX_NATIVE_AIO)
+	/* fprintf(stderr, "Linux native aio creating a event \n"); */
+	array->event = os_event_create(NULL);
 #endif	
 	for (i = 0; i < n; i++) {
 		slot = os_aio_array_get_nth_slot(array, i);
@@ -2735,6 +2769,10 @@
 		over->hEvent = slot->event->handle;
 
 		*((array->native_events) + i) = over->hEvent;
+#elif defined(LINUX_NATIVE_AIO)
+		/* fprintf(stderr, "Linux native aio creating a event\n"); */
+		slot->event = os_event_create(NULL);
+		slot->array = array;
 #endif
 	}
 	
@@ -2766,6 +2804,8 @@
 	ulint	i;
 #ifdef POSIX_ASYNC_IO
 	sigset_t   sigset;
+#elif defined(LINUX_NATIVE_AIO)
+	long ret;
 #endif
 	ut_ad(n % n_segments == 0);
 	ut_ad(n_segments >= 4);
@@ -2833,6 +2873,12 @@
 	sigaddset(&sigset, SIGRTMIN + 1 + 3);
 
 	pthread_sigmask(SIG_BLOCK, &sigset, NULL); */
+
+#elif defined(LINUX_NATIVE_AIO)
+	mutex_create(&aio_events_mux);
+	ret = io_queue_init(n, &os_aio_ctx);
+	/* fprintf(stderr, "Linux native aio queue init %ld\n", ret); */
+	ut_ad(ret == 0);
 #endif
 }
 
@@ -2871,6 +2917,11 @@
 	os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
 	os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
 	os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
+#elif defined(LINUX_NATIVE_AIO)
+	os_event_set(os_aio_read_array->event);
+	os_event_set(os_aio_write_array->event);
+	os_event_set(os_aio_ibuf_array->event);
+	os_event_set(os_aio_log_array->event);
 #endif
 	/* This loop wakes up all simulated ai/o threads */
 
@@ -2968,7 +3019,7 @@
 Gets an integer value designating a specified aio array. This is used
 to give numbers to signals in Posix aio. */
 
-#if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO)
+#if (!defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO)) || defined(LINUX_NATIVE_AIO)
 static
 ulint
 os_aio_get_array_no(
@@ -3135,6 +3186,9 @@
 		(ulint) control->aio_sigevent.sigev_signo);
 */
 	control->aio_sigevent.sigev_value.sival_ptr = slot;
+#elif defined(LINUX_NATIVE_AIO)
+	os_event_reset(slot->event);
+	/* fprintf(stderr, "Linux native aio event reset\n"); */
 #endif
 	os_mutex_exit(array->mutex);
 
@@ -3171,6 +3225,10 @@
 
 #ifdef WIN_ASYNC_IO		
 	os_event_reset(slot->event);
+#elif defined(LINUX_NATIVE_AIO)
+	/* fprintf(stderr, "Linux native aio event reset\n"); */
+	os_event_reset(slot->event);
+	os_event_reset(array->event);
 #endif
 	os_mutex_exit(array->mutex);
 }
@@ -3264,6 +3322,216 @@
 	}
 }
 
+#ifdef LINUX_NATIVE_AIO
+
+/**************************************************************************
+This function is only used in Linux native asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing! */
+
+ibool
+os_aio_linuxaio_handle(
+/*===================*/
+				/* out: TRUE if the aio operation succeeded */
+	ulint	segment,	/* in: the number of the segment in the aio
+				arrays to wait for; segment 0 is the ibuf
+				i/o thread, segment 1 the log i/o thread,
+				then follow the non-ibuf read threads, and as
+				the last are the non-ibuf write threads; if
+				this is ULINT_UNDEFINED, then it means that
+				sync aio is used, and this parameter is
+				ignored */
+	ulint	pos,		/* in: this parameter is used only in sync
+				aio: wait for the aio slot at this position */
+	void**	message1,	/* out: the messages passed with the aio
+				request; note that also in the case where
+				the aio operation failed, these output
+				parameters are valid and can be used to
+				restart the operation, for example */
+	void**	message2,
+	ulint*	type)		/* out: OS_FILE_WRITE or ..._READ */
+{
+	ulint		orig_seg	= segment;
+	os_aio_array_t*	array;
+	os_aio_slot_t*	slot;
+	ulint		n;
+	ulint		i;
+	ibool		ret_val;
+
+	if (segment == ULINT_UNDEFINED) {
+		array = os_aio_sync_array;
+		segment = 0;
+	} else {
+		segment = os_aio_get_array_and_local_segment(&array, segment);
+	}
+
+	n = array->n_slots / array->n_segments;
+
+	if (array == os_aio_sync_array) {
+		/* Sync aio is requested with segment == ULINT_UNDEFINED, which
+		is not a valid index to srv_io_thread_op_info[]: only record
+		the state when an i/o handler thread number was passed. */
+		if (orig_seg != ULINT_UNDEFINED) {
+			srv_io_thread_op_info[orig_seg] =
+						"wait Linux aio for 1 page";
+		}
+
+		i = pos;
+		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+		ut_a(pos == slot->pos);
+
+		os_mutex_enter(array->mutex);
+
+		if (!(slot->reserved && slot->io_already_done)) {
+			/* Not completed yet: sleep until the event handler
+			thread signals this slot, then retake the mutex. */
+			os_mutex_exit(array->mutex);
+			os_event_wait(slot->event);
+			os_mutex_enter(array->mutex);
+		}
+	} else {
+		for (;;) {
+			os_mutex_enter(array->mutex);
+
+			for (i = 0; i < n; i++) {
+				slot = os_aio_array_get_nth_slot(array,
+							i + segment * n);
+				if (slot->reserved && slot->io_already_done) {
+					goto request_found;
+				}
+			}
+
+			srv_io_thread_op_info[orig_seg] = "wait Linux aio";
+			os_mutex_exit(array->mutex);
+			os_event_wait(array->event);
+		}
+	}
+
+request_found:
+	/* The array mutex is held here on every path. */
+	ut_ad(i < array->n_slots);
+	ut_a(slot->reserved);
+
+	*message1 = slot->message1;
+	*message2 = slot->message2;
+	*type = slot->type;
+
+	if ((slot->res2 == 0) && (slot->res == slot->len)) {
+		ret_val = TRUE;
+#ifdef UNIV_DO_FLUSH
+		if (slot->type == OS_FILE_WRITE
+				&& !os_do_not_call_flush_at_each_write) {
+			ut_a(TRUE == os_file_flush(slot->file));
+		}
+#endif /* UNIV_DO_FLUSH */
+	} else {
+		/* Keep reporting res2 when it is set; otherwise a failed
+		request has its negative error code in res (io_getevents(2)). */
+		errno = slot->res2 ? -slot->res2
+				   : (slot->res < 0 ? -slot->res : 0);
+		os_file_handle_error(slot->name, "Linux aio");
+		ret_val = FALSE;
+	}
+
+	os_mutex_exit(array->mutex);
+	os_aio_array_free_slot(array, slot);
+
+	return(ret_val);
+}
+
+/**************************************************************************
+This function is only used in Linux native asynchronous i/o. Event handler
+thread for Linux native asynchronous i/o: it polls os_aio_ctx for completed
+requests, stores the result codes of each request in its aio slot, marks
+the slot as done and signals the events of the slot and of its aio array.
+The thread ends in os_thread_exit() once srv_shutdown_state is
+SRV_SHUTDOWN_EXIT_THREADS. */
+
+void*
+io_linuxaio_event_handler (
+/*=======================*/
+				/* out: NULL, to satisfy the thread function
+				signature; not reached in practice */
+	void*	arg)		/* in: thread number passed as a pointer
+				value; currently unused */
+{
+	ulint		j;
+	ulint		n_events;
+	long		ret;
+	os_aio_array_t*	io_array;
+	os_aio_slot_t*	io_slot;
+	struct timespec	timeout;
+	struct io_event	events[LINUX_AIO_NUM_EVENTS];
+
+	(void) arg;
+
+	for (;;) {
+		/* Start from a clean event buffer on every poll. */
+		memset(events, 0, sizeof(events));
+
+		/* The short timeout bounds how long a shutdown request can
+		go unnoticed while no i/o completes. */
+		timeout.tv_sec = 0;
+		timeout.tv_nsec = 500000;
+
+		ret = io_getevents(os_aio_ctx, 1, LINUX_AIO_NUM_EVENTS,
+				   events, &timeout);
+
+		if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+			os_thread_exit(NULL);
+		}
+
+		if (ret == 0
+		    || ret == -EAGAIN
+		    || ret == -EINTR) {
+			/* Nothing was reaped this time: poll again. */
+			continue;
+		}
+
+		/* Any other error from io_getevents() is treated as fatal. */
+		ut_a(ret > 0);
+		n_events = (ulint) ret;
+
+		for (j = 0; j < n_events; j++) {
+			/* Every event must name the iocb that completed. */
+			ut_a((struct iocb *) (unsigned long) events[j].obj != NULL);
+
+			/* The submitter stored the slot pointer in
+			iocb->data; it comes back in the event. */
+			io_slot = (os_aio_slot_t *) events[j].data;
+			ut_a(io_slot != NULL);
+
+			io_array = io_slot->array;
+			ut_ad(io_array != NULL);
+			ut_ad(io_slot->reserved);
+
+			/* Publish the result under the array mutex, which
+			os_aio_linuxaio_handle() holds while looking for
+			completed slots. */
+			os_mutex_enter(io_array->mutex);
+			io_slot->res = (long) events[j].res;
+			io_slot->res2 = (long) events[j].res2;
+			io_slot->io_already_done = TRUE;
+			os_event_set(io_slot->event);
+			os_event_set(io_array->event);
+			os_mutex_exit(io_array->mutex);
+		}
+	}
+
+	/* Not reached: the loop above is only left through the
+	os_thread_exit() call made when shutdown is detected. We count the
+	number of threads in os_thread_exit(), so a created thread should
+	always use that to exit and not use return() to exit. */
+
+	os_thread_exit(NULL);
+
+	return(NULL);
+}
+#endif /* LINUX_NATIVE_AIO */
+
 /***********************************************************************
 Requests an asynchronous i/o operation. */
 
@@ -3311,6 +3579,14 @@
 	struct fil_node_struct * dummy_mess1;
 	void*		dummy_mess2;
 	ulint		dummy_type;
+#elif defined(LINUX_NATIVE_AIO)
+	long long	aio_offset;
+	long		ret;
+	ibool		retval;
+	void*		dummy_mess1;
+	void*		dummy_mess2;
+	ulint		dummy_type;
+	struct iocb*	iocb = NULL;
 #endif
 	ulint		err		= 0;
 	ibool		retry;
@@ -3327,7 +3603,7 @@
 	mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
 	
 	if (mode == OS_AIO_SYNC
-#ifdef WIN_ASYNC_IO
+#if defined(WIN_ASYNC_IO) || defined(LINUX_NATIVE_AIO)
 				&& !os_aio_use_native_aio
 #endif
 	) {
@@ -3387,6 +3663,30 @@
 			slot->control.aio_lio_opcode = LIO_READ;
 			err = (ulint) aio_read(&(slot->control));
 			fprintf(stderr, "Starting POSIX aio read %lu\n", err);
+#elif defined(LINUX_NATIVE_AIO)
+			os_n_file_reads++;
+			os_bytes_read_since_printout += n;
+
+			/* offset_high holds the upper 32 bits of the file offset */
+			aio_offset = offset_high;
+			aio_offset <<= 32;
+			aio_offset += offset;
+			iocb = &(slot->control);
+			io_prep_pread(iocb, file, buf, n, aio_offset);
+			/* the completion event carries the slot pointer back */
+			iocb->data = (void *) slot;
+readagain:
+			ret = io_submit(os_aio_ctx, 1, &iocb);
+
+			/* Interrupted or EAGAIN: resubmit the read request */
+			if (ret == -EINTR)
+				goto readagain;
+			if (ret == -EAGAIN) {
+				os_thread_sleep(20);
+				goto readagain;
+			}
+
+			ut_ad(ret == 1);
+#endif
 		} else {
 			if (!wake_later) {
@@ -3401,9 +3701,31 @@
 			ret = WriteFile(file, buf, (DWORD)n, &len,
 							&(slot->control));
 #elif defined(POSIX_ASYNC_IO)
+			os_n_file_writes++;
 			slot->control.aio_lio_opcode = LIO_WRITE;
 			err = (ulint) aio_write(&(slot->control));
 			fprintf(stderr, "Starting POSIX aio write %lu\n", err);
+#elif defined(LINUX_NATIVE_AIO)
+			aio_offset = offset_high;
+			aio_offset <<= 32;
+			aio_offset += offset;
+			iocb = &(slot->control);
+			io_prep_pwrite(iocb, file, buf, n, aio_offset);
+			iocb->data = (void *) slot;
+			/* fprintf(stderr, "Starting Linux native aio write\n"); */
+
+writeagain:
+			ret = io_submit(os_aio_ctx, 1, &iocb);
+
+			if (ret == -EINTR)
+				goto writeagain;
+
+			if (ret == -EAGAIN) {
+				os_thread_sleep(20);
+				goto writeagain;
+			}
+
+			ut_ad(ret == 1);
 #endif
 		} else {
 			if (!wake_later) {
@@ -3432,6 +3754,28 @@
 					&dummy_type);
 
 			    return(retval);
+	    		}
+
+			return(TRUE);
+		}
+
+		err = 1; /* Fall through the next if */
+	}
+#elif defined(LINUX_NATIVE_AIO)
+	if (os_aio_use_native_aio) {
+		if (ret == 1) {
+
+	    		if (mode == OS_AIO_SYNC) {
+	    		
+				retval = os_aio_linuxaio_handle(
+						ULINT_UNDEFINED,
+						slot->pos,
+						&dummy_mess1, &dummy_mess2,
+						&dummy_type); 
+
+				/* fprintf(stderr, "Linux native aio handle\n"); */
+
+				return(retval);
 	    		}
 
 			return(TRUE);

--- 1.93/storage/innobase/srv/srv0srv.c	2005-08-25 19:49:45 +03:00
+++ 1.94/storage/innobase/srv/srv0srv.c	2005-10-07 09:53:37 +03:00
@@ -29,6 +29,7 @@
 
 #include "ut0mem.h"
 #include "os0proc.h"
+#include "os0file.h"
 #include "mem0mem.h"
 #include "mem0pool.h"
 #include "sync0sync.h"

--- 1.86/storage/innobase/srv/srv0start.c	2005-08-25 19:49:45 +03:00
+++ 1.87/storage/innobase/srv/srv0start.c	2005-10-07 09:53:37 +03:00
@@ -87,8 +87,8 @@
 static mutex_t		ios_mutex;
 static ulint		ios;
 
-static ulint		n[SRV_MAX_N_IO_THREADS + 5];
-static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 5];
+static ulint		n[SRV_MAX_N_IO_THREADS + 6];
+static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 6];
 
 /* We use this mutex to test the return value of pthread_mutex_trylock
    on successful locking. HP-UX does NOT return 0, though Linux et al do. */
@@ -1083,6 +1083,11 @@
 		srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
 
 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
+
+#ifdef LINUX_NATIVE_AIO
+		os_aio_use_native_aio = TRUE;
+#endif
+
 #ifndef __WIN__        
 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "fdatasync")) {
 	  	srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
@@ -1093,6 +1098,10 @@
 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
 	  	srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
 
+#ifdef LINUX_NATIVE_AIO
+		os_aio_use_native_aio = TRUE;
+#endif
+
 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
 	  	srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
 
@@ -1239,6 +1248,10 @@
 
 		os_thread_create(io_handler_thread, n + i, thread_ids + i);
     	}
+
+#ifdef LINUX_NATIVE_AIO
+	os_thread_create(io_linuxaio_event_handler, (void *) 0, thread_ids + i);
+#endif
 
 #ifdef UNIV_LOG_ARCHIVE
 	if (0 != ut_strcmp(srv_log_group_home_dirs[0], srv_arch_dir)) {
Thread
bk commit into 5.1 tree (jan:1.1912)Jan Lindstrom7 Oct