Thanks to everyone who gave me comments on the previous patch.
I have improved that patch to further utilize fallocate when creating
new InnoDB tables.
This time, in our application, the patched MySQL takes about half the
time to create tables compared to the original. We used MySQL 5.5.29 with
an ext4 filesystem on Ubuntu 12.04.
Some numbers:
Tables in our application: 1285
Original MySQL 5.5.29: 65.5 seconds (average of 5 runs)
Patched MySQL 5.5.29: 33.9 seconds (average of 5 runs)
Any comments or suggestions?
Thanks,
Toshikuni Fukaya
> Hi Sergei,
>
> On 12/28/12 12:50, Sergei Golubchik wrote:
>> Hi, Sveta!
>>
>> On Dec 27, Sveta Smirnova wrote:
>>> Dear Toshikuni,
>>>
>>>>> I made a patch for MySQL 5.5.28 to speed up InnoDB table space
>>>>> creation. I noticed that there are slow I/Os (consisting of many
>>>>> zero-fill writes and syncs) when creating tables.
>>>>>
>>>>> Linux kernels have the fallocate(2) system call, which guarantees
>>>>> that a given file region is zeroed and yields a performance
>>>>> improvement on supporting file systems.
>>>>>
>>>>> I measured a 10% speed up in table space creation in our
>>>>> application.
>>>>>
>>>>> Could you please accept my patch? If there is anything further I
>>>>> should do for this patch, please advise me.
>>>
>>> We evaluated this option internally and, unfortunately, results were not
>>> conclusive, so we decided not to implement the patch.
>>
>> And why is that? What were the opinions? What were the arguments against
>> this patch?
>
> I am not sure how much I can say, because Oracle forbids its employees
> from distributing such discussions.
>
> But in short: there are cases that this patch does not cover. An actual
> patch would need to be more complicated.
>
> Sveta.
>
>> Regards,
>> Sergei
>
diff -Naru mysql-5.5.29.orig/config.h.cmake mysql-5.5.29/config.h.cmake
--- mysql-5.5.29.orig/config.h.cmake 2012-12-10 15:16:24.000000000 +0900
+++ mysql-5.5.29/config.h.cmake 2013-01-16 11:38:02.983547729 +0900
@@ -206,6 +206,7 @@
#cmakedefine HAVE_POLL 1
#cmakedefine HAVE_PORT_CREATE 1
#cmakedefine HAVE_POSIX_FALLOCATE 1
+#cmakedefine HAVE_FALLOCATE 1
#cmakedefine HAVE_PREAD 1
#cmakedefine HAVE_PAUSE_INSTRUCTION 1
#cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1
diff -Naru mysql-5.5.29.orig/configure.cmake mysql-5.5.29/configure.cmake
--- mysql-5.5.29.orig/configure.cmake 2012-12-10 15:16:24.000000000 +0900
+++ mysql-5.5.29/configure.cmake 2013-01-16 11:38:02.983547729 +0900
@@ -387,6 +387,7 @@
CHECK_FUNCTION_EXISTS (poll HAVE_POLL)
CHECK_FUNCTION_EXISTS (port_create HAVE_PORT_CREATE)
CHECK_FUNCTION_EXISTS (posix_fallocate HAVE_POSIX_FALLOCATE)
+CHECK_FUNCTION_EXISTS (fallocate HAVE_FALLOCATE)
CHECK_FUNCTION_EXISTS (pread HAVE_PREAD)
CHECK_FUNCTION_EXISTS (pthread_attr_create HAVE_PTHREAD_ATTR_CREATE)
CHECK_FUNCTION_EXISTS (pthread_attr_getstacksize HAVE_PTHREAD_ATTR_GETSTACKSIZE)
diff -Naru mysql-5.5.29.orig/storage/innobase/fil/fil0fil.c
mysql-5.5.29/storage/innobase/fil/fil0fil.c
--- mysql-5.5.29.orig/storage/innobase/fil/fil0fil.c 2012-12-10 15:16:24.000000000 +0900
+++ mysql-5.5.29/storage/innobase/fil/fil0fil.c 2013-01-16 11:38:03.127545918 +0900
@@ -3961,6 +3961,7 @@
ulint offset_low;
ulint page_size;
ibool success = TRUE;
+ ibool fallenback = TRUE;
fil_mutex_enter_and_prepare_for_io(space_id);
@@ -3989,6 +3990,31 @@
start_page_no = space->size;
file_start_page_no = space->size - node->size;
+#ifdef HAVE_FALLOCATE
+ {
+ ulint n_pages = size_after_extend - start_page_no;
+ if(fallocate(node->handle, 0, node->size * page_size,
+ n_pages * page_size) == 0) {
+ node->size += n_pages;
+ space->size += n_pages;
+ os_has_said_disk_full = FALSE;
+ fallenback = FALSE;
+ goto extend_after;
+ } else if (errno != ENOSYS && errno != EOPNOTSUPP) {
+ n_pages = ((ulint)
+ (os_file_get_size_as_iblonglong(
+ node->handle)
+ / page_size)) - node->size;
+
+ node->size += n_pages;
+ space->size += n_pages;
+ fallenback = FALSE;
+ goto extend_after;
+ }
+ /* fall back if filesystem does not support fallocate */
+ }
+#endif
+
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
buf2 = mem_alloc(buf_size + page_size);
@@ -4041,7 +4067,8 @@
mem_free(buf2);
- fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
+extend_after:
+ fil_node_complete_io(node, fil_system, fallenback ? OS_FILE_WRITE : OS_FILE_READ);
*actual_size = space->size;
diff -Naru mysql-5.5.29.orig/storage/innobase/os/os0file.c
mysql-5.5.29/storage/innobase/os/os0file.c
--- mysql-5.5.29.orig/storage/innobase/os/os0file.c 2012-12-10 15:16:24.000000000 +0900
+++ mysql-5.5.29/storage/innobase/os/os0file.c 2013-01-16 11:38:03.111546119 +0900
@@ -1970,6 +1970,15 @@
current_size = 0;
desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
+#ifdef HAVE_FALLOCATE
+ if (fallocate(file, 0, 0, desired_size) == 0) {
+ return (TRUE);
+ } else if (errno != ENOSYS && errno != EOPNOTSUPP) {
+ goto error_handling;
+ }
+ /* fall back if filesystem does not support fallocate */
+#endif
+
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;