#At file:///data1/ymatsunobu/prealloc/mysql-next-mr-prealloc/ based on revid:alik@strippedgycfjcc
3165 Yoshinori Matsunobu 2010-07-12
WL#4925: Preallocating binary log for improving write performance
Patch 1/3: Server patch
1. Problems
sync_binlog=1 is sometimes required for critical applications. If sync_binlog is not set to 1, there is a risk of corruption of the binary log if the server crashes. However, on many filesystems/OSes, sync_binlog=1 is currently far too slow (sometimes 2-4 times slower than with sync_binlog=0).
Currently the binlog file is not pre-allocated; it is extended on each transaction commit. In contrast, the InnoDB log file is pre-allocated and its size does not change. In other words, the binlog adopts an “appending” architecture, while the InnoDB log adopts an “overwriting” architecture.
For most operating systems/file systems (including Solaris ufs and Linux ext3), overwriting is much faster than appending. So the binlog should be pre-allocated to a large file size (e.g. 128MB at once) and then overwritten on each commit.
2. Solutions
This patch consists of the following.
2.1 Server patch (sql/*)
- Introducing a system variable "binlog-preallocate=N" to handle preallocated binlog files
- Introducing a member variable MYSQL_BIN_LOG::actual_size to maintain current actual binlog size. This is required because currently some commands/tools (i.e. BINLOG DUMP thread, mysqlbinlog, SHOW BINLOG EVENTS) read until EOF of binary logs. Introducing actual_size makes it possible for them to read up to actual size, not EOF.
- Introducing a member function MYSQL_BIN_LOG::new_log_name_from_index() so that mysqld can open preallocated binary logs
- Adding a "mysql_mutex_t* log_lock" argument to the open_binlog() function. The purpose is to make sure that LOCK_log is acquired within this function
- All components/commands (BINLOG DUMP command, SHOW MASTER STATUS, SHOW BINLOG EVENTS, mysqlbinlog, etc) work without any problem
- Preallocation is supported only on binary logs, not relay logs
- The default behavior is kept as it is (for compatibility)
More detailed information is written here.
http://forge.mysql.com/worklog/task.php?id=4925
2.2 Client tool mysqlbinlogalloc (client/*)
- Introducing a new command line tool "mysqlbinlogalloc". This generates preallocated binary log files. Typical usage is 1. preallocating many binary log files with mysqlbinlogalloc, 2. starting mysqld with --binlog-preallocate=1 and --sync-binlog=1. This makes it possible to remove the overhead of automatic binlog preallocation, which is significant for filesystems that do not support posix_fallocate()
2.3 mysql-test test cases (mysql-test/*)
- To make sure that all components/commands (BINLOG DUMP command, SHOW MASTER STATUS, SHOW BINLOG EVENTS, mysqlbinlog, etc) work without any problem
- Test cases include concurrent programs (concurrent FLUSH LOGS, concurrent writing high volume of data to rotate binary logs) by mysqlslap
3. Results of the patch
Here is a result of this preallocating binlog patch. mysqlslap insert throughput (InnoDB)
1) preallocate (by mysqlbinlogalloc), sync_binlog=1
1 conn: 3,707 commits/sec
20 conn: 7,696 commits/sec
50 conn: 8,156 commits/sec
2) normal, sync_binlog=1
1 conn: 359 commits/sec
20 conn: 798 commits/sec
50 conn: 760 commits/sec
The difference between 1) and 2) is huge (10 times faster) so preallocating binlog is really helpful for people using sync_binlog=1 (no outstanding difference when using sync_binlog=0).
modified:
include/my_sys.h
include/mysql/psi/mysql_file.h
mysys/my_winfile.c
mysys/my_write.c
mysys/mysys_priv.h
sql/binlog.cc
sql/binlog.h
sql/log.cc
sql/log.h
sql/log_event.cc
sql/mysqld.cc
sql/mysqld.h
sql/repl_failsafe.cc
sql/rpl_master.cc
sql/rpl_rli.cc
sql/rpl_slave.cc
sql/sys_vars.cc
=== modified file 'include/my_sys.h'
--- a/include/my_sys.h 2010-06-19 07:50:33 +0000
+++ b/include/my_sys.h 2010-07-12 02:39:55 +0000
@@ -635,6 +635,8 @@ extern my_off_t my_seek(File fd,my_off_t
extern my_off_t my_tell(File fd,myf MyFlags);
extern size_t my_write(File Filedes,const uchar *Buffer,size_t Count,
myf MyFlags);
+extern int my_fallocate(File fd, ulong alloc_size);
+extern int my_zerofill(File fd, ulong alloc_size);
extern size_t my_pwrite(File Filedes,const uchar *Buffer,size_t Count,
my_off_t offset,myf MyFlags);
extern size_t my_fread(FILE *stream,uchar *Buffer,size_t Count,myf MyFlags);
=== modified file 'include/mysql/psi/mysql_file.h'
--- a/include/mysql/psi/mysql_file.h 2010-03-31 14:05:33 +0000
+++ b/include/mysql/psi/mysql_file.h 2010-07-12 02:39:55 +0000
@@ -474,6 +474,18 @@
#endif
/**
+ @def mysql_file_allocate(FD, SIZE)
+ Instrumented large file allocation.
+*/
+#ifdef HAVE_PSI_INTERFACE
+ #if defined (HAVE_POSIX_FALLOCATE) || defined(__WIN__)
+ #define mysql_file_allocate(FD, SIZE) my_fallocate(FD, SIZE)
+ #else
+ #define mysql_file_allocate(FD, SIZE) my_zerofill(FD, SIZE)
+ #endif
+#endif
+
+/**
An instrumented FILE structure.
@sa MYSQL_FILE
*/
=== modified file 'mysys/my_winfile.c'
--- a/mysys/my_winfile.c 2009-12-10 03:19:51 +0000
+++ b/mysys/my_winfile.c 2010-07-12 02:39:55 +0000
@@ -669,4 +669,15 @@ int my_win_dup(File fd)
DBUG_RETURN(-1);
}
+
+/**
+  Set the size of a file on Windows.
+
+  Moves the file pointer of fd to alloc_size bytes from the beginning of
+  the file and makes that position the end of the file. The file pointer
+  is not moved back afterwards.
+
+  @param fd          file descriptor; converted to a HANDLE with
+                     my_get_osfhandle()
+  @param alloc_size  requested file size in bytes
+
+  @retval 0   success
+  @retval -1  SetFilePointerEx() or SetEndOfFile() failed
+*/
+int my_win_fallocate(File fd, ulong alloc_size)
+{
+  HANDLE handle= (HANDLE) my_get_osfhandle(fd);
+  LARGE_INTEGER new_end;
+
+  new_end.QuadPart= alloc_size;
+  if (!SetFilePointerEx(handle, new_end, NULL, FILE_BEGIN))
+    return -1;
+  if (!SetEndOfFile(handle))
+    return -1;
+  return 0;
+}
#endif /*_WIN32*/
=== modified file 'mysys/my_write.c'
--- a/mysys/my_write.c 2009-09-11 20:26:35 +0000
+++ b/mysys/my_write.c 2010-07-12 02:39:55 +0000
@@ -99,3 +99,28 @@ size_t my_write(File Filedes, const ucha
DBUG_RETURN(0); /* Want only errors */
DBUG_RETURN(writtenbytes+written);
} /* my_write */
+
+
+/**
+  Reserve alloc_size bytes of disk space for a file.
+
+  Uses my_win_fallocate() on Windows and posix_fallocate() where
+  HAVE_POSIX_FALLOCATE is defined. On any other platform the function
+  falls back to my_zerofill() so that this translation unit still builds
+  and callers get the same contract.
+
+  @param fd          file descriptor
+  @param alloc_size  number of bytes to allocate, counted from offset 0
+
+  @retval 0   success
+  @retval -1  the underlying allocation call failed
+*/
+int my_fallocate(File fd, ulong alloc_size)
+{
+#if defined (__WIN__)
+  if (my_win_fallocate(fd, alloc_size))
+    return -1;
+  return 0;
+#elif defined (HAVE_POSIX_FALLOCATE)
+  /* posix_fallocate() returns 0 on success and an error number otherwise */
+  if (posix_fallocate(fd, 0L, alloc_size))
+    return -1;
+  return 0;
+#else
+  /* No native preallocation call available: write zeroes instead */
+  return my_zerofill(fd, alloc_size);
+#endif
+}
+
+
+/**
+  Write alloc_size zero bytes to a file.
+
+  The data is written with mysql_file_write() in chunks of at most IO_SIZE
+  bytes. The last chunk may be shorter, so a size that is not a multiple
+  of IO_SIZE is still written in full.
+
+  @param fd          file descriptor to write to
+  @param alloc_size  total number of zero bytes to write
+
+  @retval 0   all bytes were written
+  @retval -1  a write reported failure
+*/
+int my_zerofill(File fd, ulong alloc_size)
+{
+  uchar buf[IO_SIZE];
+  ulong remaining= alloc_size;
+
+  memset(buf, 0, IO_SIZE);
+  while (remaining > 0)
+  {
+    size_t chunk= remaining < IO_SIZE ? (size_t) remaining : (size_t) IO_SIZE;
+    /* A non-zero return is treated as a failed write */
+    if (mysql_file_write(fd, buf, chunk, MYF(MY_WME | MY_NABP)))
+      return -1;
+    remaining-= chunk;
+  }
+  return 0;
+}
=== modified file 'mysys/mysys_priv.h'
--- a/mysys/mysys_priv.h 2010-01-11 16:46:16 +0000
+++ b/mysys/mysys_priv.h 2010-07-12 02:39:55 +0000
@@ -106,6 +106,7 @@ extern int my_win_stat(const char *
extern int my_win_fstat(File fd, struct _stati64 *buf);
extern int my_win_fsync(File fd);
extern File my_win_dup(File fd);
+extern int my_win_fallocate(File fd, ulong alloc_size);
extern File my_win_sopen(const char *path, int oflag, int shflag, int perm);
extern File my_open_osfhandle(HANDLE handle, int oflag);
#endif
=== modified file 'sql/binlog.cc'
--- a/sql/binlog.cc 2010-06-22 12:58:10 +0000
+++ b/sql/binlog.cc 2010-07-12 02:39:55 +0000
@@ -820,7 +820,8 @@ int check_binlog_magic(IO_CACHE* log, co
}
-File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
+File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg,
+ mysql_mutex_t* log_lock)
{
File file;
DBUG_ENTER("open_binlog");
@@ -842,6 +843,22 @@ File open_binlog(IO_CACHE *log, const ch
*errmsg = "Could not open log file";
goto err;
}
+#ifndef MYSQL_CLIENT
+ if (binlog_preallocate)
+ {
+ log->file_name= (char*)log_file_name;
+ if (log_lock)
+ mysql_mutex_lock(log_lock);
+ if (mysql_bin_log.is_active(log_file_name))
+ {
+ mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
+ log->end_of_file= mysql_bin_log.actual_size;
+ DBUG_ASSERT(log->end_of_file >= BIN_LOG_HEADER_SIZE);
+ }
+ if (log_lock)
+ mysql_mutex_unlock(log_lock);
+ }
+#endif
if (check_binlog_magic(log,errmsg))
goto err;
DBUG_RETURN(file);
@@ -1201,7 +1218,8 @@ bool show_binlog_events(THD *thd, MYSQL_
thd->current_linfo = &linfo;
mysql_mutex_unlock(&LOCK_thread_count);
- if ((file=open_binlog(&log, linfo.log_file_name, &errmsg)) < 0)
+ if ((file=open_binlog(&log, linfo.log_file_name, &errmsg,
+ mysql_bin_log.get_log_lock())) < 0)
goto err;
/*
@@ -1280,8 +1298,20 @@ err:
}
if (errmsg)
- my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
- "SHOW BINLOG EVENTS", errmsg);
+ {
+ if (binlog_preallocate && !strcmp(errmsg, "Wrong offset or I/O error"))
+ {
+ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+ ER_ERROR_WHEN_EXECUTING_COMMAND,
+ "Wrong offset or I/O error. It was likely that "
+ "preallocated binlog file was not closed "
+ "properly (i.e. mysqld crash).");
+ my_eof(thd);
+ }
+ else
+ my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
+ "SHOW BINLOG EVENTS", errmsg);
+ }
else
my_eof(thd);
@@ -1331,7 +1361,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_
need_start_event(TRUE),
sync_period_ptr(sync_period),
is_relay_log(0), signal_cnt(0),
- description_event_for_exec(0), description_event_for_queue(0)
+ description_event_for_exec(0), description_event_for_queue(0), actual_size(0)
{
/*
We don't want to initialize locks here as such initialization depends on
@@ -1352,7 +1382,9 @@ void MYSQL_BIN_LOG::cleanup()
if (inited)
{
inited= 0;
+ mysql_mutex_lock(&LOCK_log);
close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
+ mysql_mutex_unlock(&LOCK_log);
delete description_event_for_queue;
delete description_event_for_exec;
mysql_mutex_destroy(&LOCK_log);
@@ -1450,6 +1482,83 @@ bool MYSQL_BIN_LOG::open_index_file(cons
}
+/**
+  Compute the name of the next binary log from the entries in the index.
+
+  Calls find_next_log() repeatedly until it returns non-zero, keeping the
+  last entry read in new_log_name. If no entry was read, the result is
+  "<base>.000001", where <base> is opt_name formatted by fn_format() with
+  mysql_data_home. Otherwise the numeric suffix of the last entry is
+  incremented in place.
+
+  @param[out] new_log_name  buffer receiving the new log name; at most
+                            FN_REFLEN - 1 characters are copied from the index
+  @param      opt_name      base name of the binary log; must be non-empty
+  @param      need_lock     passed through to find_next_log()
+
+  @retval 0         new_log_name holds the next log name
+  @retval non-zero  find_next_log() failed with something other than
+                    LOG_INFO_EOF, or the last index entry has no numeric
+                    suffix at the expected position (LOG_INFO_EOF is
+                    returned in that case)
+*/
+int MYSQL_BIN_LOG::new_log_name_from_index(char* new_log_name,
+                                           const char* opt_name, bool need_lock)
+{
+  LOG_INFO log_info;
+  int error= 1;
+  ulong binlog_suffix;
+  bool no_entry_in_index= true;
+  char buff[FN_REFLEN];
+  size_t suffix_offset;
+  DBUG_ASSERT(opt_name && opt_name[0]);
+
+  while (!(error= find_next_log(&log_info, need_lock)))
+  {
+    no_entry_in_index= false;
+    strmake(new_log_name, log_info.log_file_name, FN_REFLEN - 1);
+  }
+
+  if (error != LOG_INFO_EOF)
+  {
+    sql_print_error("find_next_log() failed (error: %d)", error);
+    return error;
+  }
+
+  fn_format(buff, opt_name, mysql_data_home, "", 4);
+  if (no_entry_in_index)
+  {
+    binlog_suffix= 1;
+    sprintf(new_log_name, "%s.%06lu", buff, binlog_suffix);
+    return 0;
+  }
+
+  /*
+    The suffix is expected right after "<base>.". Refuse entries that are
+    too short to contain it instead of reading past the end of the string.
+    NOTE(review): this assumes index entries start with the same path as
+    buff; confirm for indexes that store relative paths.
+  */
+  suffix_offset= strlen(buff) + 1;
+  if (strlen(new_log_name) <= suffix_offset ||
+      !test_if_number(new_log_name + suffix_offset, &binlog_suffix, 0))
+    return error;                               /* error == LOG_INFO_EOF */
+
+  binlog_suffix++;
+  sprintf(new_log_name + suffix_offset, "%06lu", binlog_suffix);
+  return 0;
+}
+
+
+/**
+  Preallocate max_binlog_size bytes for a binary log file and sync it.
+
+  @param fd  descriptor of the file to preallocate
+
+  @retval 0   the file was allocated and synced
+  @retval -1  allocation or sync failed
+*/
+int MYSQL_BIN_LOG::preallocate(File fd)
+{
+  /*
+    Both calls return non-zero on failure, so they are chained with ||:
+    a failed allocation is reported immediately, and the sync only runs
+    (and is checked) after a successful allocation.
+  */
+  if (mysql_file_allocate(fd, max_binlog_size) ||
+      mysql_file_sync(fd, MYF(MY_WME)))
+    return -1;
+  return 0;
+}
+
+
+/**
+  Create a new binary log file and preallocate it.
+
+  The file is opened with O_CREAT | O_WRONLY, handed to preallocate(),
+  and closed again whether or not preallocation succeeded.
+
+  @param new_log_path  path of the file to create
+
+  @retval 0  ok
+  @retval 1  error happens when creating file
+  @retval 2  error happens when preallocating file
+*/
+int MYSQL_BIN_LOG::create_preallocate(char* new_log_path)
+{
+  int fd= my_open(new_log_path, O_CREAT | O_WRONLY, MYF(MY_WME));
+  if (fd < 0)
+    return 1;
+
+  int result= preallocate(fd) ? 2 : 0;
+  my_close(fd, MYF(0));
+  return result;
+}
+
+
/**
Open a (new) binlog file.
@@ -1478,11 +1587,18 @@ bool MYSQL_BIN_LOG::open(const char *log
DBUG_ENTER("MYSQL_BIN_LOG::open");
DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
- if (init_and_set_log_file_name(log_name, new_name, log_type_arg,
- io_cache_type_arg))
+ if (!binlog_preallocate)
{
- sql_print_error("MSYQL_BIN_LOG::open failed to generate new file name.");
- DBUG_RETURN(1);
+ if (init_and_set_log_file_name(log_name, new_name, log_type_arg,
+ io_cache_type_arg))
+ {
+ sql_print_error("MSYQL_BIN_LOG::open failed to generate new file name.");
+ DBUG_RETURN(1);
+ }
+ } else if (!new_name)
+ {
+ new_log_name_from_index(log_file_name, log_name, need_mutex);
+ new_name= log_file_name;
}
#ifdef HAVE_REPLICATION
@@ -1500,8 +1616,7 @@ bool MYSQL_BIN_LOG::open(const char *log
write_error= 0;
/* open the main log file */
- if (MYSQL_LOG::open(log_name, log_type_arg, new_name,
- io_cache_type_arg))
+ if (MYSQL_LOG::open(log_name, log_type_arg, new_name, io_cache_type_arg))
{
#ifdef HAVE_REPLICATION
close_purge_index_file();
@@ -1515,6 +1630,39 @@ bool MYSQL_BIN_LOG::open(const char *log
DBUG_ASSERT(log_type == LOG_BIN);
+ if ((binlog_preallocate && io_cache_type_arg == WRITE_CACHE))
+ {
+ /* Preallocating binlog if newly activated binlog size is zero */
+ if (!my_seek(log_file.file, 0L, MY_SEEK_END, MYF(0)))
+ {
+ ulong i= 0;
+ char next_log_name[FN_REFLEN];
+ char log_name_buff[FN_REFLEN];
+ ulong binlog_suffix= 0;
+
+ preallocate(log_file.file);
+ /* If binlog_preallocate >= 2, creating and preallocating
+ (binlog_preallocate - 1) files here (if not exists) */
+ fn_format(log_name_buff, log_name, mysql_data_home, "", 4);
+ if (test_if_number((new_name)
+ + strlen(log_name_buff) + 1, &binlog_suffix, 0))
+ {
+ for (i=0; i< binlog_preallocate - 1; i++)
+ {
+ binlog_suffix++;
+ sprintf(next_log_name, "%s.%06ld", log_name_buff, binlog_suffix);
+ MY_STAT s;
+ if (!my_stat(next_log_name, &s, MYF(0)))
+ {
+ create_preallocate(next_log_name);
+ }
+ }
+ }
+ }
+ if (my_seek(log_file.file, 0L, MY_SEEK_SET, MYF(0)))
+ goto err;
+ }
+
{
bool write_file_name_to_index_file=0;
@@ -1588,6 +1736,8 @@ bool MYSQL_BIN_LOG::open(const char *log
mysql_file_sync(log_file.file, MYF(MY_WME)))
goto err;
+ actual_size= log_file.pos_in_file;
+
if (write_file_name_to_index_file)
{
#ifdef HAVE_REPLICATION
@@ -2686,13 +2836,22 @@ void MYSQL_BIN_LOG::new_file_impl(bool n
/* Reuse old name if not binlog and not update log */
new_name_ptr= name;
- /*
- If user hasn't specified an extension, generate a new log name
- We have to do this here and not in open as we want to store the
- new file name in the current binary log file.
- */
- if (generate_new_name(new_name, name))
- goto end;
+ if (binlog_preallocate)
+ {
+ if (new_log_name_from_index(new_name, name, 0))
+ goto end;
+ }
+ else
+ {
+ /*
+ If user hasn't specified an extension, generate a new log name
+ We have to do this here and not in open as we want to store the
+ new file name in the current binary log file.
+ */
+ if (generate_new_name(new_name, name))
+ goto end;
+ }
+
new_name_ptr=new_name;
if (log_type == LOG_BIN)
@@ -3092,6 +3251,7 @@ err:
if ((error= flush_and_sync(&synced)))
goto unlock;
+ actual_size= event_info->log_pos;
if ((error= RUN_HOOK(binlog_storage, after_flush,
(thd, log_file_name, file->pos_in_file, synced))))
{
@@ -3318,6 +3478,7 @@ bool MYSQL_BIN_LOG::write_incident(THD *
signal_update();
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
+ actual_size= ev.log_pos;
mysql_mutex_unlock(&LOCK_log);
}
DBUG_RETURN(error);
@@ -3391,6 +3552,7 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_C
bool synced= 0;
if (flush_and_sync(&synced))
goto err;
+ actual_size= commit_event->log_pos;
DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_ABORT(););
if (cache->error) // Error on read
{
@@ -3546,6 +3708,17 @@ void MYSQL_BIN_LOG::close(uint exiting)
original position on system that doesn't support pwrite().
*/
mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
+
+ end_io_cache(&log_file);
+ DBUG_ASSERT(is_active(log_file_name));
+ mysql_mutex_assert_owner(&LOCK_log);
+ actual_size= log_file.pos_in_file;
+ if (binlog_preallocate && my_chsize(log_file.file,
+ log_file.pos_in_file, 0, MYF(MY_WME)))
+ {
+ write_error= 1;
+ sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+ }
}
/* this will cleanup IO_CACHE, sync and close the file */
@@ -3666,7 +3839,7 @@ int MYSQL_BIN_LOG::open(const char *opt_
goto err;
}
- if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
+ if ((file= open_binlog(&log, log_name, &errmsg, get_log_lock())) < 0)
{
sql_print_error("%s", errmsg);
goto err;
=== modified file 'sql/binlog.h'
--- a/sql/binlog.h 2010-06-22 06:03:00 +0000
+++ b/sql/binlog.h 2010-07-12 02:39:55 +0000
@@ -234,6 +234,12 @@ public:
inline void unlock_index() { mysql_mutex_unlock(&LOCK_index);}
inline IO_CACHE *get_index_file() { return &index_file;}
inline uint32 get_open_count() { return open_count; }
+
+ ulonglong actual_size;
+ int preallocate(File fd);
+ int create_preallocate(char* new_log_path);
+ int new_log_name_from_index(char* new_binlog_name,
+ const char* opt_name, bool need_lock);
};
typedef struct st_load_file_info
@@ -254,7 +260,7 @@ bool stmt_has_updated_non_trans_table(co
int log_loaded_block(IO_CACHE* file);
File open_binlog(IO_CACHE *log, const char *log_file_name,
- const char **errmsg);
+ const char **errmsg, mysql_mutex_t* log_lock);
int check_binlog_magic(IO_CACHE* log, const char** errmsg);
bool purge_master_logs(THD* thd, const char* to_log);
bool purge_master_logs_before_date(THD* thd, time_t purge_time);
=== modified file 'sql/log.cc'
--- a/sql/log.cc 2010-06-23 09:56:24 +0000
+++ b/sql/log.cc 2010-07-12 02:39:55 +0000
@@ -37,7 +37,6 @@
#include <my_dir.h>
#include <stdarg.h>
-#include <m_ctype.h> // For test_if_number
#ifdef _WIN32
#include "message.h"
@@ -49,8 +48,6 @@
LOGGER logger;
-static bool test_if_number(const char *str,
- ulong *res, bool allow_wildcards);
/**
purge logs, master and slave sides both, related error code
@@ -1949,8 +1946,8 @@ bool general_log_write(THD *thd, enum en
0 String is not a number
*/
-static bool test_if_number(register const char *str,
- ulong *res, bool allow_wildcards)
+bool test_if_number(register const char *str,
+ ulong *res, bool allow_wildcards)
{
reg2 int flag;
const char *start;
=== modified file 'sql/log.h'
--- a/sql/log.h 2010-06-23 09:56:24 +0000
+++ b/sql/log.h 2010-07-12 02:39:55 +0000
@@ -18,6 +18,11 @@
#include "unireg.h" // REQUIRED: for other includes
#include "handler.h" /* my_xid */
+#include <m_ctype.h> // For test_if_number
+
+
+bool test_if_number(const char *str,
+ ulong *res, bool allow_wildcards);
/*
Transaction Coordinator log - a base abstract class
=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc 2010-06-22 12:58:10 +0000
+++ b/sql/log_event.cc 2010-07-12 02:39:55 +0000
@@ -977,6 +977,21 @@ int Log_event::read_log_event(IO_CACHE*
if (log_lock)
mysql_mutex_lock(log_lock);
+
+ /* mysql_bin_log.is_active() might be true if binlog dump thread
+ reads currently active binlog. If the binlog is preallocated,
+ binlog dump thread should not read active binlog until EOF,
+ but should read until mysql_bin_log.actual_size. By updating
+ file->end_of_file to mysql_bin_log.actual_size, my_b_read() reads
+ until mysql_bin_log.actual_size then detects as EOF. */
+ if (binlog_preallocate && file->file_name && file->type == READ_CACHE)
+ {
+ if (mysql_bin_log.is_active(file->file_name))
+ file->end_of_file= mysql_bin_log.actual_size;
+ else
+ file->end_of_file= ~(my_off_t) 0;
+ }
+
if (my_b_read(file, (uchar*) buf, sizeof(buf)))
{
/*
@@ -1078,6 +1093,23 @@ Log_event* Log_event::read_log_event(IO_
LOCK_MUTEX;
DBUG_PRINT("info", ("my_b_tell: %lu", (ulong) my_b_tell(file)));
+
+ /* mysql_bin_log.is_active() might be true if SHOW BINLOG EVENTS
+ reads currently active binlog. If the binlog is preallocated,
+ SHOW BINLOG EVENTS should not read active binlog until EOF,
+ but should read until mysql_bin_log.actual_size. By updating
+ file->end_of_file to mysql_bin_log.actual_size, my_b_read() reads
+ until mysql_bin_log.actual_size then detects as EOF. */
+#ifndef MYSQL_CLIENT
+ if (binlog_preallocate && file->file_name && file->type == READ_CACHE)
+ {
+ if (mysql_bin_log.is_active(file->file_name))
+ file->end_of_file= mysql_bin_log.actual_size;
+ else
+ file->end_of_file= ~(my_off_t) 0;
+ }
+#endif
+
if (my_b_read(file, (uchar *) head, header_size))
{
DBUG_PRINT("info", ("Log_event::read_log_event(IO_CACHE*,Format_desc*) \
@@ -1136,6 +1168,9 @@ err:
sql_print_error("Error in Log_event::read_log_event(): "
"'%s', data_len: %lu, event_type: %d",
error,data_len,head[EVENT_TYPE_OFFSET]);
+ sql_print_error("If binlog_preallocate was 1 or higher, "
+ "it was likely that preallocated binlog file "
+ "was not closed properly (i.e. mysqld crash).");
my_free(buf, MYF(MY_ALLOW_ZERO_PTR));
/*
The SQL slave thread will check if file->error<0 to know
=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc 2010-06-23 09:56:24 +0000
+++ b/sql/mysqld.cc 2010-07-12 02:39:55 +0000
@@ -507,6 +507,7 @@ uint slave_exec_mode_options;
ulonglong slave_type_conversions_options;
ulong thread_cache_size=0, thread_pool_size= 0;
ulong binlog_cache_size=0;
+ulong binlog_preallocate=0;
ulonglong max_binlog_cache_size=0;
ulong query_cache_size=0;
ulong refresh_version; /* Increments on each reload */
=== modified file 'sql/mysqld.h'
--- a/sql/mysqld.h 2010-06-22 09:34:59 +0000
+++ b/sql/mysqld.h 2010-07-12 02:39:55 +0000
@@ -180,6 +180,7 @@ extern ulong what_to_log,flush_time;
extern ulong query_buff_size;
extern ulong max_prepared_stmt_count, prepared_stmt_count;
extern ulong binlog_cache_size, open_files_limit;
+extern ulong binlog_preallocate;
extern ulonglong max_binlog_cache_size;
extern ulong max_binlog_size, max_relay_log_size;
extern ulong opt_binlog_rows_event_max_size;
=== modified file 'sql/repl_failsafe.cc'
--- a/sql/repl_failsafe.cc 2010-06-23 09:56:24 +0000
+++ b/sql/repl_failsafe.cc 2010-07-12 02:39:55 +0000
@@ -351,7 +351,7 @@ int translate_master(THD* thd, LEX_MASTE
for (;;)
{
- if ((file=open_binlog(&log, linfo.log_file_name, &errmsg_p)) < 0)
+ if ((file=open_binlog(&log, linfo.log_file_name, &errmsg_p, NULL)) < 0)
{
strmov(errmsg, errmsg_p);
goto err;
=== modified file 'sql/rpl_master.cc'
--- a/sql/rpl_master.cc 2010-06-22 12:58:10 +0000
+++ b/sql/rpl_master.cc 2010-07-12 02:39:55 +0000
@@ -376,7 +376,8 @@ void mysql_binlog_send(THD* thd, char* l
thd->current_linfo = &linfo;
mysql_mutex_unlock(&LOCK_thread_count);
- if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0)
+ if ((file=open_binlog(&log, log_file_name, &errmsg,
+ mysql_bin_log.get_log_lock())) < 0)
{
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
goto err;
@@ -806,7 +807,8 @@ impossible position";
position. If the binlog is 5.0, the next event we are going to
read and send is Format_description_log_event.
*/
- if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 ||
+ if ((file=open_binlog(&log, log_file_name, &errmsg,
+ mysql_bin_log.get_log_lock())) < 0 ||
fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE,
&errmsg))
{
=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc 2010-06-22 12:58:10 +0000
+++ b/sql/rpl_rli.cc 2010-07-12 02:39:55 +0000
@@ -607,7 +607,7 @@ int init_relay_log_pos(Relay_log_info* r
Open the relay log and set rli->cur_log to point at this one
*/
if ((rli->cur_log_fd=open_binlog(&rli->cache_buf,
- rli->linfo.log_file_name,errmsg)) < 0)
+ rli->linfo.log_file_name,errmsg, NULL)) < 0)
goto err;
rli->cur_log = &rli->cache_buf;
}
=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc 2010-06-23 09:56:24 +0000
+++ b/sql/rpl_slave.cc 2010-07-12 02:39:55 +0000
@@ -4696,7 +4696,7 @@ static IO_CACHE *reopen_relay_log(Relay_
IO_CACHE *cur_log = rli->cur_log=&rli->cache_buf;
if ((rli->cur_log_fd=open_binlog(cur_log,rli->event_relay_log_name,
- errmsg)) <0)
+ errmsg, NULL)) <0)
DBUG_RETURN(0);
/*
We want to start exactly where we was before:
@@ -5042,7 +5042,7 @@ static Log_event* next_event(Relay_log_i
#endif
// open_binlog() will check the magic header
if ((rli->cur_log_fd=open_binlog(cur_log,rli->linfo.log_file_name,
- &errmsg)) <0)
+ &errmsg, NULL)) <0)
goto err;
}
else
=== modified file 'sql/sys_vars.cc'
--- a/sql/sys_vars.cc 2010-06-23 09:56:24 +0000
+++ b/sql/sys_vars.cc 2010-07-12 02:39:55 +0000
@@ -244,6 +244,33 @@ static Sys_var_ulong Sys_binlog_cache_si
CMD_LINE(REQUIRED_ARG),
VALID_RANGE(IO_SIZE, ULONG_MAX), DEFAULT(32768), BLOCK_SIZE(IO_SIZE));
+/*
+  Global system variable binlog_preallocate (range 0..512, default 0),
+  bound to the binlog_preallocate global. The help text below describes
+  the meaning of 0, 1 and N >= 2.
+*/
+static Sys_var_ulong Sys_binlog_preallocate(
+       "binlog_preallocate", "This variable is used to dynamically "
+       "preallocate binlog file(s) to improve binary logging performance. "
+       "This is especially effective if sync-binlog=1. "
+       "The default value is 0, which neither preallocates binlogs "
+       "automatically nor uses preallocated binlog files (generated by "
+       "mysqlbinlogalloc). This is the same behavior as previous versions. "
+       "If binlog_preallocate is 1, mysqld does not preallocate binlogs "
+       "automatically, but detects statically preallocated binlog files. "
+       "If binlog_preallocate >=2, mysqld works as follows when "
+       "switching binlog. (1) If new binlog file does not exist, mysqld creates "
+       "and preallocates binlog file(s). Preallocated file size is equal to "
+       "max_binlog_size. binlog_preallocate=N (N>=2) means N binlog files are "
+       "automatically generated at this point. (2) If new binlog file is "
+       "created beforehand (by mysqlbinlogalloc), mysqld opens and uses it "
+       "instead of creating new binlog file (creating new binlog file is "
+       "the default behavior in MySQL). It is recommended to allocate "
+       "many binlog files with mysqlbinlogalloc before starting mysqld, "
+       "and to set binlog_preallocate=1 and sync_binlog=1 in my.cnf. "
+       "Then you can avoid the overhead of writing huge amounts of data "
+       "to preallocate binlogs. If posix_fallocate() is accurately supported "
+       "on targeted filesystems, setting a high binlog_preallocate value "
+       "is fine because preallocation cost is almost zero.",
+       GLOBAL_VAR(binlog_preallocate),
+       CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, 512), DEFAULT(0), BLOCK_SIZE(1));
+
static bool check_has_super(sys_var *self, THD *thd, set_var *var)
{
DBUG_ASSERT(self->scope() != sys_var::GLOBAL);// don't abuse check_has_super()
Attachment: [text/bzr-bundle] bzr/yoshinori.matsunobu@gmail.com-20100712023955-7zf4qguutwk1t588.bundle