Hi Libing,
Good work.
You have my approval on this patch.
However, please find some minor suggestions inline.
Best,
Nirbhay
On Tuesday 22 February 2011 08:57 AM, Libing Song wrote:
> #Atfile:///home/songlibing/bzrwork/wt3/mysql-trunk/ based
> onrevid:alexander.barkov@stripped
>
> 3674 Libing Song 2011-02-22
> BUG#11747577 Not able to recover binary/blob data correctly using mysqlbinlog
>
> The queries generated by mysqlbinlog could not be executed by 'mysql' client
> if there was any 0x00 in it. Or the queries recovered diverged data if there
> was any 0x0D0A sequence in it. The reason is that 'mysql' client always
> translated '0x0D0A'('\r\n') to '0x0A'('\n') and treated '0x00' as the end
> of a query if it is in non-interactive mode(receiving queries from a file
> or a pipe).
>
> This patch added the option 'binary-mode' in 'mysql' program. 'mysql' will
> not translates '0x0D0A' to '0x0A' and not treat '0x00' as the end of a query
> if binary-mode is set 1 and it is in non-interactive mode. Meanwhile all
> mysql client commands except 'delimiter' and '\C' are disabled. This will
> improve the performance when mysql applies huge logs.
I suggest to add an additional note in the documentation of --binary-mode,
which should state the recommendation of using this option with 'sql
statements
generated from binlog files'.
> added:
> mysql-test/r/mysql_binary_mode.result
> mysql-test/t/mysql_binary_mode.test
> modified:
> client/client_priv.h
> client/my_readline.h
> client/mysql.cc
> client/readline.cc
> === modified file 'client/client_priv.h'
> --- a/client/client_priv.h 2011-01-16 04:02:29 +0000
> +++ b/client/client_priv.h 2011-02-22 03:27:36 +0000
> @@ -1,4 +1,4 @@
> -/* Copyright (C) 2001-2006 MySQL AB, 2009 Sun Microsystems, Inc
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
> This program is free software; you can redistribute it and/or modify
> it under the terms of the GNU General Public License as published by
> @@ -86,6 +86,7 @@ enum options_client
> OPT_DEFAULT_PLUGIN,
> OPT_RAW_OUTPUT, OPT_WAIT_SERVER_ID, OPT_STOP_NEVER,
> OPT_BINLOG_ROWS_EVENT_MAX_SIZE,
> + OPT_BINARY_MODE,
> OPT_MAX_CLIENT_OPTION
> };
>
>
> === modified file 'client/my_readline.h'
> --- a/client/my_readline.h 2011-02-05 05:04:15 +0000
> +++ b/client/my_readline.h 2011-02-22 03:27:36 +0000
> @@ -1,7 +1,7 @@
> #ifndef CLIENT_MY_READLINE_INCLUDED
> #define CLIENT_MY_READLINE_INCLUDED
>
> -/* Copyright (C) 2000 MySQL AB
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
> This program is free software; you can redistribute it and/or modify
> it under the terms of the GNU General Public License as published by
> @@ -34,7 +34,7 @@ typedef struct st_line_buffer
>
> extern LINE_BUFFER *batch_readline_init(ulong max_size,FILE *file);
> extern LINE_BUFFER *batch_readline_command(LINE_BUFFER *buffer, char * str);
> -extern char *batch_readline(LINE_BUFFER *buffer);
> +extern char *batch_readline(LINE_BUFFER *buffer, bool binary_mode);
> extern void batch_readline_end(LINE_BUFFER *buffer);
>
> #endif /* CLIENT_MY_READLINE_INCLUDED */
>
> === modified file 'client/mysql.cc'
> --- a/client/mysql.cc 2011-02-15 12:38:39 +0000
> +++ b/client/mysql.cc 2011-02-22 03:27:36 +0000
> @@ -1,4 +1,4 @@
> -/* Copyright 2000, 2010, Oracle and/or its affiliates. All rights reserved.
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
> This program is free software; you can redistribute it and/or modify
> it under the terms of the GNU General Public License as published by
> @@ -150,6 +150,7 @@ static uint my_end_arg;
> static char * opt_mysql_unix_port=0;
> static char *opt_bind_addr = NULL;
> static int connect_flag=CLIENT_INTERACTIVE;
> +static my_bool opt_binary_mode= FALSE;
> static char *current_host,*current_db,*current_user=0,*opt_password=0,
> *current_prompt=0, *delimiter_str= 0,
> *default_charset= (char*) MYSQL_AUTODETECT_CHARSET_NAME,
> @@ -1044,9 +1045,10 @@ static void initialize_readline (char *n
> static void fix_history(String *final_command);
> #endif
>
> -static COMMANDS *find_command(char *name,char cmd_name);
> -static bool add_line(String&buffer,char *line,char *in_string,
> - bool *ml_comment, bool truncated);
> +static COMMANDS *find_command(char *name);
> +static COMMANDS *find_command(char cmd_name);
> +static bool add_line(String&buffer, char *line, ulong line_length,
> + char *in_string, bool *ml_comment, bool truncated);
> static void remove_cntrl(String&buffer);
> static void print_table_data(MYSQL_RES *result);
> static void print_table_data_html(MYSQL_RES *result);
> @@ -1064,6 +1066,43 @@ extern "C" sig_handler handle_sigint(int
> static sig_handler window_resize(int sig);
> #endif
>
> +const char DELIMITER_NAME[]= "delimiter";
> +const uint DELIMITER_NAME_LEN= sizeof(DELIMITER_NAME) - 1;
> +inline bool is_delimiter_command(char *name, ulong len)
> +{
> + /*
> + Delimiter command has a parameter, so the length of the whole command
> + is larger than DELIMITER_NAME_LEN. We don't care the parameter, so
> + only name(first DELIMITER_NAME_LEN bytes) is checked.
> + */
> + return (len>= DELIMITER_NAME_LEN&&
> + !my_strnncoll(charset_info, (uchar*) name, DELIMITER_NAME_LEN,
> + (uchar *) DELIMITER_NAME, DELIMITER_NAME_LEN));
> +}
> +
> +/**
> + Get the index of a command in the commands array.
> +
> + @param cmd_char Short form command.
> +
> + @return int
> + The index of the command is returned if it is found, else -1 is returned.
> +*/
> +inline int get_command_index(char cmd_char)
> +{
> + /*
> + All client-specific commands are in the first part of commands array
> + and have a function to implement it.
> + */
> + for (uint i= 0; *commands[i].func; i++)
> + if (commands[i].cmd_char == cmd_char)
> + return i;
> + return -1;
> +}
> +
> +static int delimiter_index= -1;
> +static int charset_index= -1;
> +static bool real_binary_mode= FALSE;
>
> int main(int argc,char *argv[])
> {
> @@ -1072,7 +1111,9 @@ int main(int argc,char *argv[])
> MY_INIT(argv[0]);
> DBUG_ENTER("main");
> DBUG_PROCESS(argv[0]);
> -
> +
> + charset_index= get_command_index('C');
> + delimiter_index= get_command_index('d');
> delimiter_str= delimiter;
> default_prompt = my_strdup(getenv("MYSQL_PS1") ?
> getenv("MYSQL_PS1") :
> @@ -1574,6 +1615,12 @@ static struct my_option my_long_options[
> "Default authentication client-side plugin to use.",
> (uchar**)&opt_default_auth, (uchar**)&opt_default_auth, 0,
> GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
> + {"binary-mode", OPT_BINARY_MODE,
> + "By default, ASCII '\\0' is disallowed and '\\r\\n' is translated to '\\n'. "
> + "This switch turns off both features in non-interactive mode (for input "
> + "piped to mysql or loaded using the 'source' command). This is necessary "
> + "when processing output from mysqlbinlog that may contain blobs.",
> +&opt_binary_mode,&opt_binary_mode, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
> { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
> };
>
> @@ -1845,13 +1892,35 @@ static int read_and_execute(bool interac
> ulong line_number=0;
> bool ml_comment= 0;
> COMMANDS *com;
> + ulong line_length= 0;
> status.exit_status=1;
>
> + real_binary_mode= !interactive&& opt_binary_mode;
> for (;;)
> {
> if (!interactive)
> {
> - line=batch_readline(status.line_buff);
> + line= batch_readline(status.line_buff, real_binary_mode);
> + line_length= status.line_buff->read_length;
> + /*
> + ASCII 0x00 is not allowed appearing in queries if it is not in binary
> + mode.
> + */
> + if (!real_binary_mode&& line&& strlen(line) != line_length)
> + {
> + status.exit_status= 1;
> + String msg;
> + msg.append("ASCII '\\0' appeared in the statement, but this is not "
> + "allowed unless option --binary-mode is enabled and mysql is "
> + "run in non-interactive mode. Set --binary-mode 1 if ASCII "
s/Set --binary-mode 1/Set --binary-mode to 1
OR
s/Set --binary-mode 1/Set --binary-mode
> + "'\\0' is expected. Query: '");
> + msg.append(glob_buffer);
> + msg.append(line);
> + msg.append("'.");
> + put_info(msg.c_ptr(), INFO_ERROR);
> + break;
> + }
> +
> /*
> Skip UTF8 Byte Order Marker (BOM) 0xEFBBBF.
> Editors like "notepad" put this marker in
> @@ -1913,6 +1982,8 @@ static int read_and_execute(bool interac
> */
> if (opt_outfile&& line)
> fprintf(OUTFILE, "%s\n", line);
> +
> + line_length= line ? strlen(line) : 0;
> }
> // End of file or system error
> if (!line)
> @@ -1929,7 +2000,7 @@ static int read_and_execute(bool interac
> (We want to allow help, print and clear anywhere at line start
> */
> if ((named_cmds || glob_buffer.is_empty())
> - && !ml_comment&& !in_string&&
> (com=find_command(line,0)))
> + && !ml_comment&& !in_string&& (com=
> find_command(line)))
> {
> if ((*com->func)(&glob_buffer,line)> 0)
> break;
> @@ -1941,7 +2012,7 @@ static int read_and_execute(bool interac
> #endif
> continue;
> }
> - if (add_line(glob_buffer, line,&in_string,&ml_comment,
> + if (add_line(glob_buffer, line, line_length,&in_string,&ml_comment,
> status.line_buff ? status.line_buff->truncated : 0))
> break;
> }
> @@ -1963,70 +2034,123 @@ static int read_and_execute(bool interac
> tmpbuf.free();
> #endif
>
> + /*
> + If the function is called by 'source' command, it will return to interactive
> + mode, so real_binary_mode should be FALSE. Otherwise, it will exit the
> + program, it is safe to set real_binary_mode to FALSE.
> + */
> + real_binary_mode= FALSE;
> return status.exit_status;
> }
>
> +/**
> + It checks if the input is a short form command. It returns the command's
> + pointer if a command is found, else return NULL.
> +
> + @param cmd_char A character of one byte.
> +
> + @return
> + the command's pointer or NULL.
> +*/
> +static COMMANDS *find_command(char cmd_char)
> +{
> + DBUG_ENTER("find_command");
> + DBUG_PRINT("enter", ("cmd_char: %d", cmd_char));
> +
> + int index= -1;
> + if (real_binary_mode)
> + {
> + if (cmd_char == 'C')
> + index= charset_index;
> + }
Please add a comment for the above exception.
> + else
> + index= get_command_index(cmd_char);
> +
> + if (index>= 0)
> + {
> + DBUG_PRINT("exit",("found command: %s", commands[index].name));
> + DBUG_RETURN(&commands[index]);
> + }
> + else
> + DBUG_RETURN((COMMANDS *) 0);
> +}
> +
> +/**
Drop a * above.
> + It checks if the input is a long form command. It returns the command's
> + pointer if a command is found, else return NULL.
>
> -static COMMANDS *find_command(char *name,char cmd_char)
> + @param name A string.
> + @return
> + the command's pointer or NULL.
> +*/
> +static COMMANDS *find_command(char *name)
> {
> uint len;
> char *end;
> DBUG_ENTER("find_command");
> - DBUG_PRINT("enter",("name: '%s' char: %d", name ? name : "NULL", cmd_char));
>
> - if (!name)
> + DBUG_ASSERT(name != NULL);
> + DBUG_PRINT("enter", ("name: '%s'", name));
> +
> + while (my_isspace(charset_info, *name))
> + name++;
> + /*
> + If there is an \\g in the row or if the row has a delimiter but
> + this is not a delimiter command, let add_line() take care of
> + parsing the row and calling find_command().
> + */
> + if ((!real_binary_mode&& strstr(name, "\\g")) ||
> + (strstr(name, delimiter)&&
> + !is_delimiter_command(name, DELIMITER_NAME_LEN)))
> + DBUG_RETURN((COMMANDS *) 0);
> +
> + if ((end=strcont(name, " \t")))
> + {
> + len=(uint) (end - name);
> + while (my_isspace(charset_info, *end))
> + end++;
> + if (!*end)
> + end= 0; // no arguments to function
> + }
> + else
> + len= (uint) strlen(name);
> +
> + int index= -1;
> + if (real_binary_mode)
> {
> - len=0;
> - end=0;
> + if (is_delimiter_command(name, len))
> + index= delimiter_index;
> }
> else
> {
> - while (my_isspace(charset_info,*name))
> - name++;
> /*
> - If there is an \\g in the row or if the row has a delimiter but
> - this is not a delimiter command, let add_line() take care of
> - parsing the row and calling find_command()
> + All commands are in the first part of commands array and have a function
> + to implemente it.
s/implemente/implement
> */
> - if (strstr(name, "\\g") || (strstr(name, delimiter)&&
> - !(strlen(name)>= 9&&
> - !my_strnncoll(&my_charset_latin1,
> - (uchar*) name, 9,
> - (const uchar*) "delimiter",
> - 9))))
> - DBUG_RETURN((COMMANDS *) 0);
> - if ((end=strcont(name," \t")))
> + for (uint i= 0; commands[i].func; i++)
> {
> - len=(uint) (end - name);
> - while (my_isspace(charset_info,*end))
> - end++;
> - if (!*end)
> - end=0; // no arguments to function
> + if (commands[i].name[len] == '\0'&&
> + !my_strnncoll(&my_charset_latin1, (uchar*) name, len,
> + (uchar*) commands[i].name, len)&&
> + (!end || commands[i].takes_params))
> + {
> + index= i;
> + break;
> + }
> }
> - else
> - len=(uint) strlen(name);
> }
>
> - for (uint i= 0; commands[i].name; i++)
> + if (index>= 0)
> {
> - if (commands[i].func&&
> - ((name&&
> - !my_strnncoll(&my_charset_latin1, (uchar*)name, len,
> - (uchar*)commands[i].name,len)&&
> - !commands[i].name[len]&&
> - (!end || (end&& commands[i].takes_params))) ||
> - (!name&& commands[i].cmd_char == cmd_char)))
> - {
> - DBUG_PRINT("exit",("found command: %s", commands[i].name));
> - DBUG_RETURN(&commands[i]);
> - }
> + DBUG_PRINT("exit", ("found command: %s", commands[index].name));
> + DBUG_RETURN(&commands[index]);
> }
> DBUG_RETURN((COMMANDS *) 0);
> }
>
>
> -static bool add_line(String&buffer,char *line,char *in_string,
> - bool *ml_comment, bool truncated)
> +static bool add_line(String&buffer, char *line, ulong line_length,
> + char *in_string, bool *ml_comment, bool truncated)
> {
> uchar inchar;
> char buff[80], *pos, *out;
> @@ -2041,10 +2165,11 @@ static bool add_line(String&buffer,char
> if (status.add_to_history&& line[0]&& not_in_history(line))
> add_history(line);
> #endif
> - char *end_of_line=line+(uint) strlen(line);
> + char *end_of_line= line + line_length;
>
> - for (pos=out=line ; (inchar= (uchar) *pos) ; pos++)
> + for (pos= out= line; pos< end_of_line; pos++)
> {
> + inchar= (uchar) *pos;
> if (!preserve_comments)
> {
> // Skip spaces at the beginning of a statement
> @@ -2084,7 +2209,7 @@ static bool add_line(String&buffer,char
> *out++= (char) inchar;
> continue;
> }
> - if ((com=find_command(NullS,(char) inchar)))
> + if ((com= find_command((char) inchar)))
> {
> // Flush previously accepted characters
> if (out != line)
> @@ -2160,7 +2285,7 @@ static bool add_line(String&buffer,char
>
> pos--;
>
> - if ((com= find_command(buffer.c_ptr(), 0)))
> + if ((com= find_command(buffer.c_ptr())))
> {
>
> if ((*com->func)(&buffer, buffer.c_ptr())> 0)
> @@ -2274,10 +2399,7 @@ static bool add_line(String&buffer,char
> {
> uint length=(uint) (out-line);
>
> - if (!truncated&&
> - (length< 9 ||
> - my_strnncoll (charset_info,
> - (uchar *)line, 9, (const uchar *) "delimiter", 9)))
> + if (!truncated&& !is_delimiter_command(line, length))
> {
> /*
> Don't add a new line in case there's a DELIMITER command to be
>
> === modified file 'client/readline.cc'
> --- a/client/readline.cc 2011-02-09 11:16:33 +0000
> +++ b/client/readline.cc 2011-02-22 03:27:36 +0000
> @@ -1,4 +1,4 @@
> -/* Copyright (C) 2000 MySQL AB
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
> This program is free software; you can redistribute it and/or modify
> it under the terms of the GNU General Public License as published by
> @@ -52,7 +52,7 @@ LINE_BUFFER *batch_readline_init(ulong m
> }
>
>
> -char *batch_readline(LINE_BUFFER *line_buff)
> +char *batch_readline(LINE_BUFFER *line_buff, bool binary_mode)
> {
> char *pos;
> ulong out_length;
> @@ -60,10 +60,14 @@ char *batch_readline(LINE_BUFFER *line_b
> if (!(pos=intern_read_line(line_buff,&out_length)))
> return 0;
> if (out_length&& pos[out_length-1] == '\n')
> - if (--out_length&& pos[out_length-1] == '\r') /* Remove '\n' */
> + {
> + out_length--; /* Remove '\n' */
> + if (!binary_mode&& pos[out_length-1] == '\r')
> out_length--; /* Remove '\r' */
> + }
> line_buff->read_length=out_length;
> pos[out_length]=0;
> + DBUG_DUMP("Query: ", (unsigned char *) pos, out_length);
> return pos;
> }
>
> @@ -223,7 +227,7 @@ char *intern_read_line(LINE_BUFFER *buff
> for (;;)
> {
> pos=buffer->end_of_line;
> - while (*pos != '\n'&& *pos)
> + while (*pos != '\n'&& pos != buffer->end)
> pos++;
> if (pos == buffer->end)
> {
> @@ -249,6 +253,8 @@ char *intern_read_line(LINE_BUFFER *buff
> buffer->truncated= 0;
> buffer->end_of_line=pos+1;
> *out_length=(ulong) (pos + 1 - buffer->eof - buffer->start_of_line);
> +
> + DBUG_DUMP("Query: ", (unsigned char *) buffer->start_of_line, *out_length);
> DBUG_RETURN(buffer->start_of_line);
> }
> }
>
> === added file 'mysql-test/r/mysql_binary_mode.result'
> --- a/mysql-test/r/mysql_binary_mode.result 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/r/mysql_binary_mode.result 2011-02-22 03:27:36 +0000
> @@ -0,0 +1,46 @@
> +RESET MASTER;
> +# Bug#33048 Not able to recover binary/blob data correctly using mysqlbinlog
> +# --------------------------------------------------------------------------
> +# The test verify that 0x00 and 0x0D0A sequence can be handled correctly by
> +# mysql
> +
> +CREATE TABLE `A
> +B` (c1 CHAR(100));
> +# It is a faked statement. ASCII 0 is in the original statement, it would
> +# make the test result to become a binary file which was difficult to get
> +# the diff result if the original query was logged in the result.
> +INSERT INTO `A\r\nB` VALUES("A\0B");
> +
> +INSERT INTO `A
> +B` VALUES("A
> +B");
> +SELECT HEX(c1) FROM `A
> +B`;
> +HEX(c1)
> +410042
> +410D0A42
> +
> +FLUSH LOGS;
> +DROP TABLE `A
> +B`;
> +
> +# '--exec mysql ...' without --binary-mode option
> +# It creates the table with a wrong table name and generates an error.
> +ERROR at line 34: ASCII '\0' appeared in the statement, but this is not allowed
> unless option --binary-mode is enabled and mysql is run in non-interactive mode. Set
> --binary-mode 1 if ASCII '\0' is expected. Query: 'INSERT INTO `A
> +B` VALUES("A'.
> +
> +# It is not in binary_mode, so table name '0x410D0A42' is translated to
> +# '0x410A42' by mysql.
> +DROP TABLE `A
> +B`;
> +
> +# In binary_mode, table name '0x410D0A42' and string '0x410042' can be
> +# handled correctly.
> +SELECT HEX(c1) FROM `A
> +B`;
> +HEX(c1)
> +410042
> +410D0A42
> +
> +DROP TABLE `A
> +B`;
>
> === added file 'mysql-test/t/mysql_binary_mode.test'
> --- a/mysql-test/t/mysql_binary_mode.test 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/t/mysql_binary_mode.test 2011-02-22 03:27:36 +0000
> @@ -0,0 +1,54 @@
> +source include/have_binlog_format_statement.inc;
> +RESET MASTER;
> +
> +--echo # Bug#33048 Not able to recover binary/blob data correctly using mysqlbinlog
> +--echo # --------------------------------------------------------------------------
> +--echo # The test verify that 0x00 and 0x0D0A sequence can be handled correctly by
> +--echo # mysql
> +--echo
> +
> +let $table_name= `SELECT 0x410D0A42`;
> +eval CREATE TABLE `$table_name` (c1 CHAR(100));
> +
> +let $char= `SELECT 0x410042`;
> +--echo # It is a faked statement. ASCII 0 is in the original statement, it would
> +--echo # make the test result to become a binary file which was difficult to get
> +--echo # the diff result if the original query was logged in the result.
> +--echo INSERT INTO `A\r\nB` VALUES("A\0B");
> +--echo
> +--disable_query_log
> +eval INSERT INTO `$table_name` VALUES("$char");
> +--enable_query_log
> +
> +let $char= `SELECT 0x410D0A42`;
> +eval INSERT INTO `$table_name` VALUES("$char");
> +
> +eval SELECT HEX(c1) FROM `$table_name`;
> +
> +--echo
> +FLUSH LOGS;
> +eval DROP TABLE `$table_name`;
> +
> +--echo
> +let $MYSQLD_DATADIR= `SELECT @@datadir`;
> +--exec $MYSQL_BINLOG $MYSQLD_DATADIR/master-bin.000001>
> $MYSQLTEST_VARDIR/tmp/my.sql
> +
> +--echo # '--exec mysql ...' without --binary-mode option
> +--echo # It creates the table with a wrong table name and generates an error.
> +--error 1
> +--exec $MYSQL test< $MYSQLTEST_VARDIR/tmp/my.sql 2>&1
> +
> +--echo
> +--echo # It is not in binary_mode, so table name '0x410D0A42' is translated to
> +--echo # '0x410A42' by mysql.
> +let $table_name1= `SELECT 0x410A42`;
> +eval DROP TABLE `$table_name1`;
> +
> +--echo
> +--echo # In binary_mode, table name '0x410D0A42' and string '0x410042' can be
> +--echo # handled correctly.
> +--exec $MYSQL --binary-mode test< $MYSQLTEST_VARDIR/tmp/my.sql
> +eval SELECT HEX(c1) FROM `$table_name`;
> +
> +--echo
> +eval DROP TABLE `$table_name`;
>
>
>
>