Hi Libing,
Thanks for this work. This is a complicated piece of code and I could
not review the whole patch. I will be away tomorrow so I send this
partial review so that you have something to start work on.
See comments inline.
/Sven
On 02/17/2011 03:40 AM, Libing Song wrote:
> #At file:///home/songlibing/bzrwork/wt3/mysql-trunk/ based on
> revid:dmitry.lenev@stripped
>
> 3653 Libing Song 2011-02-17
> BUG#11747577 Not able to recover binary/blob data correctly using mysqlbinlog
>
> The queries generated by mysqlbinlog could not be executed by 'mysql' client
> if there was any 0x00 in it. Or the queries recovered diverged data if there
> was any 0x0D0A sequence in it. The reason is that 'mysql' client always
> translated '0x0D0A'('\r\n') to '0x0A'('\n') and treated '0x00' as the end
> of a query if it is in non-interactive mode(receiving queries from a file
> or a pipe).
>
> This patch added the option 'binary-mode' in 'mysql' program. 'mysql' will
> not translated '0x0D0A' to '0x0A' and not treat '0x00' as the end of a query
> if binary-mode is set 1 and it is in non-interactive mode. Meanwhile all
> mysql client commands except 'delimiter' and '\C' are disabled. This will
> improve the performance when mysql applies huge logs.
>
> added:
> mysql-test/r/mysql_binary_mode.result
> mysql-test/t/mysql_binary_mode.test
> modified:
> client/client_priv.h
> client/my_readline.h
> client/mysql.cc
> client/readline.cc
> === modified file 'client/client_priv.h'
> --- a/client/client_priv.h 2011-01-16 04:02:29 +0000
> +++ b/client/client_priv.h 2011-02-17 02:39:58 +0000
> @@ -1,4 +1,4 @@
> -/* Copyright (C) 2001-2006 MySQL AB, 2009 Sun Microsystems, Inc
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
> This program is free software; you can redistribute it and/or modify
> it under the terms of the GNU General Public License as published by
> @@ -86,6 +86,7 @@ enum options_client
> OPT_DEFAULT_PLUGIN,
> OPT_RAW_OUTPUT, OPT_WAIT_SERVER_ID, OPT_STOP_NEVER,
> OPT_BINLOG_ROWS_EVENT_MAX_SIZE,
> + OPT_BINARY_MODE,
> OPT_MAX_CLIENT_OPTION
> };
>
>
> === modified file 'client/my_readline.h'
> --- a/client/my_readline.h 2011-02-05 05:04:15 +0000
> +++ b/client/my_readline.h 2011-02-17 02:39:58 +0000
> @@ -1,7 +1,7 @@
> #ifndef CLIENT_MY_READLINE_INCLUDED
> #define CLIENT_MY_READLINE_INCLUDED
>
> -/* Copyright (C) 2000 MySQL AB
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
> This program is free software; you can redistribute it and/or modify
> it under the terms of the GNU General Public License as published by
> @@ -34,7 +34,7 @@ typedef struct st_line_buffer
>
> extern LINE_BUFFER *batch_readline_init(ulong max_size,FILE *file);
> extern LINE_BUFFER *batch_readline_command(LINE_BUFFER *buffer, char * str);
> -extern char *batch_readline(LINE_BUFFER *buffer);
> +extern char *batch_readline(LINE_BUFFER *buffer, bool binary_mode);
> extern void batch_readline_end(LINE_BUFFER *buffer);
>
> #endif /* CLIENT_MY_READLINE_INCLUDED */
>
> === modified file 'client/mysql.cc'
> --- a/client/mysql.cc 2011-02-15 12:38:39 +0000
> +++ b/client/mysql.cc 2011-02-17 02:39:58 +0000
> @@ -1,4 +1,4 @@
> -/* Copyright 2000, 2010, Oracle and/or its affiliates. All rights reserved.
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
> This program is free software; you can redistribute it and/or modify
> it under the terms of the GNU General Public License as published by
> @@ -150,6 +150,7 @@ static uint my_end_arg;
> static char * opt_mysql_unix_port=0;
> static char *opt_bind_addr = NULL;
> static int connect_flag=CLIENT_INTERACTIVE;
> +static my_bool opt_binary_mode= FALSE;
> static char *current_host,*current_db,*current_user=0,*opt_password=0,
> *current_prompt=0, *delimiter_str= 0,
> *default_charset= (char*) MYSQL_AUTODETECT_CHARSET_NAME,
> @@ -1044,9 +1045,10 @@ static void initialize_readline (char *n
> static void fix_history(String *final_command);
> #endif
>
> -static COMMANDS *find_command(char *name,char cmd_name);
> -static bool add_line(String&buffer,char *line,char *in_string,
> - bool *ml_comment, bool truncated);
> +static COMMANDS *find_command(char *name);
> +static COMMANDS *find_command(char cmd_name);
> +static bool add_line(String&buffer, char *line, ulong line_length,
> + char *in_string, bool *ml_comment, bool truncated);
> static void remove_cntrl(String&buffer);
> static void print_table_data(MYSQL_RES *result);
> static void print_table_data_html(MYSQL_RES *result);
> @@ -1064,6 +1066,30 @@ extern "C" sig_handler handle_sigint(int
> static sig_handler window_resize(int sig);
> #endif
>
> +const char DELIMITER_NAME[]= "delimiter";
> +const uint DELIMITER_NAME_LEN= sizeof(DELIMITER_NAME) - 1;
> +inline bool is_delimiter_command(char *name, ulong len)
> +{
> + return (len == DELIMITER_NAME_LEN&&
> + !my_strnncoll(charset_info, (uchar*)name, DELIMITER_NAME_LEN,
> + (uchar *)DELIMITER_NAME, DELIMITER_NAME_LEN));
> +}
> +
> +inline int get_command_index(char cmd_char)
Please add a comment that explains what this function does.
> +{
> + /*
> + All commands are in the first part of commands array and have a function
> + to implemente it.
Clarification: s/commands/client-specific commands/
Typo: s/implemente/implement/
> + */
> + for (uint i=0; *commands[i].func; i++)
> + if (commands[i].cmd_char == cmd_char)
> + return i;
> + return -1;
> +}
> +
> +static int delimiter_index= -1;
> +static int charset_index= -1;
> +static bool real_binary_mode= FALSE;
>
> int main(int argc,char *argv[])
> {
> @@ -1072,7 +1098,9 @@ int main(int argc,char *argv[])
> MY_INIT(argv[0]);
> DBUG_ENTER("main");
> DBUG_PROCESS(argv[0]);
> -
> +
> + charset_index= get_command_index('C');
> + delimiter_index= get_command_index('d');
> delimiter_str= delimiter;
> default_prompt = my_strdup(getenv("MYSQL_PS1") ?
> getenv("MYSQL_PS1") :
> @@ -1574,6 +1602,11 @@ static struct my_option my_long_options[
> "Default authentication client-side plugin to use.",
> (uchar**)&opt_default_auth, (uchar**)&opt_default_auth, 0,
> GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
> + {"binary-mode", OPT_BINARY_MODE,
> + "ASCII '0x00' is allowed in queries and '0x0D0A'('\\r\\n') sequence "
> + "is not translated to '0x0A'('\\n') if 'mysql' is non-interactive and "
> + "--binary-mode is set.",
> +&opt_binary_mode,&opt_binary_mode, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
I think the option has to be more clearly explained. It's not obvious
what the default behavior is, so I would suggest that we first describe
that and then describe how the flag changes the behavior.
Maybe something like:
By default, ASCII '\0' is disallowed and '\r\n' is translated to '\n'.
This switch turns off both these features in non-interactive mode (i.e.,
for input that is either piped to mysql or loaded using the "source"
command). This is necessary when processing output from mysqlbinlog that
may contain blobs.
> { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
> };
>
> @@ -1845,13 +1878,36 @@ static int read_and_execute(bool interac
> ulong line_number=0;
> bool ml_comment= 0;
> COMMANDS *com;
> + ulong line_length= 0;
> status.exit_status=1;
>
> for (;;)
> {
> +
> + real_binary_mode= !interactive&& opt_binary_mode;
interactive and opt_binary_mode don't change in this function, so I
think real_binary_mode can be set outside the loop.
> if (!interactive)
> {
> - line=batch_readline(status.line_buff);
> + line=batch_readline(status.line_buff, real_binary_mode);
> + line_length= status.line_buff->read_length;
> + /*
> + ASCII 0x00 is not allowed appearing in queries if it is not in binary
> + mode.
> + */
> + if (!real_binary_mode&& line&& strlen(line) != line_length)
> + {
> + status.exit_status= 1;
> + String msg;
> + msg.append("ASCII '0x00' appeared in the query. It is disallowed unless "
> + "option --binary-mode is set and 'mysql' is non-interactive. "
> + "Please set --binary-mode 1 if the ASCII '0x00' is expected. "
> + "Query: '");
> + msg.append(glob_buffer);
> + msg.append(line);
> + msg.append("'.");
> + put_info(msg.c_ptr(), INFO_ERROR);
> + break;
> + }
> +
> /*
> Skip UTF8 Byte Order Marker (BOM) 0xEFBBBF.
> Editors like "notepad" put this marker in
> @@ -1913,6 +1969,8 @@ static int read_and_execute(bool interac
> */
> if (opt_outfile&& line)
> fprintf(OUTFILE, "%s\n", line);
> +
> + line_length= line ? strlen(line) : 0;
> }
> // End of file or system error
> if (!line)
> @@ -1929,7 +1987,7 @@ static int read_and_execute(bool interac
> (We want to allow help, print and clear anywhere at line start
> */
> if ((named_cmds || glob_buffer.is_empty())
> - && !ml_comment&& !in_string&&
> (com=find_command(line,0)))
> + && !ml_comment&& !in_string&& (com=find_command(line)))
> {
> if ((*com->func)(&glob_buffer,line)> 0)
> break;
> @@ -1941,7 +1999,7 @@ static int read_and_execute(bool interac
> #endif
> continue;
> }
> - if (add_line(glob_buffer, line,&in_string,&ml_comment,
> + if (add_line(glob_buffer, line, line_length,&in_string,&ml_comment,
> status.line_buff ? status.line_buff->truncated : 0))
> break;
> }
> @@ -1962,71 +2020,102 @@ static int read_and_execute(bool interac
> buffer.free();
> tmpbuf.free();
> #endif
> -
> + real_binary_mode= FALSE;
I think maybe we should add a comment explaining why we set
real_binary_mode to FALSE (if this is a 'source' command, it means we
return to interactive mode, so real_binary_mode should be FALSE. If this
is not a 'source' command, then we are at the end of a piped file, so we
are not going to read more commands and it is safe to set
real_binary_mode to FALSE).
> return status.exit_status;
> }
>
Thanks for splitting the function below into two functions. Please, can
you add comments that explain what each of them does? Both the purpose
of the functions and how real_binary_mode affects the functions.
> +static COMMANDS *find_command(char cmd_char)
> +{
> + DBUG_ENTER("find_command");
> + DBUG_PRINT("enter", ("cmd_char: %d", cmd_char));
> +
> + int index= -1;
> + if (real_binary_mode)
> + {
> + if (cmd_char == 'C')
> + index= charset_index;
> + }
> + else
> + index= get_command_index(cmd_char);
> +
> + if (index>= 0)
> + {
> + DBUG_PRINT("exit",("found command: %s", commands[index].name));
> + DBUG_RETURN(&commands[index]);
> + }
> + else
> + DBUG_RETURN((COMMANDS *) 0);
> +}
>
> -static COMMANDS *find_command(char *name,char cmd_char)
> +static COMMANDS *find_command(char *name)
> {
> uint len;
> char *end;
> DBUG_ENTER("find_command");
> - DBUG_PRINT("enter",("name: '%s' char: %d", name ? name : "NULL", cmd_char));
>
> - if (!name)
> + DBUG_ASSERT(name != NULL);
> + DBUG_PRINT("enter", ("name: '%s'", name));
> +
> + while (my_isspace(charset_info,*name))
> + name++;
> + /*
> + If there is an \\g in the row or if the row has a delimiter but
> + this is not a delimiter command, let add_line() take care of
> + parsing the row and calling find_command()
> + */
> + if ((!real_binary_mode&& strstr(name, "\\g")) ||
> + (strstr(name, delimiter)&&
> + !is_delimiter_command(name, DELIMITER_NAME_LEN)))
> + DBUG_RETURN((COMMANDS *) 0);
> +
> + if ((end=strcont(name," \t")))
> {
> - len=0;
> - end=0;
> + len=(uint) (end - name);
> + while (my_isspace(charset_info,*end))
> + end++;
> + if (!*end)
> + end=0; // no arguments to function
> + }
> + else
> + len=(uint) strlen(name);
> +
> + int index= -1;
> + if (real_binary_mode)
> + {
> + if (is_delimiter_command(name, len))
> + index= delimiter_index;
> }
> else
> {
> - while (my_isspace(charset_info,*name))
> - name++;
> /*
> - If there is an \\g in the row or if the row has a delimiter but
> - this is not a delimiter command, let add_line() take care of
> - parsing the row and calling find_command()
> + All commands are in the first part of commands array and have a function
> + to implemente it.
> */
> - if (strstr(name, "\\g") || (strstr(name, delimiter)&&
> - !(strlen(name)>= 9&&
> - !my_strnncoll(&my_charset_latin1,
> - (uchar*) name, 9,
> - (const uchar*) "delimiter",
> - 9))))
> - DBUG_RETURN((COMMANDS *) 0);
> - if ((end=strcont(name," \t")))
> + for (uint i= 0; commands[i].func; i++)
> {
> - len=(uint) (end - name);
> - while (my_isspace(charset_info,*end))
> - end++;
> - if (!*end)
> - end=0; // no arguments to function
> + if (commands[i].name[len] == '\0'&&
> + !my_strnncoll(&my_charset_latin1, (uchar*)name, len,
> + (uchar*)commands[i].name,len)&&
> + (!end || commands[i].takes_params))
> + {
> + index= i;
> + break;
> + }
> }
> - else
> - len=(uint) strlen(name);
> }
>
> - for (uint i= 0; commands[i].name; i++)
> + if (index>= 0)
> {
> - if (commands[i].func&&
> - ((name&&
> - !my_strnncoll(&my_charset_latin1, (uchar*)name, len,
> - (uchar*)commands[i].name,len)&&
> - !commands[i].name[len]&&
> - (!end || (end&& commands[i].takes_params))) ||
> - (!name&& commands[i].cmd_char == cmd_char)))
> - {
> - DBUG_PRINT("exit",("found command: %s", commands[i].name));
> - DBUG_RETURN(&commands[i]);
> - }
> + DBUG_PRINT("exit",("found command: %s", commands[index].name));
> + DBUG_RETURN(&commands[index]);
> }
> +
> DBUG_RETURN((COMMANDS *) 0);
> }
>
>
> -static bool add_line(String&buffer,char *line,char *in_string,
> - bool *ml_comment, bool truncated)
> +static bool add_line(String&buffer, char *line, ulong line_length,
> + char *in_string, bool *ml_comment, bool truncated)
> {
> uchar inchar;
> char buff[80], *pos, *out;
> @@ -2041,10 +2130,11 @@ static bool add_line(String&buffer,char
> if (status.add_to_history&& line[0]&& not_in_history(line))
> add_history(line);
> #endif
> - char *end_of_line=line+(uint) strlen(line);
> + char *end_of_line= line + line_length;
>
> - for (pos=out=line ; (inchar= (uchar) *pos) ; pos++)
> + for (pos=out=line; pos< end_of_line; pos++)
> {
> + inchar= (uchar) *pos;
> if (!preserve_comments)
> {
> // Skip spaces at the beginning of a statement
> @@ -2084,7 +2174,7 @@ static bool add_line(String&buffer,char
> *out++= (char) inchar;
> continue;
> }
> - if ((com=find_command(NullS,(char) inchar)))
> + if ((com=find_command((char) inchar)))
> {
> // Flush previously accepted characters
> if (out != line)
> @@ -2160,7 +2250,7 @@ static bool add_line(String&buffer,char
>
> pos--;
>
> - if ((com= find_command(buffer.c_ptr(), 0)))
> + if ((com= find_command(buffer.c_ptr())))
> {
>
> if ((*com->func)(&buffer, buffer.c_ptr())> 0)
> @@ -2275,9 +2365,7 @@ static bool add_line(String&buffer,char
> uint length=(uint) (out-line);
>
> if (!truncated&&
> - (length< 9 ||
> - my_strnncoll (charset_info,
> - (uchar *)line, 9, (const uchar *) "delimiter", 9)))
> + (length< 9 || !is_delimiter_command(line, DELIMITER_NAME_LEN)))
I think you can replace the above line by:
!is_delimiter_command(line, length)
> {
> /*
> Don't add a new line in case there's a DELIMITER command to be
>
> === modified file 'client/readline.cc'
> --- a/client/readline.cc 2011-02-09 11:16:33 +0000
> +++ b/client/readline.cc 2011-02-17 02:39:58 +0000
> @@ -1,4 +1,4 @@
> -/* Copyright (C) 2000 MySQL AB
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
> This program is free software; you can redistribute it and/or modify
> it under the terms of the GNU General Public License as published by
> @@ -52,7 +52,7 @@ LINE_BUFFER *batch_readline_init(ulong m
> }
>
>
> -char *batch_readline(LINE_BUFFER *line_buff)
> +char *batch_readline(LINE_BUFFER *line_buff, bool binary_mode)
> {
> char *pos;
> ulong out_length;
> @@ -60,10 +60,14 @@ char *batch_readline(LINE_BUFFER *line_b
> if (!(pos=intern_read_line(line_buff,&out_length)))
> return 0;
> if (out_length&& pos[out_length-1] == '\n')
> - if (--out_length&& pos[out_length-1] == '\r') /* Remove '\n' */
> + {
> + out_length--; /* Remove '\n' */
> + if (!binary_mode&& pos[out_length-1] == '\r')
> out_length--; /* Remove '\r' */
> + }
> line_buff->read_length=out_length;
> pos[out_length]=0;
> + DBUG_DUMP("Query: ", (unsigned char *)pos, out_length);
> return pos;
> }
>
> @@ -223,7 +227,7 @@ char *intern_read_line(LINE_BUFFER *buff
> for (;;)
> {
> pos=buffer->end_of_line;
> - while (*pos != '\n'&& *pos)
> + while (*pos != '\n'&& pos != buffer->end)
> pos++;
> if (pos == buffer->end)
> {
> @@ -249,6 +253,7 @@ char *intern_read_line(LINE_BUFFER *buff
> buffer->truncated= 0;
> buffer->end_of_line=pos+1;
> *out_length=(ulong) (pos + 1 - buffer->eof - buffer->start_of_line);
> + DBUG_DUMP("Query: ", (unsigned char *)buffer->start_of_line, *out_length);
> DBUG_RETURN(buffer->start_of_line);
> }
> }
>
> === added file 'mysql-test/r/mysql_binary_mode.result'
> Binary files a/mysql-test/r/mysql_binary_mode.result 1970-01-01 00:00:00 +0000 and
> b/mysql-test/r/mysql_binary_mode.result 2011-02-17 02:39:58 +0000 differ
>
> === added file 'mysql-test/t/mysql_binary_mode.test'
> --- a/mysql-test/t/mysql_binary_mode.test 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/t/mysql_binary_mode.test 2011-02-17 02:39:58 +0000
> @@ -0,0 +1,45 @@
> +source include/have_binlog_format_statement.inc;
> +
> +--echo # Bug#33048 Not able to recover binary/blob data correctly using mysqlbinlog
> +--echo # --------------------------------------------------------------------------
> +--echo # The test verify that 0x00 and 0x0D0A sequence can be handled correctly by
> +--echo # mysql
> +--echo
> +let $table_name= `SELECT 0x410D0A42`;
> +eval CREATE TABLE `$table_name` (c1 CHAR(100));
> +
> +let $char= `SELECT 0x410042`;
> +eval INSERT INTO `$table_name` VALUES("$char");
> +
> +let $char= `SELECT 0x410D0A42`;
> +eval INSERT INTO `$table_name` VALUES("$char");
> +
> +eval SELECT HEX(c1) FROM `$table_name`;
> +
> +--echo
> +FLUSH LOGS;
> +eval DROP TABLE `$table_name`;
> +
> +--echo
> +let $MYSQLD_DATADIR= `SELECT @@datadir`;
> +--exec $MYSQL_BINLOG $MYSQLD_DATADIR/master-bin.000001>
> $MYSQLTEST_VARDIR/tmp/my.sql
> +
> +--echo # '--exec mysql ...' without --binary-mode option
> +--echo # It creates the table with a wrong table name and generates an error.
> +--error 1
> +--exec $MYSQL test< $MYSQLTEST_VARDIR/tmp/my.sql 2>&1
> +
> +--echo
> +--echo # It is not in binary_mode, so table name '0x410D0A42' is translated to
> +--echo # '0x410A42' by mysql.
> +let $table_name1= `SELECT 0x410A42`;
> +eval DROP TABLE `$table_name1`;
> +
> +--echo
> +--echo # In binary_mode, table name '0x410D0A42' and string '0x410042' can be
> +--echo # handled correctly.
> +--exec $MYSQL --binary-mode test< $MYSQLTEST_VARDIR/tmp/my.sql
> +eval SELECT HEX(c1) FROM `$table_name`;
> +
> +--echo
> +eval DROP TABLE `$table_name`;
>
>
>
>
>