List:Commits« Previous MessageNext Message »
From:Sven Sandberg Date:February 17 2011 6:18pm
Subject:Re: bzr commit into mysql-trunk branch (anders.song:3653) Bug#11747577
View as plain text  
Hi Libing,

Thanks for this work. This is a complicated piece of code and I could 
not review the whole patch. I will be away tomorrow so I send this 
partial review so that you have something to start work on.

See comments inline.

/Sven


On 02/17/2011 03:40 AM, Libing Song wrote:
> #At file:///home/songlibing/bzrwork/wt3/mysql-trunk/ based on
> revid:dmitry.lenev@stripped
>
>   3653 Libing Song	2011-02-17
>        BUG#11747577 Not able to recover binary/blob data correctly using mysqlbinlog
>
>        The queries generated by mysqlbinlog could not be executed by 'mysql' client
>        if there was any 0x00 in it. Or the queries recovered diverged data if there
>        was any 0x0D0A sequence in it. The reason is that 'mysql' client always
>        translated '0x0D0A'('\r\n') to '0x0A'('\n') and treated '0x00' as the end
>        of a query if it is in non-interactive mode(receiving queries from a file
>        or a pipe).
>
>        This patch added the option 'binary-mode' in 'mysql' program.  'mysql' will
>        not translated '0x0D0A' to '0x0A' and not treat '0x00' as the end of a query
>        if binary-mode is set 1 and it is in non-interactive mode.  Meanwhile all
>        mysql client commands except 'delimiter' and '\C' are disabled. This will
>        improve the performance when mysql applies huge logs.
>
>      added:
>        mysql-test/r/mysql_binary_mode.result
>        mysql-test/t/mysql_binary_mode.test
>      modified:
>        client/client_priv.h
>        client/my_readline.h
>        client/mysql.cc
>        client/readline.cc
> === modified file 'client/client_priv.h'
> --- a/client/client_priv.h	2011-01-16 04:02:29 +0000
> +++ b/client/client_priv.h	2011-02-17 02:39:58 +0000
> @@ -1,4 +1,4 @@
> -/* Copyright (C) 2001-2006 MySQL AB, 2009 Sun Microsystems, Inc
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
>      This program is free software; you can redistribute it and/or modify
>      it under the terms of the GNU General Public License as published by
> @@ -86,6 +86,7 @@ enum options_client
>     OPT_DEFAULT_PLUGIN,
>     OPT_RAW_OUTPUT, OPT_WAIT_SERVER_ID, OPT_STOP_NEVER,
>     OPT_BINLOG_ROWS_EVENT_MAX_SIZE,
> +  OPT_BINARY_MODE,
>     OPT_MAX_CLIENT_OPTION
>   };
>
>
> === modified file 'client/my_readline.h'
> --- a/client/my_readline.h	2011-02-05 05:04:15 +0000
> +++ b/client/my_readline.h	2011-02-17 02:39:58 +0000
> @@ -1,7 +1,7 @@
>   #ifndef CLIENT_MY_READLINE_INCLUDED
>   #define CLIENT_MY_READLINE_INCLUDED
>
> -/* Copyright (C) 2000 MySQL AB
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
>      This program is free software; you can redistribute it and/or modify
>      it under the terms of the GNU General Public License as published by
> @@ -34,7 +34,7 @@ typedef struct st_line_buffer
>
>   extern LINE_BUFFER *batch_readline_init(ulong max_size,FILE *file);
>   extern LINE_BUFFER *batch_readline_command(LINE_BUFFER *buffer, char * str);
> -extern char *batch_readline(LINE_BUFFER *buffer);
> +extern char *batch_readline(LINE_BUFFER *buffer, bool binary_mode);
>   extern void batch_readline_end(LINE_BUFFER *buffer);
>
>   #endif /* CLIENT_MY_READLINE_INCLUDED */
>
> === modified file 'client/mysql.cc'
> --- a/client/mysql.cc	2011-02-15 12:38:39 +0000
> +++ b/client/mysql.cc	2011-02-17 02:39:58 +0000
> @@ -1,4 +1,4 @@
> -/* Copyright 2000, 2010, Oracle and/or its affiliates. All rights reserved.
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
>      This program is free software; you can redistribute it and/or modify
>      it under the terms of the GNU General Public License as published by
> @@ -150,6 +150,7 @@ static uint my_end_arg;
>   static char * opt_mysql_unix_port=0;
>   static char *opt_bind_addr = NULL;
>   static int connect_flag=CLIENT_INTERACTIVE;
> +static my_bool opt_binary_mode= FALSE;
>   static char *current_host,*current_db,*current_user=0,*opt_password=0,
>               *current_prompt=0, *delimiter_str= 0,
>               *default_charset= (char*) MYSQL_AUTODETECT_CHARSET_NAME,
> @@ -1044,9 +1045,10 @@ static void initialize_readline (char *n
>   static void fix_history(String *final_command);
>   #endif
>
> -static COMMANDS *find_command(char *name,char cmd_name);
> -static bool add_line(String&buffer,char *line,char *in_string,
> -                     bool *ml_comment, bool truncated);
> +static COMMANDS *find_command(char *name);
> +static COMMANDS *find_command(char cmd_name);
> +static bool add_line(String&buffer, char *line, ulong line_length,
> +                     char *in_string, bool *ml_comment, bool truncated);
>   static void remove_cntrl(String&buffer);
>   static void print_table_data(MYSQL_RES *result);
>   static void print_table_data_html(MYSQL_RES *result);
> @@ -1064,6 +1066,30 @@ extern "C" sig_handler handle_sigint(int
>   static sig_handler window_resize(int sig);
>   #endif
>
> +const char DELIMITER_NAME[]= "delimiter";
> +const uint DELIMITER_NAME_LEN= sizeof(DELIMITER_NAME) - 1;
> +inline bool is_delimiter_command(char *name, ulong len)
> +{
> +  return  (len == DELIMITER_NAME_LEN&&
> +           !my_strnncoll(charset_info, (uchar*)name, DELIMITER_NAME_LEN,
> +                         (uchar *)DELIMITER_NAME, DELIMITER_NAME_LEN));
> +}
> +
> +inline int get_command_index(char cmd_char)

Please add a comment that explains what this function does.

> +{
> +  /*
> +    All commands are in the first part of commands array and have a function
> +    to implemente it.

Clarification: s/commands/client-specific commands/

Typo: s/implemente/implement/

> +  */
> +  for (uint i=0; *commands[i].func; i++)
> +    if (commands[i].cmd_char == cmd_char)
> +      return i;
> +  return -1;
> +}
> +
> +static int delimiter_index= -1;
> +static int charset_index= -1;
> +static bool real_binary_mode= FALSE;
>
>   int main(int argc,char *argv[])
>   {
> @@ -1072,7 +1098,9 @@ int main(int argc,char *argv[])
>     MY_INIT(argv[0]);
>     DBUG_ENTER("main");
>     DBUG_PROCESS(argv[0]);
> -
> +
> +  charset_index= get_command_index('C');
> +  delimiter_index= get_command_index('d');
>     delimiter_str= delimiter;
>     default_prompt = my_strdup(getenv("MYSQL_PS1") ?
>   			     getenv("MYSQL_PS1") :
> @@ -1574,6 +1602,11 @@ static struct my_option my_long_options[
>       "Default authentication client-side plugin to use.",
>      (uchar**)&opt_default_auth, (uchar**)&opt_default_auth, 0,
>      GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
> +  {"binary-mode", OPT_BINARY_MODE,
> +   "ASCII '0x00' is allowed in queries and '0x0D0A'('\\r\\n') sequence "
> +   "is not translated to '0x0A'('\\n') if 'mysql' is non-interactive and "
> +   "--binary-mode is set.",
> +&opt_binary_mode,&opt_binary_mode, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},

I think the option has to be more clearly explained. It's not obvious 
what the default behavior is, so I would suggest that we first describe 
that and then describe how the flag changes the behavior.

Maybe something like:

By default, ASCII '\0' is disallowed and '\r\n' is translated to '\n'. 
This switch turns off both these features in non-interactive mode (i.e., 
for input that is either piped to mysql or loaded using the "source" 
command). This is necessary when processing output from mysqlbinlog that 
may contain blobs.

>     { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
>   };
>
> @@ -1845,13 +1878,36 @@ static int read_and_execute(bool interac
>     ulong line_number=0;
>     bool ml_comment= 0;
>     COMMANDS *com;
> +  ulong line_length= 0;
>     status.exit_status=1;
>
>     for (;;)
>     {
> +
> +    real_binary_mode= !interactive&&  opt_binary_mode;

interactive and opt_binary_mode don't change in this function, so I 
think real_binary_mode can be set outside the loop.

>       if (!interactive)
>       {
> -      line=batch_readline(status.line_buff);
> +      line=batch_readline(status.line_buff, real_binary_mode);
> +      line_length= status.line_buff->read_length;
> +      /*
> +        ASCII 0x00 is not allowed appearing in queries if it is not in binary
> +        mode.
> +      */
> +      if (!real_binary_mode&&  line&&  strlen(line) != line_length)
> +      {
> +        status.exit_status= 1;
> +        String msg;
> +        msg.append("ASCII '0x00' appeared in the query.  It is disallowed unless "
> +                   "option --binary-mode is set and 'mysql' is non-interactive.  "
> +                   "Please set --binary-mode 1 if the ASCII '0x00' is expected.  "
> +                   "Query: '");
> +        msg.append(glob_buffer);
> +        msg.append(line);
> +        msg.append("'.");
> +        put_info(msg.c_ptr(), INFO_ERROR);
> +        break;
> +      }
> +
>         /*
>           Skip UTF8 Byte Order Marker (BOM) 0xEFBBBF.
>           Editors like "notepad" put this marker in
> @@ -1913,6 +1969,8 @@ static int read_and_execute(bool interac
>         */
>         if (opt_outfile&&  line)
>   	fprintf(OUTFILE, "%s\n", line);
> +
> +      line_length= line ? strlen(line) : 0;
>       }
>       // End of file or system error
>       if (!line)
> @@ -1929,7 +1987,7 @@ static int read_and_execute(bool interac
>         (We want to allow help, print and clear anywhere at line start
>       */
>       if ((named_cmds || glob_buffer.is_empty())
> -	&&  !ml_comment&&  !in_string&& 
> (com=find_command(line,0)))
> +	&&  !ml_comment&&  !in_string&&  (com=find_command(line)))
>       {
>         if ((*com->func)(&glob_buffer,line)>  0)
>   	break;
> @@ -1941,7 +1999,7 @@ static int read_and_execute(bool interac
>   #endif
>         continue;
>       }
> -    if (add_line(glob_buffer, line,&in_string,&ml_comment,
> +    if (add_line(glob_buffer, line, line_length,&in_string,&ml_comment,
>                    status.line_buff ? status.line_buff->truncated : 0))
>         break;
>     }
> @@ -1962,71 +2020,102 @@ static int read_and_execute(bool interac
>     buffer.free();
>     tmpbuf.free();
>   #endif
> -
> +  real_binary_mode= FALSE;

I think maybe we should add a comment explaining why we set 
real_binary_mode to FALSE (if this is a 'source' command, it means we 
return to interactive mode, so real_binary_mode should be FALSE. If this 
is not a 'source' command, then we are at the end of a piped file, so we 
are not going to read more commands and it is safe to set 
real_binary_mode to FALSE).

>     return status.exit_status;
>   }
>

Thanks for splitting the function below into two functions. Please, can 
you add comments that explain what each of them does? Both the purpose 
of the functions and how real_binary_mode affects the functions.

> +static COMMANDS *find_command(char cmd_char)
> +{
> +  DBUG_ENTER("find_command");
> +  DBUG_PRINT("enter", ("cmd_char: %d", cmd_char));
> +
> +  int index= -1;
> +  if (real_binary_mode)
> +  {
> +    if (cmd_char == 'C')
> +      index= charset_index;
> +  }
> +  else
> +    index= get_command_index(cmd_char);
> +
> +  if (index>= 0)
> +  {
> +    DBUG_PRINT("exit",("found command: %s", commands[index].name));
> +    DBUG_RETURN(&commands[index]);
> +  }
> +  else
> +    DBUG_RETURN((COMMANDS *) 0);
> +}
>
> -static COMMANDS *find_command(char *name,char cmd_char)
> +static COMMANDS *find_command(char *name)
>   {
>     uint len;
>     char *end;
>     DBUG_ENTER("find_command");
> -  DBUG_PRINT("enter",("name: '%s'  char: %d", name ? name : "NULL", cmd_char));
>
> -  if (!name)
> +  DBUG_ASSERT(name != NULL);
> +  DBUG_PRINT("enter", ("name: '%s'", name));
> +
> +  while (my_isspace(charset_info,*name))
> +    name++;
> +  /*
> +    If there is an \\g in the row or if the row has a delimiter but
> +    this is not a delimiter command, let add_line() take care of
> +    parsing the row and calling find_command()
> +  */
> +  if ((!real_binary_mode&&  strstr(name, "\\g")) ||
> +      (strstr(name, delimiter)&&
> +       !is_delimiter_command(name, DELIMITER_NAME_LEN)))
> +      DBUG_RETURN((COMMANDS *) 0);
> +
> +  if ((end=strcont(name," \t")))
>     {
> -    len=0;
> -    end=0;
> +    len=(uint) (end - name);
> +    while (my_isspace(charset_info,*end))
> +      end++;
> +    if (!*end)
> +      end=0;					// no arguments to function
> +  }
> +  else
> +    len=(uint) strlen(name);
> +
> +  int index= -1;
> +  if (real_binary_mode)
> +  {
> +    if (is_delimiter_command(name, len))
> +      index= delimiter_index;
>     }
>     else
>     {
> -    while (my_isspace(charset_info,*name))
> -      name++;
>       /*
> -      If there is an \\g in the row or if the row has a delimiter but
> -      this is not a delimiter command, let add_line() take care of
> -      parsing the row and calling find_command()
> +      All commands are in the first part of commands array and have a function
> +      to implemente it.
>       */
> -    if (strstr(name, "\\g") || (strstr(name, delimiter)&&
> -                                !(strlen(name)>= 9&&
> -                                  !my_strnncoll(&my_charset_latin1,
> -                                                (uchar*) name, 9,
> -                                                (const uchar*) "delimiter",
> -                                                9))))
> -      DBUG_RETURN((COMMANDS *) 0);
> -    if ((end=strcont(name," \t")))
> +    for (uint i= 0; commands[i].func; i++)
>       {
> -      len=(uint) (end - name);
> -      while (my_isspace(charset_info,*end))
> -	end++;
> -      if (!*end)
> -	end=0;					// no arguments to function
> +      if (commands[i].name[len] == '\0'&&
> +          !my_strnncoll(&my_charset_latin1, (uchar*)name, len,
> +                        (uchar*)commands[i].name,len)&&
> +          (!end || commands[i].takes_params))
> +      {
> +        index= i;
> +        break;
> +      }
>       }
> -    else
> -      len=(uint) strlen(name);
>     }
>
> -  for (uint i= 0; commands[i].name; i++)
> +  if (index>= 0)
>     {
> -    if (commands[i].func&&
> -	((name&&
> -	  !my_strnncoll(&my_charset_latin1, (uchar*)name, len,
> -				     (uchar*)commands[i].name,len)&&
> -	  !commands[i].name[len]&&
> -	  (!end || (end&&  commands[i].takes_params))) ||
> -	 (!name&&  commands[i].cmd_char == cmd_char)))
> -    {
> -      DBUG_PRINT("exit",("found command: %s", commands[i].name));
> -      DBUG_RETURN(&commands[i]);
> -    }
> +    DBUG_PRINT("exit",("found command: %s", commands[index].name));
> +    DBUG_RETURN(&commands[index]);
>     }
> +
>     DBUG_RETURN((COMMANDS *) 0);
>   }
>
>
> -static bool add_line(String&buffer,char *line,char *in_string,
> -                     bool *ml_comment, bool truncated)
> +static bool add_line(String&buffer, char *line, ulong line_length,
> +                     char *in_string, bool *ml_comment, bool truncated)
>   {
>     uchar inchar;
>     char buff[80], *pos, *out;
> @@ -2041,10 +2130,11 @@ static bool add_line(String&buffer,char
>     if (status.add_to_history&&  line[0]&&  not_in_history(line))
>       add_history(line);
>   #endif
> -  char *end_of_line=line+(uint) strlen(line);
> +  char *end_of_line= line + line_length;
>
> -  for (pos=out=line ; (inchar= (uchar) *pos) ; pos++)
> +  for (pos=out=line; pos<  end_of_line; pos++)
>     {
> +    inchar= (uchar) *pos;
>       if (!preserve_comments)
>       {
>         // Skip spaces at the beginning of a statement
> @@ -2084,7 +2174,7 @@ static bool add_line(String&buffer,char
>   	*out++= (char) inchar;
>   	continue;
>         }
> -      if ((com=find_command(NullS,(char) inchar)))
> +      if ((com=find_command((char) inchar)))
>         {
>           // Flush previously accepted characters
>           if (out != line)
> @@ -2160,7 +2250,7 @@ static bool add_line(String&buffer,char
>
>         pos--;
>
> -      if ((com= find_command(buffer.c_ptr(), 0)))
> +      if ((com= find_command(buffer.c_ptr())))
>         {
>
>           if ((*com->func)(&buffer, buffer.c_ptr())>  0)
> @@ -2275,9 +2365,7 @@ static bool add_line(String&buffer,char
>       uint length=(uint) (out-line);
>
>       if (!truncated&&
> -        (length<  9 ||
> -         my_strnncoll (charset_info,
> -                       (uchar *)line, 9, (const uchar *) "delimiter", 9)))
> +        (length<  9 || !is_delimiter_command(line, DELIMITER_NAME_LEN)))

I think you can replace the above line by:
  !is_delimiter_command(line, length)

>       {
>         /*
>           Don't add a new line in case there's a DELIMITER command to be
>
> === modified file 'client/readline.cc'
> --- a/client/readline.cc	2011-02-09 11:16:33 +0000
> +++ b/client/readline.cc	2011-02-17 02:39:58 +0000
> @@ -1,4 +1,4 @@
> -/* Copyright (C) 2000 MySQL AB
> +/* Copyright 2000, 2011, Oracle and/or its affiliates. All rights reserved.
>
>      This program is free software; you can redistribute it and/or modify
>      it under the terms of the GNU General Public License as published by
> @@ -52,7 +52,7 @@ LINE_BUFFER *batch_readline_init(ulong m
>   }
>
>
> -char *batch_readline(LINE_BUFFER *line_buff)
> +char *batch_readline(LINE_BUFFER *line_buff, bool binary_mode)
>   {
>     char *pos;
>     ulong out_length;
> @@ -60,10 +60,14 @@ char *batch_readline(LINE_BUFFER *line_b
>     if (!(pos=intern_read_line(line_buff,&out_length)))
>       return 0;
>     if (out_length&&  pos[out_length-1] == '\n')
> -    if (--out_length&&  pos[out_length-1] == '\r')  /* Remove '\n' */
> +  {
> +    out_length--;                                   /* Remove '\n' */
> +    if (!binary_mode&&  pos[out_length-1] == '\r')
>         out_length--;                                 /* Remove '\r' */
> +  }
>     line_buff->read_length=out_length;
>     pos[out_length]=0;
> +  DBUG_DUMP("Query: ", (unsigned char *)pos, out_length);
>     return pos;
>   }
>
> @@ -223,7 +227,7 @@ char *intern_read_line(LINE_BUFFER *buff
>     for (;;)
>     {
>       pos=buffer->end_of_line;
> -    while (*pos != '\n'&&  *pos)
> +    while (*pos != '\n'&&  pos != buffer->end)
>         pos++;
>       if (pos == buffer->end)
>       {
> @@ -249,6 +253,7 @@ char *intern_read_line(LINE_BUFFER *buff
>         buffer->truncated= 0;
>       buffer->end_of_line=pos+1;
>       *out_length=(ulong) (pos + 1 - buffer->eof - buffer->start_of_line);
> +    DBUG_DUMP("Query: ", (unsigned char *)buffer->start_of_line, *out_length);
>       DBUG_RETURN(buffer->start_of_line);
>     }
>   }
>
> === added file 'mysql-test/r/mysql_binary_mode.result'
> Binary files a/mysql-test/r/mysql_binary_mode.result	1970-01-01 00:00:00 +0000 and
> b/mysql-test/r/mysql_binary_mode.result	2011-02-17 02:39:58 +0000 differ
>
> === added file 'mysql-test/t/mysql_binary_mode.test'
> --- a/mysql-test/t/mysql_binary_mode.test	1970-01-01 00:00:00 +0000
> +++ b/mysql-test/t/mysql_binary_mode.test	2011-02-17 02:39:58 +0000
> @@ -0,0 +1,45 @@
> +source include/have_binlog_format_statement.inc;
> +
> +--echo # Bug#33048 Not able to recover binary/blob data correctly using mysqlbinlog
> +--echo # --------------------------------------------------------------------------
> +--echo # The test verify that 0x00 and 0x0D0A sequence can be handled correctly by
> +--echo # mysql
> +--echo
> +let $table_name= `SELECT 0x410D0A42`;
> +eval CREATE TABLE `$table_name` (c1 CHAR(100));
> +
> +let $char= `SELECT 0x410042`;
> +eval INSERT INTO `$table_name` VALUES("$char");
> +
> +let $char= `SELECT 0x410D0A42`;
> +eval INSERT INTO `$table_name` VALUES("$char");
> +
> +eval SELECT HEX(c1) FROM `$table_name`;
> +
> +--echo
> +FLUSH LOGS;
> +eval DROP TABLE `$table_name`;
> +
> +--echo
> +let $MYSQLD_DATADIR= `SELECT @@datadir`;
> +--exec $MYSQL_BINLOG $MYSQLD_DATADIR/master-bin.000001> 
> $MYSQLTEST_VARDIR/tmp/my.sql
> +
> +--echo # '--exec mysql ...' without --binary-mode option
> +--echo # It creates the table with a wrong table name and generates an error.
> +--error 1
> +--exec $MYSQL test<  $MYSQLTEST_VARDIR/tmp/my.sql 2>&1
> +
> +--echo
> +--echo # It is not in binary_mode, so table name '0x410D0A42' is translated to
> +--echo # '0x410A42' by mysql.
> +let $table_name1= `SELECT 0x410A42`;
> +eval DROP TABLE `$table_name1`;
> +
> +--echo
> +--echo # In binary_mode, table name '0x410D0A42' and string '0x410042' can be
> +--echo # handled correctly.
> +--exec $MYSQL --binary-mode test<  $MYSQLTEST_VARDIR/tmp/my.sql
> +eval SELECT HEX(c1) FROM `$table_name`;
> +
> +--echo
> +eval DROP TABLE `$table_name`;
>
>
>
>
>

Thread
bzr commit into mysql-trunk branch (anders.song:3653) Bug#11747577Libing Song17 Feb
  • Re: bzr commit into mysql-trunk branch (anders.song:3653) Bug#11747577Sven Sandberg17 Feb