List:Internals« Previous MessageNext Message »
From:Arjen Lentz Date:December 2 2005 1:11am
Subject:Re: Speed patch for escape_string_for_mysql (libmysys)
View as plain text  
Hi Stephan,

saf wrote:
> I'm a developer on the NOOFS project (a filesystem which saves its data in an SQL
> database).
> I needed more speed from the function escape_string_for_mysql(...), so I changed the
> source code.

Mark Matthews and Konstantin Osipov are looking at it.
You'll hear back from them.

Regards,
Arjen.


> My local benchmark results show that this function is now twice as fast
> (without multi-byte
> characters) as before. I couldn't test it with multi-byte characters; if somebody
> at MySQL
> could do so, that would be nice. In principle it should work in both cases.
> 
> I know that this new code is "ugly", but there was no other way to get more
> speed. It's possible to
> write more elegant code, but the function would lose speed.
> 
> The patch is for MySQL 5.0.16.
> 
> Here is my patch; if it works with multi-byte characters, please apply it:
> 
> diff -Naur ../old/mysql-5.0.16/mysys/charset.c mysql-5.0.16/mysys/charset.c
> --- ../old/mysql-5.0.16/mysys/charset.c	2005-11-15 01:12:37.000000000 +0100
> +++ mysql-5.0.16/mysys/charset.c	2005-12-01 00:11:37.000000000 +0100
> @@ -596,95 +596,182 @@
>      >=0         The length of the escaped string
>  */
>  
> -ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
> -                              char *to, ulong to_length,
> -                              const char *from, ulong length)
> -{
> -  const char *to_start= to;
> -  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
> -  my_bool overflow= FALSE;
>  #ifdef USE_MB
> -  my_bool use_mb_flag= use_mb(charset_info);
> -#endif
> -  for (end= from + length; from < end; from++)
> -  {
> -    char escape= 0;
> -#ifdef USE_MB
> -    int tmp_length;
> -    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
> +
> +/* High speed function if no multi-byte character flag */
> +static ulong escape_string_for_mysql_no_mb(char *to, const char *from, ulong
> length)
> +{
> +  ulong i, u;
> +
> +  for (i = 0, u = 0; i < length; i++)
>      {
> -      if (to + tmp_length > to_end)
> -      {
> -        overflow= TRUE;
> -        break;
> -      }
> -      while (tmp_length--)
> -	*to++= *from++;
> -      from--;
> -      continue;
> +      if (!from[i])
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= '0';
> +	  continue;
> +	}
> +      if (from[i] == '\n')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= 'n';
> +	  continue;
> +	}
> +      if (from[i] == '\r')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= 'r';
> +	  continue;
> +	}
> +      if (from[i] == '\\')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= '\\';
> +	  continue;
> +	}
> +      if (from[i] == '\'')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= '\'';
> +	  continue;
> +	}
> +      if (from[i] == '"')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= '"';
> +	  continue;
> +	}
> +      if (from[i] == '\032')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= 'Z';
> +	  continue;
> +	}
> +      to[u++]= from[i];
>      }
> -    /*
> -     If the next character appears to begin a multi-byte character, we
> -     escape that first byte of that apparent multi-byte character. (The
> -     character just looks like a multi-byte character -- if it were actually
> -     a multi-byte character, it would have been passed through in the test
> -     above.)
> -
> -     Without this check, we can create a problem by converting an invalid
> -     multi-byte character into a valid one. For example, 0xbf27 is not
> -     a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
> -    */
> -    if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) >
> 1)
> -      escape= *from;
> -    else
> -#endif
> -    switch (*from) {
> -    case 0:				/* Must be escaped for 'mysql' */
> -      escape= '0';
> -      break;
> -    case '\n':				/* Must be escaped for logs */
> -      escape= 'n';
> -      break;
> -    case '\r':
> -      escape= 'r';
> -      break;
> -    case '\\':
> -      escape= '\\';
> -      break;
> -    case '\'':
> -      escape= '\'';
> -      break;
> -    case '"':				/* Better safe than sorry */
> -      escape= '"';
> -      break;
> -    case '\032':			/* This gives problems on Win32 */
> -      escape= 'Z';
> -      break;
> -    }
> -    if (escape)
> +  to[u]= 0;
> +  return (u); 
> +}
> +
> +#define CHECK_AND_ESCAPE(src, replace_token) \
> +      if (from[i] == src) \
> +	{ \
> +	  to[u++]= '\\'; \
> +	  to[u++]= replace_token; \
> +	  i++; \
> +	  continue; \
> +	}
> +
> +/* We can't detect overflow on the to string, it depends how much
> +   memory the client has allocated. */
> +static escape_string_for_mysql_mb(CHARSET_INFO *charset_info,
> +				  char *to, const char *from, ulong length)
> +{
> +  ulong i, u;
> +  const char *end;
> +  
> +  end = from + length;
> +  for (i = 0, u = 0; i < length;)
>      {
> -      if (to + 2 > to_end)
> -      {
> -        overflow= TRUE;
> -        break;
> -      }
> -      *to++= '\\';
> -      *to++= escape;
> +      int		tmp_length;
> +
> +      if ((tmp_length= my_ismbchar(charset_info, from, end)))
> +	{
> +	  if (tmp_length)
> +	    {
> +	      if (i + tmp_length > length)
> +		return (ulong)~0;
> +	      while (tmp_length--)
> +		to[u++]= from[i++];
> +	    }
> +	  continue;
> +	}
> +      CHECK_AND_ESCAPE(0, '0');
> +      CHECK_AND_ESCAPE('\n', 'n');
> +      CHECK_AND_ESCAPE('\r', 'r');
> +      CHECK_AND_ESCAPE('\\', '\\');
> +      CHECK_AND_ESCAPE('\'', '\'');
> +      CHECK_AND_ESCAPE('"', '"');
> +      CHECK_AND_ESCAPE('\032', 'Z');
> +
> +      /* Normal behaviour, only copy */
> +      to[u++]= from[i];
> +      i++;
>      }
> -    else
> +  to[u]= 0;
> +  return (u);
> +}
> +
> +ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
> +                              char *to, ulong to_length,
> +                              const char *from, ulong length)
> +{
> +  my_bool use_mb_flag= use_mb(charset_info);
> +
> +  if (!use_mb_flag)
> +    return escape_string_for_mysql_no_mb(to, from, length);
> +  return escape_string_for_mysql_mb(charset_info, to, from, length);
> +}
> +
> +#else
> +
> +/* high optimization without MB */
> +ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
> +                              char *to, ulong to_length,
> +                              const char *from, ulong length)
> +{
> +  ulong i, u;
> +
> +  for (i = 0, u = 0; i < length; i++)
>      {
> -      if (to + 1 > to_end)
> -      {
> -        overflow= TRUE;
> -        break;
> -      }
> -      *to++= *from;
> +      if (!from[i])
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= '0';
> +	  continue;
> +	}
> +      if (from[i] == '\n')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= 'n';
> +	  continue;
> +	}
> +      if (from[i] == '\r')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= 'r';
> +	  continue;
> +	}
> +      if (from[i] == '\\')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= '\\';
> +	  continue;
> +	}
> +      if (from[i] == '\'')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= '\'';
> +	  continue;
> +	}
> +      if (from[i] == '"')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= '"';
> +	  continue;
> +	}
> +      if (from[i] == '\032')
> +	{
> +	  to[u++]= '\\';
> +	  to[u++]= 'Z';
> +	  continue;
> +	}
> +      to[u++]= from[i];
>      }
> -  }
> -  *to= 0;
> -  return overflow ? (ulong)~0 : (ulong) (to - to_start);
> +  to[u]= 0;
> +  return (u); 
>  }
> -
> +#endif
>  
>  #ifdef BACKSLASH_MBTAIL
>  static CHARSET_INFO *fs_cset_cache= NULL;
> 


-- 
Arjen Lentz, Community Relations Manager, MySQL AB
Program Chair, MySQL Users Conference

MySQL Users Conference 2006 (Santa Clara CA, 24-27 April)
http://www.mysqluc.com/
Thread
Speed patch for escape_string_for_mysql (libmysys)(saf)1 Dec
  • Re: Speed patch for escape_string_for_mysql (libmysys)Olaf van der Spek1 Dec
  • Re: Speed patch for escape_string_for_mysql (libmysys)Arjen Lentz2 Dec
    • Re: Speed patch for escape_string_for_mysql (libmysys)(saf)2 Dec