List:Internals« Previous MessageNext Message »
From:saf Date:November 30 2005 11:37pm
Subject:Speed patch for escape_string_for_mysql (libmysys)
View as plain text  
Hi,

I'm a developer at the NOOFS project (a filesystem which saves its data in an SQL database).
I needed more speed from the function escape_string_for_mysql(...), so I changed the source
code.

My local benchmark results show that this function is now twice as fast (without
multi-byte
characters) as before. I couldn't test it with multi-byte characters; if somebody at
MySQL
could do it, that would be nice. "Normally" it should work both with and without them.

I know that this new code is "ugly", but there was no other way to get more speed.
It's possible to
write more beautiful code, but the function would lose speed.

The patch is for MySQL 5.0.16.

Here is my patch; if it works with multi-byte characters, please apply it:

diff -Naur ../old/mysql-5.0.16/mysys/charset.c mysql-5.0.16/mysys/charset.c
--- ../old/mysql-5.0.16/mysys/charset.c	2005-11-15 01:12:37.000000000 +0100
+++ mysql-5.0.16/mysys/charset.c	2005-12-01 00:11:37.000000000 +0100
@@ -596,95 +596,182 @@
     >=0         The length of the escaped string
 */
 
-ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
-                              char *to, ulong to_length,
-                              const char *from, ulong length)
-{
-  const char *to_start= to;
-  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
-  my_bool overflow= FALSE;
 #ifdef USE_MB
-  my_bool use_mb_flag= use_mb(charset_info);
-#endif
-  for (end= from + length; from < end; from++)
-  {
-    char escape= 0;
-#ifdef USE_MB
-    int tmp_length;
-    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
+
+/* High speed function if no multi-byte character flag */
+static ulong escape_string_for_mysql_no_mb(char *to, const char *from, ulong length)
+{
+  ulong i, u;
+
+  for (i = 0, u = 0; i < length; i++)
     {
-      if (to + tmp_length > to_end)
-      {
-        overflow= TRUE;
-        break;
-      }
-      while (tmp_length--)
-	*to++= *from++;
-      from--;
-      continue;
+      if (!from[i])
+	{
+	  to[u++]= '\\';
+	  to[u++]= '0';
+	  continue;
+	}
+      if (from[i] == '\n')
+	{
+	  to[u++]= '\\';
+	  to[u++]= 'n';
+	  continue;
+	}
+      if (from[i] == '\r')
+	{
+	  to[u++]= '\\';
+	  to[u++]= 'r';
+	  continue;
+	}
+      if (from[i] == '\\')
+	{
+	  to[u++]= '\\';
+	  to[u++]= '\\';
+	  continue;
+	}
+      if (from[i] == '\'')
+	{
+	  to[u++]= '\\';
+	  to[u++]= '\'';
+	  continue;
+	}
+      if (from[i] == '"')
+	{
+	  to[u++]= '\\';
+	  to[u++]= '"';
+	  continue;
+	}
+      if (from[i] == '\032')
+	{
+	  to[u++]= '\\';
+	  to[u++]= 'Z';
+	  continue;
+	}
+      to[u++]= from[i];
     }
-    /*
-     If the next character appears to begin a multi-byte character, we
-     escape that first byte of that apparent multi-byte character. (The
-     character just looks like a multi-byte character -- if it were actually
-     a multi-byte character, it would have been passed through in the test
-     above.)
-
-     Without this check, we can create a problem by converting an invalid
-     multi-byte character into a valid one. For example, 0xbf27 is not
-     a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
-    */
-    if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
-      escape= *from;
-    else
-#endif
-    switch (*from) {
-    case 0:				/* Must be escaped for 'mysql' */
-      escape= '0';
-      break;
-    case '\n':				/* Must be escaped for logs */
-      escape= 'n';
-      break;
-    case '\r':
-      escape= 'r';
-      break;
-    case '\\':
-      escape= '\\';
-      break;
-    case '\'':
-      escape= '\'';
-      break;
-    case '"':				/* Better safe than sorry */
-      escape= '"';
-      break;
-    case '\032':			/* This gives problems on Win32 */
-      escape= 'Z';
-      break;
-    }
-    if (escape)
+  to[u]= 0;
+  return (u); 
+}
+
+#define CHECK_AND_ESCAPE(src, replace_token) \
+      if (from[i] == src) \
+	{ \
+	  to[u++]= '\\'; \
+	  to[u++]= replace_token; \
+	  i++; \
+	  continue; \
+	}
+
+/* We can't detect overflow on the to string, it depends how much
+   memory the client has allocated. */
+static escape_string_for_mysql_mb(CHARSET_INFO *charset_info,
+				  char *to, const char *from, ulong length)
+{
+  ulong i, u;
+  const char *end;
+  
+  end = from + length;
+  for (i = 0, u = 0; i < length;)
     {
-      if (to + 2 > to_end)
-      {
-        overflow= TRUE;
-        break;
-      }
-      *to++= '\\';
-      *to++= escape;
+      int		tmp_length;
+
+      if ((tmp_length= my_ismbchar(charset_info, from, end)))
+	{
+	  if (tmp_length)
+	    {
+	      if (i + tmp_length > length)
+		return (ulong)~0;
+	      while (tmp_length--)
+		to[u++]= from[i++];
+	    }
+	  continue;
+	}
+      CHECK_AND_ESCAPE(0, '0');
+      CHECK_AND_ESCAPE('\n', 'n');
+      CHECK_AND_ESCAPE('\r', 'r');
+      CHECK_AND_ESCAPE('\\', '\\');
+      CHECK_AND_ESCAPE('\'', '\'');
+      CHECK_AND_ESCAPE('"', '"');
+      CHECK_AND_ESCAPE('\032', 'Z');
+
+      /* Normal behaviour, only copy */
+      to[u++]= from[i];
+      i++;
     }
-    else
+  to[u]= 0;
+  return (u);
+}
+
+ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
+                              char *to, ulong to_length,
+                              const char *from, ulong length)
+{
+  my_bool use_mb_flag= use_mb(charset_info);
+
+  if (!use_mb_flag)
+    return escape_string_for_mysql_no_mb(to, from, length);
+  return escape_string_for_mysql_mb(charset_info, to, from, length);
+}
+
+#else
+
+/* high optimization without MB */
+ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
+                              char *to, ulong to_length,
+                              const char *from, ulong length)
+{
+  ulong i, u;
+
+  for (i = 0, u = 0; i < length; i++)
     {
-      if (to + 1 > to_end)
-      {
-        overflow= TRUE;
-        break;
-      }
-      *to++= *from;
+      if (!from[i])
+	{
+	  to[u++]= '\\';
+	  to[u++]= '0';
+	  continue;
+	}
+      if (from[i] == '\n')
+	{
+	  to[u++]= '\\';
+	  to[u++]= 'n';
+	  continue;
+	}
+      if (from[i] == '\r')
+	{
+	  to[u++]= '\\';
+	  to[u++]= 'r';
+	  continue;
+	}
+      if (from[i] == '\\')
+	{
+	  to[u++]= '\\';
+	  to[u++]= '\\';
+	  continue;
+	}
+      if (from[i] == '\'')
+	{
+	  to[u++]= '\\';
+	  to[u++]= '\'';
+	  continue;
+	}
+      if (from[i] == '"')
+	{
+	  to[u++]= '\\';
+	  to[u++]= '"';
+	  continue;
+	}
+      if (from[i] == '\032')
+	{
+	  to[u++]= '\\';
+	  to[u++]= 'Z';
+	  continue;
+	}
+      to[u++]= from[i];
     }
-  }
-  *to= 0;
-  return overflow ? (ulong)~0 : (ulong) (to - to_start);
+  to[u]= 0;
+  return (u); 
 }
-
+#endif
 
 #ifdef BACKSLASH_MBTAIL
 static CHARSET_INFO *fs_cset_cache= NULL;

-- 
Best regards,
Stephan FERRARO
NOOFS Core Developper - http://www.noofs.org/
GnuPG public key: gpg --keyserver www.keyserver.net --recv-key 94B2664F

Attachment: [application/pgp-signature] Digital signature signature.asc
Thread
Speed patch for escape_string_for_mysql (libmysys)(saf)1 Dec
  • Re: Speed patch for escape_string_for_mysql (libmysys)Olaf van der Spek1 Dec
  • Re: Speed patch for escape_string_for_mysql (libmysys)Arjen Lentz2 Dec
    • Re: Speed patch for escape_string_for_mysql (libmysys)(saf)2 Dec