Hi,
I'm a developer on the NOOFS project (a filesystem which saves its data in an SQL database).
I needed more speed for the function escape_string_for_mysql(...), so I changed the source
code.
In my local benchmark tests, this function is now twice as fast as before (for input
without multi-byte characters). I couldn't test it with multi-byte characters; if
somebody at MySQL could do that, it would be nice. "Normally" it should work in both
cases.
I know that this new code is "ugly", but there was no other way to gain more speed.
It's possible to write nicer-looking code, but the function would lose speed.
The patch is for MySQL 5.0.16.
Here is my patch; if it works with multi-byte characters, please apply it:
diff -Naur ../old/mysql-5.0.16/mysys/charset.c mysql-5.0.16/mysys/charset.c
--- ../old/mysql-5.0.16/mysys/charset.c 2005-11-15 01:12:37.000000000 +0100
+++ mysql-5.0.16/mysys/charset.c 2005-12-01 00:11:37.000000000 +0100
@@ -596,95 +596,182 @@
>=0 The length of the escaped string
*/
-ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
- char *to, ulong to_length,
- const char *from, ulong length)
-{
- const char *to_start= to;
- const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
- my_bool overflow= FALSE;
#ifdef USE_MB
- my_bool use_mb_flag= use_mb(charset_info);
-#endif
- for (end= from + length; from < end; from++)
- {
- char escape= 0;
-#ifdef USE_MB
- int tmp_length;
- if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
+
+/* High speed function if no multi-byte character flag */
+static ulong escape_string_for_mysql_no_mb(char *to, const char *from, ulong length)
+{
+ ulong i, u;
+
+ for (i = 0, u = 0; i < length; i++)
{
- if (to + tmp_length > to_end)
- {
- overflow= TRUE;
- break;
- }
- while (tmp_length--)
- *to++= *from++;
- from--;
- continue;
+ if (!from[i])
+ {
+ to[u++]= '\\';
+ to[u++]= '0';
+ continue;
+ }
+ if (from[i] == '\n')
+ {
+ to[u++]= '\\';
+ to[u++]= 'n';
+ continue;
+ }
+ if (from[i] == '\r')
+ {
+ to[u++]= '\\';
+ to[u++]= 'r';
+ continue;
+ }
+ if (from[i] == '\\')
+ {
+ to[u++]= '\\';
+ to[u++]= '\\';
+ continue;
+ }
+ if (from[i] == '\'')
+ {
+ to[u++]= '\\';
+ to[u++]= '\'';
+ continue;
+ }
+ if (from[i] == '"')
+ {
+ to[u++]= '\\';
+ to[u++]= '"';
+ continue;
+ }
+ if (from[i] == '\032')
+ {
+ to[u++]= '\\';
+ to[u++]= 'Z';
+ continue;
+ }
+ to[u++]= from[i];
}
- /*
- If the next character appears to begin a multi-byte character, we
- escape that first byte of that apparent multi-byte character. (The
- character just looks like a multi-byte character -- if it were actually
- a multi-byte character, it would have been passed through in the test
- above.)
-
- Without this check, we can create a problem by converting an invalid
- multi-byte character into a valid one. For example, 0xbf27 is not
- a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
- */
- if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
- escape= *from;
- else
-#endif
- switch (*from) {
- case 0: /* Must be escaped for 'mysql' */
- escape= '0';
- break;
- case '\n': /* Must be escaped for logs */
- escape= 'n';
- break;
- case '\r':
- escape= 'r';
- break;
- case '\\':
- escape= '\\';
- break;
- case '\'':
- escape= '\'';
- break;
- case '"': /* Better safe than sorry */
- escape= '"';
- break;
- case '\032': /* This gives problems on Win32 */
- escape= 'Z';
- break;
- }
- if (escape)
+ to[u]= 0;
+ return (u);
+}
+
+#define CHECK_AND_ESCAPE(src, replace_token) \
+ if (from[i] == src) \
+ { \
+ to[u++]= '\\'; \
+ to[u++]= replace_token; \
+ i++; \
+ continue; \
+ }
+
+/* We can't detect overflow on the to string, it depends how much
+ memory the client has allocated. */
+static escape_string_for_mysql_mb(CHARSET_INFO *charset_info,
+ char *to, const char *from, ulong length)
+{
+ ulong i, u;
+ const char *end;
+
+ end = from + length;
+ for (i = 0, u = 0; i < length;)
{
- if (to + 2 > to_end)
- {
- overflow= TRUE;
- break;
- }
- *to++= '\\';
- *to++= escape;
+ int tmp_length;
+
+ if ((tmp_length= my_ismbchar(charset_info, from, end)))
+ {
+ if (tmp_length)
+ {
+ if (i + tmp_length > length)
+ return (ulong)~0;
+ while (tmp_length--)
+ to[u++]= from[i++];
+ }
+ continue;
+ }
+ CHECK_AND_ESCAPE(0, '0');
+ CHECK_AND_ESCAPE('\n', 'n');
+ CHECK_AND_ESCAPE('\r', 'r');
+ CHECK_AND_ESCAPE('\\', '\\');
+ CHECK_AND_ESCAPE('\'', '\'');
+ CHECK_AND_ESCAPE('"', '"');
+ CHECK_AND_ESCAPE('\032', 'Z');
+
+ /* Normal behaviour, only copy */
+ to[u++]= from[i];
+ i++;
}
- else
+ to[u]= 0;
+ return (u);
+}
+
+ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
+ char *to, ulong to_length,
+ const char *from, ulong length)
+{
+ my_bool use_mb_flag= use_mb(charset_info);
+
+ if (!use_mb_flag)
+ return escape_string_for_mysql_no_mb(to, from, length);
+ return escape_string_for_mysql_mb(charset_info, to, from, length);
+}
+
+#else
+
+/* high optimization without MB */
+ulong escape_string_for_mysql(CHARSET_INFO *charset_info,
+ char *to, ulong to_length,
+ const char *from, ulong length)
+{
+ ulong i, u;
+
+ for (i = 0, u = 0; i < length; i++)
{
- if (to + 1 > to_end)
- {
- overflow= TRUE;
- break;
- }
- *to++= *from;
+ if (!from[i])
+ {
+ to[u++]= '\\';
+ to[u++]= '0';
+ continue;
+ }
+ if (from[i] == '\n')
+ {
+ to[u++]= '\\';
+ to[u++]= 'n';
+ continue;
+ }
+ if (from[i] == '\r')
+ {
+ to[u++]= '\\';
+ to[u++]= 'r';
+ continue;
+ }
+ if (from[i] == '\\')
+ {
+ to[u++]= '\\';
+ to[u++]= '\\';
+ continue;
+ }
+ if (from[i] == '\'')
+ {
+ to[u++]= '\\';
+ to[u++]= '\'';
+ continue;
+ }
+ if (from[i] == '"')
+ {
+ to[u++]= '\\';
+ to[u++]= '"';
+ continue;
+ }
+ if (from[i] == '\032')
+ {
+ to[u++]= '\\';
+ to[u++]= 'Z';
+ continue;
+ }
+ to[u++]= from[i];
}
- }
- *to= 0;
- return overflow ? (ulong)~0 : (ulong) (to - to_start);
+ to[u]= 0;
+ return (u);
}
-
+#endif
#ifdef BACKSLASH_MBTAIL
static CHARSET_INFO *fs_cset_cache= NULL;
--
Best regards,
Stephan FERRARO
NOOFS Core Developer - http://www.noofs.org/
GnuPG public key: gpg --keyserver www.keyserver.net --recv-key 94B2664F
Attachment: [application/pgp-signature] Digital signature signature.asc