Below is the list of changes that have just been committed into a local
5.0 repository of tomash. When tomash does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2168 06/06/22 14:01:23 kroki@stripped +3 -0
Bug#15811: extremely long time for mysql client to execute long INSERT
The problem was redundant calls to strlen() in the string comparison functions:
they scanned the whole string even though the comparison may then return after
checking only a small number of characters.
NOTE FOR THE REVIEWER:
Kostja: I suggest Bar should do the review (too).
Bar:
Perhaps my_strcasecmp_mb() has a bug, as it never returns a negative number.
Its name at least suggests the semantics of strcmp(); if that is not so, then
it should at least have a comment for the return value. Actually, the original
implementation had 'return *t;', where 't' is 'char *', so the sign of the
result was undefined when casting to 'int'. I changed this to 'return (*t != 0);'.
Please let me know whether this is a bug, so I will fix it as a part of this
patch.
strings/ctype-utf8.c
1.99 06/06/22 14:01:17 kroki@stripped +55 -5
Implement my_caseup_str_utf8() and my_casedn_str_utf8() as simplified
versions of my_caseup_utf8() and my_casedn_utf8() respectively. Do not
call strlen() where a sliding window of at least one multi-byte
character is sufficient.
Fix CHARSET_INFO::mbmaxlen for UTF-8 to be 6 bytes if UNICODE_32BIT is
defined.
strings/ctype-ucs2.c
1.62 06/06/22 14:01:16 kroki@stripped +26 -18
Do not call strlen() where a sliding window of at least one multi-byte
character is sufficient.
Remove now unused my_strncasecmp_ucs2().
strings/ctype-mb.c
1.48 06/06/22 14:01:16 kroki@stripped +51 -5
Do not call strlen() where a sliding window of at least one multi-byte
character is sufficient.
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: kroki
# Host: moonlight.intranet
# Root: /home/tomash/src/mysql_ab/mysql-5.0-bug15811
--- 1.61/strings/ctype-ucs2.c 2006-03-30 17:14:51 +04:00
+++ 1.62/strings/ctype-ucs2.c 2006-06-22 14:01:16 +04:00
@@ -310,15 +310,26 @@
}
-static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
- const char *s, const char *t, uint len)
+static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
- const char *se=s+len;
- const char *te=t+len;
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-
+ /*
+ Since we process the strings one (multi-byte) character at a time,
+ 'se' and 'te' point either cs->mbmaxlen chars forward from the
+ current positions 's' and 't', or at the end of the corresponding
+ string if it is within cs->mbmaxlen chars window.
+ */
+ const char *se=s;
+ const char *te=t;
+ int i;
+
+ for (i= 0; i < cs->mbmaxlen && *se; ++i)
+ ++se;
+ for (i= 0; i < cs->mbmaxlen && *te; ++i)
+ ++te;
+
while ( s < se && t < te )
{
int plane;
@@ -342,21 +353,21 @@
return ((int) s_wc) - ((int) t_wc);
s+=s_res;
+ for (i= 0; i < s_res && *se; ++i)
+ ++se;
+
t+=t_res;
+ for (i= 0; i < t_res && *te; ++i)
+ ++te;
}
+ /*
+ At least one of the expressions is zero, and we are interested only
+ in the sign of the result.
+ */
return (int) ( (se-s) - (te-t) );
}
-static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
-{
- uint s_len= (uint) strlen(s);
- uint t_len= (uint) strlen(t);
- uint len = (s_len > t_len) ? s_len : t_len;
- return my_strncasecmp_ucs2(cs, s, t, len);
-}
-
-
static int my_strnxfrm_ucs2(CHARSET_INFO *cs,
uchar *dst, uint dstlen, const uchar *src, uint srclen)
{
@@ -1423,10 +1434,7 @@
static
int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t)
{
- uint s_len= (uint) strlen(s);
- uint t_len= (uint) strlen(t);
- uint len = (s_len > t_len) ? s_len : t_len;
- return my_strncasecmp_ucs2(cs, s, t, len);
+ return my_strcasecmp_ucs2(cs, s, t);
}
--- 1.47/strings/ctype-mb.c 2005-10-06 16:40:13 +04:00
+++ 1.48/strings/ctype-mb.c 2006-06-22 14:01:16 +04:00
@@ -24,17 +24,32 @@
void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
- register char *end=str+strlen(str); /* BAR TODO: remove strlen() call */
register uchar *map=cs->to_upper;
+ /*
+ Since we process the string one (multi-byte) character at a time,
+ 'end' points either cs->mbmaxlen chars forward from the current
+ position 'str', or at the end of the string if it is within
+ cs->mbmaxlen chars window.
+ */
+ register char *end=str;
+
+ for (l= 0; l < cs->mbmaxlen && *end; ++l)
+ ++end;
while (*str)
{
if ((l=my_ismbchar(cs, str,end)))
+ {
str+=l;
+ while (l-- && *end)
+ ++end;
+ }
else
{
*str=(char) map[(uchar)*str];
str++;
+ if (*end)
+ ++end;
}
}
}
@@ -42,17 +57,32 @@
void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
- register char *end=str+strlen(str);
register uchar *map=cs->to_lower;
+ /*
+ Since we process the string one (multi-byte) character at a time,
+ 'end' points either cs->mbmaxlen chars forward from the current
+ position 'str', or at the end of the string if it is within
+ cs->mbmaxlen chars window.
+ */
+ register char *end=str;
+
+ for (l= 0; l < cs->mbmaxlen && *end; ++l)
+ ++end;
while (*str)
{
if ((l=my_ismbchar(cs, str,end)))
+ {
str+=l;
+ while (l-- && *end)
+ ++end;
+ }
else
{
*str=(char) map[(uchar)*str];
str++;
+ if (*end)
+ ++end;
}
}
}
@@ -104,23 +134,39 @@
int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
{
register uint32 l;
- register const char *end=s+strlen(s);
register uchar *map=cs->to_upper;
-
+ /*
+ Since we process the string one (multi-byte) character at a time,
+ 'end' points either cs->mbmaxlen chars forward from the current
+ position 's', or at the end of the string if it is within
+ cs->mbmaxlen chars window.
+ */
+ register const char *end=s;
+
+ for (l= 0; l < cs->mbmaxlen && *end; ++l)
+ ++end;
+
while (s<end)
{
if ((l=my_ismbchar(cs, s,end)))
{
while (l--)
+ {
if (*s++ != *t++)
return 1;
+
+ if (*end)
+ ++end;
+ }
}
else if (my_mbcharlen(cs, *t) > 1)
return 1;
else if (map[(uchar) *s++] != map[(uchar) *t++])
return 1;
+ else if (*end)
+ ++end;
}
- return *t;
+ return (*t != 0);
}
--- 1.98/strings/ctype-utf8.c 2006-03-23 11:37:51 +03:00
+++ 1.99/strings/ctype-utf8.c 2006-06-22 14:01:17 +04:00
@@ -2143,8 +2143,27 @@
static void my_caseup_str_utf8(CHARSET_INFO * cs, char * s)
{
- uint len= (uint) strlen(s);
- my_caseup_utf8(cs, s, len, s, len);
+ my_wc_t wc;
+ int res;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ char *end= s;
+ DBUG_ASSERT(cs->casedn_multiply == 1);
+
+ for (res= 0; res < cs->mbmaxlen && *end; ++res)
+ ++end;
+
+ while (s < end &&
+ (res= my_utf8_uni(cs, &wc, (uchar*) s, (uchar*) end)) > 0)
+ {
+ int res2;
+ int plane= (wc>>8) & 0xFF;
+ wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
+ res2= my_uni_utf8(cs, wc, (uchar*) s, (uchar*) end);
+ DBUG_ASSERT(res2 == res);
+ s+= res;
+ while (res-- && *end)
+ ++end;
+ }
}
@@ -2172,8 +2191,27 @@
static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
{
- uint len= (uint) strlen(s);
- my_casedn_utf8(cs, s, len, s, len);
+ my_wc_t wc;
+ int res;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ char *end= s;
+ DBUG_ASSERT(cs->casedn_multiply == 1);
+
+ for (res= 0; res < cs->mbmaxlen && *end; ++res)
+ ++end;
+
+ while (s < end &&
+ (res= my_utf8_uni(cs, &wc, (uchar*) s, (uchar*) end)) > 0)
+ {
+ int res2;
+ int plane= (wc>>8) & 0xFF;
+ wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
+ res2= my_uni_utf8(cs, wc, (uchar*) s, (uchar*) end);
+ DBUG_ASSERT(res2 == res);
+ s+= res;
+ while (res-- && *end)
+ ++end;
+ }
}
@@ -2576,7 +2614,11 @@
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
+#ifdef UNICODE_32BIT
+ 6, /* mbmaxlen */
+#else
3, /* mbmaxlen */
+#endif
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
@@ -2609,7 +2651,11 @@
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
+#ifdef UNICODE_32BIT
+ 6, /* mbmaxlen */
+#else
3, /* mbmaxlen */
+#endif
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad char */
@@ -2780,7 +2826,11 @@
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
- 3, /* mbmaxlen */
+#ifdef UNICODE_32BIT
+ 6, /* mbmaxlen */
+#else
+ 3, /* mbmaxlen */
+#endif
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad char */
Thread |
---|
• bk commit into 5.0 tree (kroki:1.2168) BUG#15811 | kroki | 22 Jun |