List: Commits « Previous Message | Next Message »
From: kroki  Date: June 22 2006 10:01am
Subject:bk commit into 5.0 tree (kroki:1.2168) BUG#15811
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of tomash. When tomash does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2168 06/06/22 14:01:23 kroki@stripped +3 -0
  Bug#15811: extremely long time for mysql client to execute long INSERT
  
  The problem was redundant calls to strlen() in string comparison functions:
  strlen() scans the whole string, even though the comparison may return
  after checking only a small number of characters.
  
  NOTE FOR THE REVIEWER:
  
  Kostja: I suggest Bar should do the review (too).
  
  Bar:
  
  Perhaps my_strcasecmp_mb() has a bug, as it never returns a negative number.
  Its name at least suggests the semantics of strcmp(); if that is not the
  case, it should at least have a comment describing the return value.
  Actually, the original implementation had 'return *t;', where 't' is
  'char *', so its sign was undefined when cast to 'int'.  I changed this to
  'return (*t != 0);'.
  
  Please let me know whether this is a bug, so that I can fix it as part of
  this patch.

  strings/ctype-utf8.c
    1.99 06/06/22 14:01:17 kroki@stripped +55 -5
    Implement my_caseup_str_utf8() and my_casedn_str_utf8() as simplified
    versions of my_caseup_utf8() and my_casedn_utf8(), respectively.  Do not
    call strlen() where a sliding window of at least one multi-byte
    character is sufficient.
    Fix CHARSET_INFO::mbmaxlen for UTF-8 to be 6 bytes if UNICODE_32BIT is
    defined.

  strings/ctype-ucs2.c
    1.62 06/06/22 14:01:16 kroki@stripped +26 -18
    Do not call strlen() where a sliding window of at least one multi-byte
    character is sufficient.
    Remove now unused my_strncasecmp_ucs2().

  strings/ctype-mb.c
    1.48 06/06/22 14:01:16 kroki@stripped +51 -5
    Do not call strlen() where a sliding window of at least one multi-byte
    character is sufficient.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	kroki
# Host:	moonlight.intranet
# Root:	/home/tomash/src/mysql_ab/mysql-5.0-bug15811

--- 1.61/strings/ctype-ucs2.c	2006-03-30 17:14:51 +04:00
+++ 1.62/strings/ctype-ucs2.c	2006-06-22 14:01:16 +04:00
@@ -310,15 +310,26 @@
 }
 
 
-static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
-			       const char *s, const char *t,  uint len)
+static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
 {
   int s_res,t_res;
   my_wc_t s_wc,t_wc;
-  const char *se=s+len;
-  const char *te=t+len;
   MY_UNICASE_INFO **uni_plane= cs->caseinfo;
-  
+  /*
+    Since we process the strings one (multi-byte) character at a time,
+    'se' and 'te' point either cs->mbmaxlen chars forward from the
+    current positions 's' and 't', or at the end of the corresponding
+    string if it is within cs->mbmaxlen chars window.
+  */
+  const char *se=s;
+  const char *te=t;
+  int i;
+
+  for (i= 0; i < cs->mbmaxlen && *se; ++i)
+    ++se;
+  for (i= 0; i < cs->mbmaxlen && *te; ++i)
+    ++te;
+
   while ( s < se && t < te )
   {
     int plane;
@@ -342,21 +353,21 @@
       return  ((int) s_wc) - ((int) t_wc);
     
     s+=s_res;
+    for (i= 0; i < s_res && *se; ++i)
+      ++se;
+
     t+=t_res;
+    for (i= 0; i < t_res && *te; ++i)
+      ++te;
   }
+  /*
+    At least one of the expressions is zero, and we are interested only
+    in the sign of the result.
+  */
   return (int) ( (se-s) - (te-t) );
 }
 
 
-static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
-{
-  uint s_len= (uint) strlen(s);
-  uint t_len= (uint) strlen(t);
-  uint len = (s_len > t_len) ? s_len : t_len;
-  return  my_strncasecmp_ucs2(cs, s, t, len);
-}
-
-
 static int my_strnxfrm_ucs2(CHARSET_INFO *cs, 
 	uchar *dst, uint dstlen, const uchar *src, uint srclen)
 {
@@ -1423,10 +1434,7 @@
 static
 int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t)
 {
-  uint s_len= (uint) strlen(s);
-  uint t_len= (uint) strlen(t);
-  uint len = (s_len > t_len) ? s_len : t_len;
-  return  my_strncasecmp_ucs2(cs, s, t, len);
+  return  my_strcasecmp_ucs2(cs, s, t);
 }
 
 

--- 1.47/strings/ctype-mb.c	2005-10-06 16:40:13 +04:00
+++ 1.48/strings/ctype-mb.c	2006-06-22 14:01:16 +04:00
@@ -24,17 +24,32 @@
 void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
 {
   register uint32 l;
-  register char *end=str+strlen(str); /* BAR TODO: remove strlen() call */
   register uchar *map=cs->to_upper;
+  /*
+    Since we process the string one (multi-byte) character at a time,
+    'end' points either cs->mbmaxlen chars forward from the current
+    position 'str', or at the end of the string if it is within
+    cs->mbmaxlen chars window.
+  */
+  register char *end=str;
+
+  for (l= 0; l < cs->mbmaxlen && *end; ++l)
+    ++end;
   
   while (*str)
   {
     if ((l=my_ismbchar(cs, str,end)))
+    {
       str+=l;
+      while (l-- && *end)
+        ++end;
+    }
     else
     { 
       *str=(char) map[(uchar)*str];
       str++;
+      if (*end)
+        ++end;
     }
   }
 }
@@ -42,17 +57,32 @@
 void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
 {
   register uint32 l;
-  register char *end=str+strlen(str);
   register uchar *map=cs->to_lower;
+  /*
+    Since we process the string one (multi-byte) character at a time,
+    'end' points either cs->mbmaxlen chars forward from the current
+    position 'str', or at the end of the string if it is within
+    cs->mbmaxlen chars window.
+  */
+  register char *end=str;
+
+  for (l= 0; l < cs->mbmaxlen && *end; ++l)
+    ++end;
   
   while (*str)
   {
     if ((l=my_ismbchar(cs, str,end)))
+    {
       str+=l;
+      while (l-- && *end)
+        ++end;
+    }
     else
     {
       *str=(char) map[(uchar)*str];
       str++;
+      if (*end)
+        ++end;
     }
   }
 }
@@ -104,23 +134,39 @@
 int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
 {
   register uint32 l;
-  register const char *end=s+strlen(s);
   register uchar *map=cs->to_upper;
-  
+  /*
+    Since we process the string one (multi-byte) character at a time,
+    'end' points either cs->mbmaxlen chars forward from the current
+    position 's', or at the end of the string if it is within
+    cs->mbmaxlen chars window.
+  */
+  register const char *end=s;
+
+  for (l= 0; l < cs->mbmaxlen && *end; ++l)
+    ++end;
+
   while (s<end)
   {
     if ((l=my_ismbchar(cs, s,end)))
     {
       while (l--)
+      {
         if (*s++ != *t++) 
           return 1;
+
+        if (*end)
+          ++end;
+      }
     }
     else if (my_mbcharlen(cs, *t) > 1)
       return 1;
     else if (map[(uchar) *s++] != map[(uchar) *t++])
       return 1;
+    else if (*end)
+      ++end;
   }
-  return *t;
+  return (*t != 0);
 }
 
 

--- 1.98/strings/ctype-utf8.c	2006-03-23 11:37:51 +03:00
+++ 1.99/strings/ctype-utf8.c	2006-06-22 14:01:17 +04:00
@@ -2143,8 +2143,27 @@
 
 static void my_caseup_str_utf8(CHARSET_INFO * cs, char * s)
 {
-  uint len= (uint) strlen(s);
-  my_caseup_utf8(cs, s, len, s, len);
+  my_wc_t wc;
+  int res;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  char *end= s;
+  DBUG_ASSERT(cs->casedn_multiply == 1);
+
+  for (res= 0; res < cs->mbmaxlen && *end; ++res)
+    ++end;
+
+  while (s < end &&
+         (res= my_utf8_uni(cs, &wc, (uchar*) s, (uchar*) end)) > 0)
+  {
+    int res2;
+    int plane= (wc>>8) & 0xFF;
+    wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
+    res2= my_uni_utf8(cs, wc, (uchar*) s, (uchar*) end);
+    DBUG_ASSERT(res2 == res);
+    s+= res;
+    while (res-- && *end)
+      ++end;
+  }
 }
 
 
@@ -2172,8 +2191,27 @@
 
 static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
 {
-  uint len= (uint) strlen(s);
-  my_casedn_utf8(cs, s, len, s, len);
+  my_wc_t wc;
+  int res;
+  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+  char *end= s;
+  DBUG_ASSERT(cs->casedn_multiply == 1);
+
+  for (res= 0; res < cs->mbmaxlen && *end; ++res)
+    ++end;
+
+  while (s < end &&
+         (res= my_utf8_uni(cs, &wc, (uchar*) s, (uchar*) end)) > 0)
+  {
+    int res2;
+    int plane= (wc>>8) & 0xFF;
+    wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
+    res2= my_uni_utf8(cs, wc, (uchar*) s, (uchar*) end);
+    DBUG_ASSERT(res2 == res);
+    s+= res;
+    while (res-- && *end)
+      ++end;
+  }
 }
 
 
@@ -2576,7 +2614,11 @@
     1,                  /* caseup_multiply  */
     1,                  /* casedn_multiply  */
     1,                  /* mbminlen     */
+#ifdef UNICODE_32BIT
+    6,                  /* mbmaxlen     */
+#else
     3,                  /* mbmaxlen     */
+#endif
     0,                  /* min_sort_char */
     0xFFFF,             /* max_sort_char */
     ' ',                /* pad char      */
@@ -2609,7 +2651,11 @@
     1,                  /* caseup_multiply  */
     1,                  /* casedn_multiply  */
     1,                  /* mbminlen     */
+#ifdef UNICODE_32BIT
+    6,                  /* mbmaxlen     */
+#else
     3,                  /* mbmaxlen     */
+#endif
     0,                  /* min_sort_char */
     255,                /* max_sort_char */
     ' ',                /* pad char      */
@@ -2780,7 +2826,11 @@
     1,                  /* caseup_multiply  */
     1,                  /* casedn_multiply  */
     1,			/* mbminlen     */
-    3,			/* mbmaxlen     */
+#ifdef UNICODE_32BIT
+    6,                  /* mbmaxlen     */
+#else
+    3,                  /* mbmaxlen     */
+#endif
     0,			/* min_sort_char */
     255,		/* max_sort_char */
     ' ',                /* pad char      */
Thread
bk commit into 5.0 tree (kroki:1.2168) BUG#15811  kroki  22 Jun