List:Commits« Previous MessageNext Message »
From:Alexander Barkov Date:November 11 2010 8:44am
Subject:bzr commit into mysql-5.5-bugteam branch (bar:3114) Bug#57737
View as plain text  
#At file:///home/bar/mysql-bzr/mysql-5.5-bugteam.b57737/ based on revid:dmitry.shulga@strippednqay5s8s959l8x

 3114 Alexander Barkov	2010-11-11
      Bug#57737 Character sets: search fails with like, contraction, index
      
      Problem: LIKE over an indexed column optimized away good results,
      because my_like_range_utf32/utf16 returned wrong ranges for contractions.
      
      Fix: Make my_like_range_utf32/utf16 return correct ranges for contractions.
      
      Note, this code presens in my_like_range_mb and my_like_range_ucs2.
      It was forgotten in utf32/utf16 versions (during mysql-6.0 push/revert mess).
      
      
      added:
        @ mysql-test/include/ctype_czech.inc
        Adding shared test code
      
      modified:
        @ include/m_ctype.h
        Adding helper functions for contractions.
      
        @ mysql-test/r/ctype_uca.result
        @ mysql-test/r/ctype_utf16_uca.result
        @ mysql-test/r/ctype_utf32_uca.result
        @ mysql-test/t/ctype_uca.test
        @ mysql-test/t/ctype_utf16_uca.test
        @ mysql-test/t/ctype_utf32_uca.test
        Adding tests.
      
        @ strings/ctype-mb.c
        Pad function did not put the last character.
      
        @ strings/ctype-ucs2.c
        - my_fill_mb2 did not put the high byte, as previously
          it was used to put only characters in ASCII range.
          Now it puts high byte as well
          (needed to pupulate cs->max_sort_char correctly).
          
        - Adding contraction handling into my_like_range_utf16
        - Removing legacy "Temporary fix" from my_like_range_utf16
        - Using my_fill_mb2 instead of loop (duplicate code eliminations)
        - Adding contraction handling into my_like_range_utf32

    added:
      mysql-test/include/ctype_czech.inc
    modified:
      include/m_ctype.h
      mysql-test/r/ctype_uca.result
      mysql-test/r/ctype_utf16_uca.result
      mysql-test/r/ctype_utf32_uca.result
      mysql-test/t/ctype_uca.test
      mysql-test/t/ctype_utf16_uca.test
      mysql-test/t/ctype_utf32_uca.test
      strings/ctype-mb.c
      strings/ctype-ucs2.c
=== modified file 'include/m_ctype.h'
--- a/include/m_ctype.h	2010-09-28 15:15:58 +0000
+++ b/include/m_ctype.h	2010-11-11 08:39:03 +0000
@@ -356,6 +356,32 @@ extern CHARSET_INFO my_charset_utf8mb4_u
 #define MY_UTF8MB4                 "utf8mb4"
 
 
+/* Helper functions to handle contraction */
+static inline my_bool
+my_cs_have_contractions(CHARSET_INFO *cs)
+{
+  return cs->contractions != NULL;
+}
+
+static inline my_bool
+my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc)
+{
+  return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)];
+}
+
+static inline my_bool
+my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc)
+{
+  return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)];
+}
+
+static inline uint16*
+my_cs_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
+{
+  return &cs->contractions[(wc1 - 0x40) * 0x40 + wc2 - 0x40];
+}
+
+
 /* declarations for simple charsets */
 extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
                                  const uchar *, size_t); 

=== added file 'mysql-test/include/ctype_czech.inc'
--- a/mysql-test/include/ctype_czech.inc	1970-01-01 00:00:00 +0000
+++ b/mysql-test/include/ctype_czech.inc	2010-11-11 08:39:03 +0000
@@ -0,0 +1,12 @@
+SELECT @@collation_connection;
+--echo #
+--echo # Bug#57737 Character sets: search fails with like, contraction, index
+--echo #
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
+SELECT * FROM t1 WHERE s1 LIKE 'ch';
+DROP TABLE t1;

=== modified file 'mysql-test/r/ctype_uca.result'
--- a/mysql-test/r/ctype_uca.result	2008-03-26 09:51:16 +0000
+++ b/mysql-test/r/ctype_uca.result	2010-11-11 08:39:03 +0000
@@ -2888,3 +2888,63 @@ a	hex(b)	c
 DROP TABLE t1;
 set names utf8;
 End for 5.0 tests
+#
+# Start of 5.5 tests
+#
+SET collation_connection=utf8_czech_ci;
+SELECT @@collation_connection;
+@@collation_connection
+utf8_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
+SELECT * FROM t1 WHERE s1 LIKE 'ch';
+s1
+ch
+DROP TABLE t1;
+SET collation_connection=ucs2_czech_ci;
+SELECT @@collation_connection;
+@@collation_connection
+ucs2_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
+SELECT * FROM t1 WHERE s1 LIKE 'ch';
+s1
+ch
+DROP TABLE t1;
+#
+# End of 5.5 tests
+#

=== modified file 'mysql-test/r/ctype_utf16_uca.result'
--- a/mysql-test/r/ctype_utf16_uca.result	2010-02-24 09:15:34 +0000
+++ b/mysql-test/r/ctype_utf16_uca.result	2010-11-11 08:39:03 +0000
@@ -2368,6 +2368,33 @@ NULL
 NULL
 NULL
 drop table t1;
+SET collation_connection=utf16_czech_ci;
+SELECT @@collation_connection;
+@@collation_connection
+utf16_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cT * FROM t1 WHERE s1 LIKE 'ch';
+s1
+ch
+DROP TABLE t1;
 #
 # End of 5.5 tests
 #

=== modified file 'mysql-test/r/ctype_utf32_uca.result'
--- a/mysql-test/r/ctype_utf32_uca.result	2010-02-24 09:15:34 +0000
+++ b/mysql-test/r/ctype_utf32_uca.result	2010-11-11 08:39:03 +0000
@@ -2368,6 +2368,33 @@ NULL
 NULL
 NULL
 drop table t1;
+SET collation_connection=utf32_czech_ci;
+SELECT @@collation_connection;
+@@collation_connection
+utf32_czech_ci
+#
+# Bug#57737 Character sets: search fails with like, contraction, index
+#
+CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
+INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 ADD KEY s1 (s1);
+SELECT * FROM t1 WHERE s1 LIKE 'c%';
+s1
+c
+ce
+cé
+ch
+ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
+SELECT * FROM t1 WHERE s1 LIKE 'ch';
+s1
+ch
+DROP TABLE t1;
 #
 # End of 5.5 tests
 #

=== modified file 'mysql-test/t/ctype_uca.test'
--- a/mysql-test/t/ctype_uca.test	2008-02-20 18:49:26 +0000
+++ b/mysql-test/t/ctype_uca.test	2010-11-11 08:39:03 +0000
@@ -545,3 +545,17 @@ set collation_connection=ucs2_unicode_ci
 set names utf8;
 
 -- echo End for 5.0 tests
+
+--echo #
+--echo # Start of 5.5 tests
+--echo #
+#
+# Test my_like_range and contractions
+#
+SET collation_connection=utf8_czech_ci;
+--source include/ctype_czech.inc
+SET collation_connection=ucs2_czech_ci;
+--source include/ctype_czech.inc
+--echo #
+--echo # End of 5.5 tests
+--echo #

=== modified file 'mysql-test/t/ctype_utf16_uca.test'
--- a/mysql-test/t/ctype_utf16_uca.test	2010-02-24 09:15:34 +0000
+++ b/mysql-test/t/ctype_utf16_uca.test	2010-11-11 08:39:03 +0000
@@ -284,6 +284,12 @@ DROP TABLE IF EXISTS t1;
 set collation_connection=utf16_unicode_ci;
 --source include/ctype_regex.inc
 
+#
+# Test my_like_range and contractions
+#
+SET collation_connection=utf16_czech_ci;
+--source include/ctype_czech.inc
+
 
 --echo #
 --echo # End of 5.5 tests

=== modified file 'mysql-test/t/ctype_utf32_uca.test'
--- a/mysql-test/t/ctype_utf32_uca.test	2010-02-24 09:15:34 +0000
+++ b/mysql-test/t/ctype_utf32_uca.test	2010-11-11 08:39:03 +0000
@@ -286,6 +286,13 @@ set collation_connection=utf32_unicode_c
 --source include/ctype_regex.inc
 
 
+#
+# Test my_like_range and contractions
+#
+SET collation_connection=utf32_czech_ci;
+--source include/ctype_czech.inc
+
+
 --echo #
 --echo # End of 5.5 tests
 --echo #

=== modified file 'strings/ctype-mb.c'
--- a/strings/ctype-mb.c	2010-07-26 06:47:03 +0000
+++ b/strings/ctype-mb.c	2010-11-11 08:39:03 +0000
@@ -636,7 +636,7 @@ static void pad_max_char(CHARSET_INFO *c
   DBUG_ASSERT(buflen > 0);
   do
   {
-    if ((str + buflen) < end)
+    if ((str + buflen) <= end)
     {
       /* Enough space for the characer */
       memcpy(str, buf, buflen);

=== modified file 'strings/ctype-ucs2.c'
--- a/strings/ctype-ucs2.c	2010-10-20 19:02:59 +0000
+++ b/strings/ctype-ucs2.c	2010-11-11 08:39:03 +0000
@@ -903,7 +903,7 @@ static void
 my_fill_mb2(CHARSET_INFO *cs __attribute__((unused)),
             char *s, size_t l, int fill)
 {
-  for ( ; l >= 2; s[0]= 0, s[1]= fill, s+= 2, l-= 2);
+  for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
 }
 
 
@@ -1590,14 +1590,19 @@ my_like_range_utf16(CHARSET_INFO *cs,
                     char *min_str,char *max_str,
                     size_t *min_length,size_t *max_length)
 {
-  const char *end=ptr+ptr_length;
-  char *min_org=min_str;
-  char *min_end=min_str+res_length;
+  const char *end= ptr + ptr_length;
+  char *min_org= min_str;
+  char *min_end= min_str + res_length;
+  char *max_end= max_str + res_length;
   size_t charlen= res_length / cs->mbmaxlen;
+  my_bool have_contractions= my_cs_have_contractions(cs);
   
   for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
         ; ptr+=2, charlen--)
   {
+    my_wc_t wc2;
+    int res;
+    
     if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
     {
       ptr+=2;                                        /* Skip escape */
@@ -1623,34 +1628,35 @@ my_like_range_utf16(CHARSET_INFO *cs,
       *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
                     res_length);
       *max_length= res_length;
-      do {
-        *min_str++ = 0;
-        *min_str++ = 0;
-        *max_str++ = (char) (cs->max_sort_char >> 8);
-        *max_str++ = (char) (cs->max_sort_char & 255);
-      } while (min_str + 1 < min_end);
-      return FALSE;
+      goto pad_min_max;
+    }
+    if (have_contractions && ptr + 3 < end &&
+        ptr[0] == '\0' &&
+        my_cs_can_be_contraction_head(cs, (uchar) ptr[1]) &&
+        (res= my_utf16_uni(cs, &wc2, (uchar*) ptr + 2, (uchar*) end)) > 0)
+    {
+      /* 
+         Check contraction head followed by a wildcard,
+         or contraction that does not fit to result
+      */
+      if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many) ||
+          (charlen == 1 &&
+           my_cs_can_be_contraction_tail(cs, wc2) &&
+           my_cs_contraction2_weight(cs, (uchar) ptr[1], wc2)))
+      {
+        *min_length= *max_length= res_length;
+        goto pad_min_max;
+      }
     }
     *min_str++= *max_str++ = ptr[0];
     *min_str++= *max_str++ = ptr[1];
   }
 
-  /* Temporary fix for handling w_one at end of string (key compression) */
-  {
-    char *tmp;
-    for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';)
-    {
-      *--tmp=' ';
-      *--tmp='\0';
-    }
-  }
-  
-  *min_length= *max_length = (size_t) (min_str - min_org);
-  while (min_str + 1 < min_end)
-  {
-    *min_str++ = *max_str++ = '\0';
-    *min_str++ = *max_str++ = ' ';      /* Because if key compression */
-  }
+  *min_length= *max_length= (size_t) (min_str - min_org);
+
+pad_min_max:
+  my_fill_mb2(cs, min_str, min_end - min_str, cs->min_sort_char);
+  my_fill_mb2(cs, max_str, max_end - max_str, cs->max_sort_char);
   return FALSE;
 }
 
@@ -2583,12 +2589,13 @@ my_like_range_utf32(CHARSET_INFO *cs,
   char *min_end= min_str + res_length;
   char *max_end= max_str + res_length;
   size_t charlen= res_length / cs->mbmaxlen;
+  my_bool have_contractions= my_cs_have_contractions(cs);
   
   DBUG_ASSERT((res_length % 4) == 0);
   
   for ( ; charlen > 0; ptr+= 4, charlen--)
   {
-    my_wc_t wc;
+    my_wc_t wc, wc2;
     int res;
     if ((res= my_utf32_uni(cs, &wc, (uchar*) ptr, (uchar*) end)) < 0)
     {
@@ -2640,6 +2647,24 @@ my_like_range_utf32(CHARSET_INFO *cs,
       goto pad_min_max;
     }
     
+    if (have_contractions && 
+        my_cs_can_be_contraction_head(cs, wc) &&
+        (res= my_utf32_uni(cs, &wc2, (uchar*) ptr + 4, (uchar*) end)) > 0)
+    {
+      /*
+         Check contraction head followed by a wildcard 
+         or contraction which does not fit to result
+      */
+      if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many) ||
+          (charlen == 1 &&
+           my_cs_can_be_contraction_tail(cs, wc2) &&
+           my_cs_contraction2_weight(cs, wc, wc2)))
+      {
+        *min_length= *max_length= res_length;
+        goto pad_min_max;
+      }
+    }
+    
     /* Normal character */
     if (my_uni_utf32(cs, wc, (uchar*) min_str, (uchar*) min_end) != 4 ||
         my_uni_utf32(cs, wc, (uchar*) max_str, (uchar*) max_end) != 4)

Attachment: [text/bzr-bundle] bzr/bar@mysql.com-20101111083903-1hu3527l3vsqsjlx.bundle
Thread
bzr commit into mysql-5.5-bugteam branch (bar:3114) Bug#57737Alexander Barkov11 Nov
  • Re: bzr commit into mysql-5.5-bugteam branch (bar:3114) Bug#57737Roy Lyseng19 Nov
    • Re: bzr commit into mysql-5.5-bugteam branch (bar:3114) Bug#57737Alexander Barkov22 Nov
      • Re: bzr commit into mysql-5.5-bugteam branch (bar:3114) Bug#57737Roy Lyseng22 Nov
        • Re: bzr commit into mysql-5.5-bugteam branch (bar:3114) Bug#57737Alexander Barkov23 Nov
  • Re: bzr commit into mysql-5.5-bugteam branch (bar:3114) Bug#57737Roy Lyseng22 Nov