List:Commits« Previous MessageNext Message »
From:bar Date:June 28 2007 8:34am
Subject:bk commit into 5.0 tree (bar:1.2499) BUG#27345
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-06-28 13:34:44+05:00, bar@stripped +5 -0
  Bug#27345 Incorrect data returned when range-read from utf8_danish_ci indexes
  Problem: like_range() returned wrong ranges for contractions (like 'ch' in Czech).
  Fix: adding a special code to handle tricky cases:
  - contraction head followed by a wild character
  - full contraction
  - contraction part followed by another contraction part,
    but they are not a contraction together.

  mysql-test/r/ctype_uca.result@stripped, 2007-06-28 13:34:42+05:00, bar@stripped +92 -0
    Adding test case

  mysql-test/t/ctype_uca.test@stripped, 2007-06-28 13:34:42+05:00, bar@stripped +54 -0
    Adding test case

  strings/ctype-mb.c@stripped, 2007-06-28 13:34:42+05:00, bar@stripped +70 -1
    Adding special handling of contractions in my_like_range_mb().

  strings/ctype-uca.c@stripped, 2007-06-28 13:34:42+05:00, bar@stripped +10 -1
    Allocate additional 256 bytes for flags "is contraction part".

  strings/ctype-ucs2.c@stripped, 2007-06-28 13:34:43+05:00, bar@stripped +35 -0
    Adding special handling of contractions in my_like_range_ucs2().

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	bar
# Host:	bar.myoffice.izhnet.ru
# Root:	/home/bar/mysql-work/mysql-5.0.b27345

--- 1.46/strings/ctype-uca.c	2007-06-07 17:55:53 +05:00
+++ 1.47/strings/ctype-uca.c	2007-06-28 13:34:42 +05:00
@@ -7937,10 +7937,16 @@ static my_bool create_tailoring(CHARSET_
   /* Now process contractions */
   if (ncontractions)
   {
-    uint size= 0x40*0x40*sizeof(uint16); /* 8K, for basic latin letter only */
+    /*
+      8K for weights for basic latin letter pairs,
+      plus 256 bytes for "is contraction part" flags.
+    */
+    uint size= 0x40*0x40*sizeof(uint16) + 256;
+    char *contraction_flags;
     if (!(cs->contractions= (uint16*) (*alloc)(size)))
         return 1;
     bzero((void*)cs->contractions, size);
+    contraction_flags= ((char*) cs->contractions) + 0x40*0x40;
     for (i=0; i < rc; i++)
     {
       if (rule[i].curr[1])
@@ -7966,6 +7972,9 @@ static my_bool create_tailoring(CHARSET_
         
         /* Copy base weight applying primary difference */
         cs->contractions[offsc]= offsb[0] + rule[i].diff[0];
+        /* Mark both letters as "is contraction part" */
+        contraction_flags[rule[i].curr[0]]= 1;
+        contraction_flags[rule[i].curr[1]]= 1;
       }
     }
   }

--- 1.66/strings/ctype-ucs2.c	2007-05-31 15:54:38 +05:00
+++ 1.67/strings/ctype-ucs2.c	2007-06-28 13:34:43 +05:00
@@ -1524,6 +1524,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO 
   char *min_org=min_str;
   char *min_end=min_str+res_length;
   uint charlen= res_length / cs->mbmaxlen;
+  const char *contraction_flags= cs->contractions ?
+             ((const char*) cs->contractions) + 0x40*0x40 : NULL;
   
   for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
         ; ptr+=2, charlen--)
@@ -1545,6 +1547,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO 
     }
     if (ptr[0] == '\0' && ptr[1] == w_many)	/* '%' in SQL */
     {
+fill_max_and_min:
       /*
         Calculate length of keys:
         'a\0\0... is the smallest possible string when we have space expand
@@ -1561,6 +1564,38 @@ my_bool my_like_range_ucs2(CHARSET_INFO 
       } while (min_str + 1 < min_end);
       return 0;
     }
+
+    if (contraction_flags && ptr + 3 < end &&
+        ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
+    {
+      /* Contraction head found */
+      if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
+      {
+        /* Contraction head followed by a wildcard, quit */
+        goto fill_max_and_min;
+      }
+      
+      /*
+        Check if the second letter can be contraction part,
+        and if two letters really produce a contraction.
+      */
+      if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
+          cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
+      {
+        /* Contraction found */
+        if (charlen == 1 || min_str + 2 >= min_end)
+        {
+          /* Full contraction doesn't fit, quit */
+          goto fill_max_and_min;
+        }
+        
+        /* Put contraction head */
+        *min_str++= *max_str++= *ptr++;
+        *min_str++= *max_str++= *ptr++;
+        charlen--;
+      }
+    }
+    /* Put contraction tail, or a single character */
     *min_str++= *max_str++ = ptr[0];
     *min_str++= *max_str++ = ptr[1];
   }

--- 1.56/strings/ctype-mb.c	2007-05-31 15:54:38 +05:00
+++ 1.57/strings/ctype-mb.c	2007-06-28 13:34:42 +05:00
@@ -563,6 +563,8 @@ my_bool my_like_range_mb(CHARSET_INFO *c
   char *min_end= min_str + res_length;
   char *max_end= max_str + res_length;
   uint maxcharlen= res_length / cs->mbmaxlen;
+  const char *contraction_flags= cs->contractions ? 
+              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
 
   for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
   {
@@ -571,6 +573,7 @@ my_bool my_like_range_mb(CHARSET_INFO *c
       ptr++;                                    /* Skip escape */
     else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
     {      
+fill_max_and_min:
       /*
         Calculate length of keys:
         'a\0\0... is the smallest possible string when we have space expand
@@ -602,8 +605,74 @@ my_bool my_like_range_mb(CHARSET_INFO *c
        *min_str++= *max_str++= *ptr++;
     }
     else
-       *min_str++= *max_str++= *ptr++;    
+    {
+      /*
+        Special case for collations with contractions.
+        For example, in Czech, 'ch' is a separate letter
+        which is sorted between 'h' and 'i'.
+        In the pattern 'abc%', the 'c' at the end can mean:
+        - letter 'c' itself,
+        - beginning of the contraction 'ch'.
+
+        If we simply return this LIKE range:
+
+         'abc\min\min\min' and 'abc\max\max\max'
+
+        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
+        will only find values starting from 'abc[^h]',
+        but won't find values starting from 'abch'.
+
+        We must ignore contraction heads followed by w_one or w_many.
+        ('Contraction head' means any letter which can be the first
+        letter in a contraction)
 
+        For example, for Czech 'abc%', we will return LIKE range,
+        which is equal to LIKE range for 'ab%':
+
+        'ab\min\min\min\min' and 'ab\max\max\max\max'.
+
+      */
+      if (contraction_flags && ptr + 1 < end &&
+          contraction_flags[(uchar) *ptr])
+      {
+        /* Ptr[0] is a contraction head. */
+        
+        if (ptr[1] == w_one || ptr[1] == w_many)
+        {
+          /* Contraction head followed by a wildcard, quit. */
+          goto fill_max_and_min;
+        }
+        
+        /*
+          Some letters can be both contraction heads and contraction tails.
+          For example, in Danish 'aa' is a separate single letter which
+          is sorted after 'z'. So 'a' can be both head and tail.
+          
+          If ptr[0]+ptr[1] is a contraction,
+          then put both letters together.
+          
+          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
+          is not a contraction, then we put only ptr[0],
+          and continue with ptr[1] on the next loop.
+        */
+        if (contraction_flags[(uchar) ptr[1]] &&
+            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
+        {
+          /* Contraction found */
+          if (maxcharlen == 1 || min_str + 1 >= min_end)
+          {
+            /* Both contraction parts don't fit, quit */
+            goto fill_max_and_min;
+          }
+          
+          /* Put contraction head */
+          *min_str++= *max_str++= *ptr++;
+          maxcharlen--;
+        }
+      }
+      /* Put contraction tail, or a single character */
+      *min_str++= *max_str++= *ptr++;    
+    }
   }
 
   *min_length= *max_length = (uint) (min_str - min_org);

--- 1.18/mysql-test/r/ctype_uca.result	2007-03-27 12:20:17 +05:00
+++ 1.19/mysql-test/r/ctype_uca.result	2007-06-28 13:34:42 +05:00
@@ -2663,3 +2663,95 @@ COUNT(*)	c1
 1	
 1	a
 DROP TABLE IF EXISTS t1;
+set names utf8;
+create table t1 (
+a varchar(255),
+key a(a)
+) character set utf8 collate utf8_danish_ci;
+insert into t1 values ('åaaaa'),('ååaaa'),('aaaaa');
+select a as like_a from t1 where a like 'a%';
+like_a
+aaaaa
+select a as like_aa from t1 where a like 'aa%';
+like_aa
+aaaaa
+select a as like_aaa from t1 where a like 'aaa%';
+like_aaa
+aaaaa
+select a as like_aaaa from t1 where a like 'aaaa%';
+like_aaaa
+aaaaa
+select a as like_aaaaa from t1 where a like 'aaaaa%';
+like_aaaaa
+aaaaa
+alter table t1 convert to character set ucs2 collate ucs2_danish_ci;
+select a as like_a from t1 where a like 'a%';
+like_a
+aaaaa
+select a as like_aa from t1 where a like 'aa%';
+like_aa
+aaaaa
+select a as like_aaa from t1 where a like 'aaa%';
+like_aaa
+aaaaa
+select a as like_aaaa from t1 where a like 'aaaa%';
+like_aaaa
+aaaaa
+select a as like_aaaaa from t1 where a like 'aaaaa%';
+like_aaaaa
+aaaaa
+drop table t1;
+create table t1 (
+a varchar(255),
+key(a)
+) character set utf8 collate utf8_spanish2_ci;
+insert into t1 values ('aaaaa'),('lllll'),('zzzzz');
+select a as like_l from t1 where a like 'l%';
+like_l
+lllll
+select a as like_ll from t1 where a like 'll%';
+like_ll
+lllll
+select a as like_lll from t1 where a like 'lll%';
+like_lll
+lllll
+select a as like_llll from t1 where a like 'llll%';
+like_llll
+lllll
+select a as like_lllll from t1 where a like 'lllll%';
+like_lllll
+lllll
+alter table t1 convert to character set ucs2 collate ucs2_spanish2_ci;
+select a as like_l from t1 where a like 'l%';
+like_l
+lllll
+select a as like_ll from t1 where a like 'll%';
+like_ll
+lllll
+select a as like_lll from t1 where a like 'lll%';
+like_lll
+lllll
+select a as like_llll from t1 where a like 'llll%';
+like_llll
+lllll
+select a as like_lllll from t1 where a like 'lllll%';
+like_lllll
+lllll
+drop table t1;
+create table t1 (
+a varchar(255),
+key a(a)
+) character set utf8 collate utf8_czech_ci;
+insert into t1 values
+('b'),('c'),('d'),('e'),('f'),('g'),('h'),('ch'),('i'),('j');
+select * from t1 where a like 'c%';
+a
+c
+ch
+alter table t1 convert to character set ucs2 collate ucs2_czech_ci;
+select * from t1 where a like 'c%';
+a
+c
+ch
+drop table t1;
+End for 5.0 tests

--- 1.16/mysql-test/t/ctype_uca.test	2007-03-27 12:20:17 +05:00
+++ 1.17/mysql-test/t/ctype_uca.test	2007-06-28 13:34:42 +05:00
@@ -485,3 +485,57 @@ CREATE TABLE t1 (
 insert into t1 values (''),('a');
 SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
 DROP TABLE IF EXISTS t1;
+
+#
+# Bug#27345 Incorrect data returned when range-read from utf8_danish_ci indexes
+#
+set names utf8;
+create table t1 (
+  a varchar(255),
+  key a(a)
+) character set utf8 collate utf8_danish_ci;
+insert into t1 values ('åaaaa'),('ååaaa'),('aaaaa');
+select a as like_a from t1 where a like 'a%';
+select a as like_aa from t1 where a like 'aa%';
+select a as like_aaa from t1 where a like 'aaa%';
+select a as like_aaaa from t1 where a like 'aaaa%';
+select a as like_aaaaa from t1 where a like 'aaaaa%';
+alter table t1 convert to character set ucs2 collate ucs2_danish_ci;
+select a as like_a from t1 where a like 'a%';
+select a as like_aa from t1 where a like 'aa%';
+select a as like_aaa from t1 where a like 'aaa%';
+select a as like_aaaa from t1 where a like 'aaaa%';
+select a as like_aaaaa from t1 where a like 'aaaaa%';
+drop table t1;
+
+create table t1 (
+  a varchar(255),
+  key(a)
+) character set utf8 collate utf8_spanish2_ci;
+insert into t1 values ('aaaaa'),('lllll'),('zzzzz');
+select a as like_l from t1 where a like 'l%';
+select a as like_ll from t1 where a like 'll%';
+select a as like_lll from t1 where a like 'lll%';
+select a as like_llll from t1 where a like 'llll%';
+select a as like_lllll from t1 where a like 'lllll%';
+alter table t1 convert to character set ucs2 collate ucs2_spanish2_ci;
+select a as like_l from t1 where a like 'l%';
+select a as like_ll from t1 where a like 'll%';
+select a as like_lll from t1 where a like 'lll%';
+select a as like_llll from t1 where a like 'llll%';
+select a as like_lllll from t1 where a like 'lllll%';
+drop table t1;
+
+create table t1 (
+  a varchar(255),
+  key a(a)
+) character set utf8 collate utf8_czech_ci;
+-- In Czech 'ch' is a single letter between 'h' and 'i'
+insert into t1 values
+('b'),('c'),('d'),('e'),('f'),('g'),('h'),('ch'),('i'),('j');
+select * from t1 where a like 'c%';
+alter table t1 convert to character set ucs2 collate ucs2_czech_ci;
+select * from t1 where a like 'c%';
+drop table t1;
+
+-- echo End for 5.0 tests
Thread
bk commit into 5.0 tree (bar:1.2499) BUG#27345bar28 Jun