List: Commits « Previous Message | Next Message »
From: bar  Date: June 30 2006 10:17am
Subject: bk commit into 4.1 tree (bar:1.2521) BUG#20471
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2521 06/06/30 15:17:13 bar@stripped +4 -0
  Bug#20471 LIKE search fails with indexed utf8 char column
  - The LIKE range was calculated incorrectly for multibyte character sets:
  the loop ran over "charlen" bytes instead of "charlen" characters.
  - Additional minor fixes for problems that showed up while fixing 20471 itself.

  strings/ctype-utf8.c
    1.90 06/06/30 15:17:05 bar@stripped +1 -1
    Fixing another bug which showed up while fixing #20471:
    
    max_sort_char for utf8_bin is U+FFFF, not U+00FF

  strings/ctype-mb.c
    1.42 06/06/30 15:17:05 bar@stripped +31 -9
    - Fixing an additional problem in pad_max_char:
    for UTF8 we should fill with the multibyte representation of max_sort_char U+FFFF,
    but for other character sets we should fill with just 0xFF, without using
    wc_mb() (as it was in 4.0).
    - Fixing bug#20471:
    the loop in my_like_range_mb must iterate over "charlen" characters, not "charlen" bytes.
     

  mysql-test/t/ctype_utf8.test
    1.73 06/06/30 15:17:05 bar@stripped +70 -0
    Adding test case

  mysql-test/r/ctype_utf8.result
    1.71 06/06/30 15:17:05 bar@stripped +75 -0
    Adding test case

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	bar
# Host:	bar.intranet.mysql.r18.ru
# Root:	/usr/home/bar/mysql-4.1.b20471

--- 1.70/mysql-test/r/ctype_utf8.result	2006-06-22 01:12:27 +05:00
+++ 1.71/mysql-test/r/ctype_utf8.result	2006-06-30 15:17:05 +05:00
@@ -1124,6 +1124,81 @@ check table t1;
 Table	Op	Msg_type	Msg_text
 test.t1	check	status	OK
 drop table t1;
+set names utf8;
+create table t1 (s1 char(5) character set utf8);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
+before_delete_general_ci
+ペテルグル
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
+after_delete_general_ci
+ペテルグル
+drop table t1;
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
+before_delete_unicode_ci
+ペテルグル
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
+after_delete_unicode_ci
+ペテルグル
+drop table t1;
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_bin);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
+before_delete_bin
+ペテルグル
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
+after_delete_bin
+ペテルグル
+drop table t1;
+set names utf8;
+create table t1 (a varchar(30) not null primary key)
+engine=innodb  default character set utf8 collate utf8_general_ci;
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+gci1
+さしすせそかきくけこあいうえお
+select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+gci2
+あいうえおかきくけこさしすせそ
+drop table t1;
+set names utf8;
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_unicode_ci;
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+uci1
+さしすせそかきくけこあいうえお
+select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+uci2
+あいうえおかきくけこさしすせそ
+drop table t1;
+set names utf8;
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_bin;
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+bin1
+さしすせそかきくけこあいうえお
+select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+bin2
+あいうえおかきくけこさしすせそ
+drop table t1;
 SET NAMES utf8;
 CREATE TABLE t1 (id int PRIMARY KEY,
 a varchar(16) collate utf8_unicode_ci NOT NULL default '',

--- 1.72/mysql-test/t/ctype_utf8.test	2006-06-22 01:11:55 +05:00
+++ 1.73/mysql-test/t/ctype_utf8.test	2006-06-30 15:17:05 +05:00
@@ -927,6 +927,76 @@ check table t1;
 drop table t1;
 
 #
+# Bug#20471 LIKE search fails with indexed utf8 char column
+#
+set names utf8;
+create table t1 (s1 char(5) character set utf8);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
+drop table t1;
+
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
+drop table t1;
+
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_bin);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
+drop table t1;
+
+# additional tests from duplicate bug#20744 MySQL return no result
+
+set names utf8;
+--disable_warnings
+create table t1 (a varchar(30) not null primary key)
+engine=innodb  default character set utf8 collate utf8_general_ci;
+--enable_warnings
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+drop table t1;
+
+set names utf8;
+--disable_warnings
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_unicode_ci;
+--enable_warnings
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+drop table t1;
+
+set names utf8;
+--disable_warnings
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_bin;
+--enable_warnings
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+drop table t1;
+
+
+
+#
 # Bug#14896: Comparison with a key in a partial index over mb chararacter field
 #
 

--- 1.41/strings/ctype-mb.c	2005-09-21 22:12:14 +05:00
+++ 1.42/strings/ctype-mb.c	2006-06-30 15:17:05 +05:00
@@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_
 
 
 /* 
-  Write max key: create a buffer with multibyte
+  Write max key:
+- for non-Unicode character sets:
+  just set to 255.
+- for Unicode character set (utf-8):
+  create a buffer with multibyte
   representation of the max_sort_char character,
   and copy it into max_str in a loop. 
 */
 static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
 {
   char buf[10];
-  char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
-                               (uchar*) buf + sizeof(buf));
+  char buflen;
+  
+  if (!(cs->state & MY_CS_UNICODE))
+  {
+    bfill(str, end - str, 255);
+    return;
+  }
+  
+  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
+                          (uchar*) buf + sizeof(buf));
+  
   DBUG_ASSERT(buflen > 0);
   do
   {
@@ -510,13 +523,12 @@ my_bool my_like_range_mb(CHARSET_INFO *c
 
   for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
   {
+    int chlen;
     if (*ptr == escape && ptr+1 != end)
     {
-      ptr++;					/* Skip escape */
-      *min_str++= *max_str++ = *ptr;
-      continue;
+      ptr++; /* Skip escape */
     }
-    if (*ptr == w_one || *ptr == w_many)	/* '_' and '%' in SQL */
+    else if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */
     {
       /* Write min key  */
       *min_length= (uint) (min_str - min_org);
@@ -534,7 +546,17 @@ my_bool my_like_range_mb(CHARSET_INFO *c
       pad_max_char(cs, max_str, max_end);
       return 0;
     }
-    *min_str++= *max_str++ = *ptr;
+    
+    if ((chlen= cs->cset->ismbchar(cs, ptr, end)))
+    {
+      memcpy(min_str, ptr, chlen);
+      memcpy(max_str, ptr, chlen);
+      ptr+= chlen - 1;
+      min_str+= chlen;
+      max_str+= chlen;
+    }
+    else
+      *min_str++= *max_str++ = *ptr;
   }
   *min_length= *max_length = (uint) (min_str - min_org);
 
@@ -887,7 +909,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin
     my_strnncoll_mb_bin,
     my_strnncollsp_mb_bin,
     my_strnxfrm_mb_bin,
-    my_like_range_simple,
+    my_like_range_mb,
     my_wildcmp_mb_bin,
     my_strcasecmp_mb_bin,
     my_instr_mb,

--- 1.89/strings/ctype-utf8.c	2005-12-12 21:39:22 +04:00
+++ 1.90/strings/ctype-utf8.c	2006-06-30 15:17:05 +05:00
@@ -2373,7 +2373,7 @@ CHARSET_INFO my_charset_utf8_bin=
     1,                  /* mbminlen     */
     3,                  /* mbmaxlen     */
     0,                  /* min_sort_char */
-    255,                /* max_sort_char */
+    0xFFFF,             /* max_sort_char */
     0,                  /* escape_with_backslash_is_dangerous */
     &my_charset_utf8_handler,
     &my_collation_mb_bin_handler
Thread
bk commit into 4.1 tree (bar:1.2521) BUG#20471 | bar | 30 Jun