List: Commits  « Previous Message | Next Message »
From: bar  Date: July 20 2006 10:52am
Subject:bk commit into 4.1 tree (bar:1.2523) BUG#20471
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-07-20 15:52:48+05:00, bar@stripped +6 -0
  Bug#20471 LIKE search fails with indexed utf8 char column
  The main problem was already fixed by Igor as part of the fix for bug 16674.
  Adding some additional minor fixes and tests.

  include/m_ctype.h@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +4 -0
    Adding reference to CHARSET_INFO.txt

  mysql-test/r/ctype_utf8.result@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +75 -0
    Adding test case

  mysql-test/t/ctype_utf8.test@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +70 -0
    Adding test case

  strings/CHARSET_INFO.txt@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +10 -2
    Adding comment about max_sort_char

  strings/ctype-mb.c@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +17 -4
    Restoring the behaviour that non-Unicode character sets use 0xFF as
    the pad character for max_str. Only Unicode character sets use wc_mb.

  strings/ctype-utf8.c@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +1 -1
    Changed max_sort_char for UTF8 from U+00FF to U+FFFF.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	bar
# Host:	bar.intranet.mysql.r18.ru
# Root:	/usr/home/bar/mysql-4.1.b20471v2

--- 1.107/include/m_ctype.h	2006-07-20 15:52:57 +05:00
+++ 1.108/include/m_ctype.h	2006-07-20 15:52:57 +05:00
@@ -108,6 +108,8 @@
 
 struct charset_info_st;
 
+
+/* See strings/CHARSET_INFO.txt about information on this structure  */
 typedef struct my_collation_handler_st
 {
   my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
@@ -147,6 +149,7 @@
 extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
 
 
+/* See strings/CHARSET_INFO.txt about information on this structure  */
 typedef struct my_charset_handler_st
 {
   my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
@@ -204,6 +207,7 @@
 extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
 
 
+/* See strings/CHARSET_INFO.txt about information on this structure  */
 typedef struct charset_info_st
 {
   uint      number;

--- 1.73/mysql-test/r/ctype_utf8.result	2006-07-20 15:52:57 +05:00
+++ 1.74/mysql-test/r/ctype_utf8.result	2006-07-20 15:52:57 +05:00
@@ -1124,6 +1124,81 @@
 Table	Op	Msg_type	Msg_text
 test.t1	check	status	OK
 drop table t1;
+set names utf8;
+create table t1 (s1 char(5) character set utf8);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
+before_delete_general_ci
+ペテルグル
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
+after_delete_general_ci
+ペテルグル
+drop table t1;
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
+before_delete_unicode_ci
+ペテルグル
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
+after_delete_unicode_ci
+ペテルグル
+drop table t1;
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_bin);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
+before_delete_bin
+ペテルグル
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
+after_delete_bin
+ペテルグル
+drop table t1;
+set names utf8;
+create table t1 (a varchar(30) not null primary key)
+engine=innodb  default character set utf8 collate utf8_general_ci;
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+gci1
+さしすせそかきくけこあいうえお
+select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+gci2
+あいうえおかきくけこさしすせそ
+drop table t1;
+set names utf8;
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_unicode_ci;
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+uci1
+さしすせそかきくけこあいうえお
+select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+uci2
+あいうえおかきくけこさしすせそ
+drop table t1;
+set names utf8;
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_bin;
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+bin1
+さしすせそかきくけこあいうえお
+select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+bin2
+あいうえおかきくけこさしすせそ
+drop table t1;
 SET NAMES utf8;
 CREATE TABLE t1 (id int PRIMARY KEY,
 a varchar(16) collate utf8_unicode_ci NOT NULL default '',

--- 1.75/mysql-test/t/ctype_utf8.test	2006-07-20 15:52:57 +05:00
+++ 1.76/mysql-test/t/ctype_utf8.test	2006-07-20 15:52:57 +05:00
@@ -927,6 +927,76 @@
 drop table t1;
 
 #
+# Bug#20471 LIKE search fails with indexed utf8 char column
+#
+set names utf8;
+create table t1 (s1 char(5) character set utf8);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
+drop table t1;
+
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
+drop table t1;
+
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_bin);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
+drop table t1;
+
+# additional tests from duplicate bug#20744 MySQL return no result
+
+set names utf8;
+--disable_warnings
+create table t1 (a varchar(30) not null primary key)
+engine=innodb  default character set utf8 collate utf8_general_ci;
+--enable_warnings
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+drop table t1;
+
+set names utf8;
+--disable_warnings
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_unicode_ci;
+--enable_warnings
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+drop table t1;
+
+set names utf8;
+--disable_warnings
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_bin;
+--enable_warnings
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+drop table t1;
+
+
+
+#
 # Bug#14896: Comparison with a key in a partial index over mb chararacter field
 #
 

--- 1.42/strings/ctype-mb.c	2006-07-20 15:52:57 +05:00
+++ 1.43/strings/ctype-mb.c	2006-07-20 15:52:57 +05:00
@@ -449,15 +449,28 @@
 
 
 /* 
-  Write max key: create a buffer with multibyte
+  Write max key:
+- for non-Unicode character sets:
+  just set to 255.
+- for Unicode character set (utf-8):
+  create a buffer with multibyte
   representation of the max_sort_char character,
   and copy it into max_str in a loop. 
 */
 static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
 {
   char buf[10];
-  char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
-                               (uchar*) buf + sizeof(buf));
+  char buflen;
+  
+  if (!(cs->state & MY_CS_UNICODE))
+  {
+    bfill(str, end - str, 255);
+    return;
+  }
+  
+  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
+                          (uchar*) buf + sizeof(buf));
+  
   DBUG_ASSERT(buflen > 0);
   do
   {
@@ -894,7 +907,7 @@
     my_strnncoll_mb_bin,
     my_strnncollsp_mb_bin,
     my_strnxfrm_mb_bin,
-    my_like_range_simple,
+    my_like_range_mb,
     my_wildcmp_mb_bin,
     my_strcasecmp_mb_bin,
     my_instr_mb,

--- 1.89/strings/ctype-utf8.c	2006-07-20 15:52:57 +05:00
+++ 1.90/strings/ctype-utf8.c	2006-07-20 15:52:57 +05:00
@@ -2373,7 +2373,7 @@
     1,                  /* mbminlen     */
     3,                  /* mbmaxlen     */
     0,                  /* min_sort_char */
-    255,                /* max_sort_char */
+    0xFFFF,             /* max_sort_char */
     0,                  /* escape_with_backslash_is_dangerous */
     &my_charset_utf8_handler,
     &my_collation_mb_bin_handler

--- 1.4/strings/CHARSET_INFO.txt	2006-07-20 15:52:57 +05:00
+++ 1.5/strings/CHARSET_INFO.txt	2006-07-20 15:52:57 +05:00
@@ -33,7 +33,7 @@
   uint      strxfrm_multiply;
   uint      mbminlen;
   uint      mbmaxlen;
-  char      max_sort_char; /* For LIKE optimization */
+  uint16    max_sort_char; /* For LIKE optimization */
 
   MY_CHARSET_HANDLER *cset;
   MY_COLLATION_HANDLER *coll;
@@ -134,7 +134,15 @@
   mbmaxlen         - maximum multibyte sequence length.
                      1 for 8bit charsets. Can be also 2 or 3.
 
-
+  max_sort_char    - for LIKE range
+                     in case of 8bit character sets - native code
+		     of maximum character (max_str pad byte);
+                     in case of UTF8 and UCS2 - Unicode code of the maximum
+		     possible character (usually U+FFFF). This code is
+		     converted to multibyte representation (usually 0xEFBFBF)
+		     and then used as a pad sequence for max_str.
+		     in case of other multibyte character sets -
+		     max_str pad byte (usually 0xFF).
 
 MY_CHARSET_HANDLER
 ==================
Thread
bk commit into 4.1 tree (bar:1.2523) BUG#20471 - bar - 20 Jul