Below is the list of changes that have just been committed into a local
4.1 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2006-07-20 15:52:48+05:00, bar@stripped +6 -0
Bug#20471 LIKE search fails with indexed utf8 char column
The main problem was already fixed by Igor as part of Bug#16674.
Adding some additional minor fixes and tests.
include/m_ctype.h@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +4 -0
Adding reference to CHARSET_INFO.txt
mysql-test/r/ctype_utf8.result@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +75 -0
Adding test case
mysql-test/t/ctype_utf8.test@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +70 -0
Adding test case
strings/CHARSET_INFO.txt@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +10 -2
Adding comment about max_sort_char
strings/ctype-mb.c@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +17 -4
Restoring the behavior that non-Unicode character sets use 0xFF as the
pad character for max_str. Only Unicode character sets use wc_mb.
strings/ctype-utf8.c@stripped, 2006-07-20 15:52:44+05:00, bar@stripped +1 -1
Changed max_sort_char for UTF8 from U+00FF to U+FFFF.
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: bar
# Host: bar.intranet.mysql.r18.ru
# Root: /usr/home/bar/mysql-4.1.b20471v2
--- 1.107/include/m_ctype.h 2006-07-20 15:52:57 +05:00
+++ 1.108/include/m_ctype.h 2006-07-20 15:52:57 +05:00
@@ -108,6 +108,8 @@
struct charset_info_st;
+
+/* See strings/CHARSET_INFO.txt about information on this structure */
typedef struct my_collation_handler_st
{
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
@@ -147,6 +149,7 @@
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
+/* See strings/CHARSET_INFO.txt about information on this structure */
typedef struct my_charset_handler_st
{
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
@@ -204,6 +207,7 @@
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
+/* See strings/CHARSET_INFO.txt about information on this structure */
typedef struct charset_info_st
{
uint number;
--- 1.73/mysql-test/r/ctype_utf8.result 2006-07-20 15:52:57 +05:00
+++ 1.74/mysql-test/r/ctype_utf8.result 2006-07-20 15:52:57 +05:00
@@ -1124,6 +1124,81 @@
Table Op Msg_type Msg_text
test.t1 check status OK
drop table t1;
+set names utf8;
+create table t1 (s1 char(5) character set utf8);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
+before_delete_general_ci
+ペテルグル
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
+after_delete_general_ci
+ペテルグル
+drop table t1;
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
+before_delete_unicode_ci
+ペテルグル
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
+after_delete_unicode_ci
+ペテルグル
+drop table t1;
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_bin);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
+before_delete_bin
+ペテルグル
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
+after_delete_bin
+ペテルグル
+drop table t1;
+set names utf8;
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_general_ci;
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+gci1
+さしすせそかきくけこあいうえお
+select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+gci2
+あいうえおかきくけこさしすせそ
+drop table t1;
+set names utf8;
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_unicode_ci;
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+uci1
+さしすせそかきくけこあいうえお
+select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+uci2
+あいうえおかきくけこさしすせそ
+drop table t1;
+set names utf8;
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_bin;
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+bin1
+さしすせそかきくけこあいうえお
+select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+bin2
+あいうえおかきくけこさしすせそ
+drop table t1;
SET NAMES utf8;
CREATE TABLE t1 (id int PRIMARY KEY,
a varchar(16) collate utf8_unicode_ci NOT NULL default '',
--- 1.75/mysql-test/t/ctype_utf8.test 2006-07-20 15:52:57 +05:00
+++ 1.76/mysql-test/t/ctype_utf8.test 2006-07-20 15:52:57 +05:00
@@ -927,6 +927,76 @@
drop table t1;
#
+# Bug#20471 LIKE search fails with indexed utf8 char column
+#
+set names utf8;
+create table t1 (s1 char(5) character set utf8);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
+drop table t1;
+
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
+drop table t1;
+
+set names utf8;
+create table t1 (s1 char(5) character set utf8 collate utf8_bin);
+insert into t1 values
+('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
+create index it1 on t1 (s1);
+select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
+delete from t1 where s1 = 'Y';
+select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
+drop table t1;
+
+# additional tests from duplicate bug#20744 MySQL return no result
+
+set names utf8;
+--disable_warnings
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_general_ci;
+--enable_warnings
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+drop table t1;
+
+set names utf8;
+--disable_warnings
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_unicode_ci;
+--enable_warnings
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+drop table t1;
+
+set names utf8;
+--disable_warnings
+create table t1 (a varchar(30) not null primary key)
+engine=innodb default character set utf8 collate utf8_bin;
+--enable_warnings
+insert into t1 values ('あいうえおかきくけこさしすせそ');
+insert into t1 values ('さしすせそかきくけこあいうえお');
+select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
+select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
+drop table t1;
+
+
+
+#
# Bug#14896: Comparison with a key in a partial index over mb chararacter field
#
--- 1.42/strings/ctype-mb.c 2006-07-20 15:52:57 +05:00
+++ 1.43/strings/ctype-mb.c 2006-07-20 15:52:57 +05:00
@@ -449,15 +449,28 @@
/*
- Write max key: create a buffer with multibyte
+ Write max key:
+- for non-Unicode character sets:
+ just set to 255.
+- for Unicode character set (utf-8):
+ create a buffer with multibyte
representation of the max_sort_char character,
and copy it into max_str in a loop.
*/
static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
{
char buf[10];
- char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
- (uchar*) buf + sizeof(buf));
+ char buflen;
+
+ if (!(cs->state & MY_CS_UNICODE))
+ {
+ bfill(str, end - str, 255);
+ return;
+ }
+
+ buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
+ (uchar*) buf + sizeof(buf));
+
DBUG_ASSERT(buflen > 0);
do
{
@@ -894,7 +907,7 @@
my_strnncoll_mb_bin,
my_strnncollsp_mb_bin,
my_strnxfrm_mb_bin,
- my_like_range_simple,
+ my_like_range_mb,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
my_instr_mb,
--- 1.89/strings/ctype-utf8.c 2006-07-20 15:52:57 +05:00
+++ 1.90/strings/ctype-utf8.c 2006-07-20 15:52:57 +05:00
@@ -2373,7 +2373,7 @@
1, /* mbminlen */
3, /* mbmaxlen */
0, /* min_sort_char */
- 255, /* max_sort_char */
+ 0xFFFF, /* max_sort_char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_utf8_handler,
&my_collation_mb_bin_handler
--- 1.4/strings/CHARSET_INFO.txt 2006-07-20 15:52:57 +05:00
+++ 1.5/strings/CHARSET_INFO.txt 2006-07-20 15:52:57 +05:00
@@ -33,7 +33,7 @@
uint strxfrm_multiply;
uint mbminlen;
uint mbmaxlen;
- char max_sort_char; /* For LIKE optimization */
+ uint16 max_sort_char; /* For LIKE optimization */
MY_CHARSET_HANDLER *cset;
MY_COLLATION_HANDLER *coll;
@@ -134,7 +134,15 @@
mbmaxlen - maximum multibyte sequence length.
1 for 8bit charsets. Can be also 2 or 3.
-
+ max_sort_char - for LIKE range
+ in case of 8bit character sets - native code
+ of maximum character (max_str pad byte);
+ in case of UTF8 and UCS2 - Unicode code of the maximum
+ possible character (usually U+FFFF). This code is
+ converted to multibyte representation (usually 0xEFBFBF)
+ and then used as a pad sequence for max_str.
+ in case of other multibyte character sets -
+ max_str pad byte (usually 0xFF).
MY_CHARSET_HANDLER
==================
| Thread |
|---|
| • bk commit into 4.1 tree (bar:1.2523) BUG#20471 | bar | 20 Jul |