#At file:///home/bar/mysql-bzr/mysql-6.0.b37129/
2898 Alexander Barkov 2008-10-31
Bug#37129 LDML lacks <i> rule
Problem: the LDML parser did not recognize the '<i>' (identical) tag in
the character set definition file Index.xml.
The manual incorrectly used '<s>' instead of '<i>' in:
http://dev.mysql.com/doc/refman/5.1/en/adding-collation-unicode-uca.html
Fix:
- Added support for the '<i>' tag. The manual should be changed to use '<i>'.
- Added tests for the corrected version of the "utf8_phone_ci" collation
(from the manual article referenced above).
------------- This line and the following will be ignored --------------
modified:
mysql-test/r/ctype_ldml.result
mysql-test/std_data/Index.xml
mysql-test/t/ctype_ldml.test
strings/ctype-uca.c
strings/ctype.c
unknown:
LOG
nohup.out
libmysql/probes.h@
libmysql_r/probes.h@
mysql-test/std_data/AAA
modified:
mysql-test/r/ctype_ldml.result
mysql-test/std_data/Index.xml
mysql-test/t/ctype_ldml.test
strings/ctype-uca.c
strings/ctype.c
=== modified file 'mysql-test/r/ctype_ldml.result'
--- a/mysql-test/r/ctype_ldml.result 2008-09-15 10:11:54 +0000
+++ b/mysql-test/r/ctype_ldml.result 2008-10-31 12:25:42 +0000
@@ -6,6 +6,35 @@ set names utf8;
show variables like 'character_sets_dir%';
Variable_name Value
character_sets_dir MYSQL_TEST_DIR/std_data/
+show collation like 'utf8_phone_ci';
+Collation Charset Id Default Compiled Sortlen
+utf8_phone_ci utf8 352 8
+CREATE TABLE t1 (
+name VARCHAR(64),
+phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci
+);
+INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02');
+INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04');
+INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01');
+INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03');
+INSERT INTO t1 VALUES ('Sanja','+380 (912) 8008005');
+SELECT * FROM t1 ORDER BY phone;
+name phone
+Sanja +380 (912) 8008005
+Bar +7-912-800-80-01
+Svoj +7 912 800 80 02
+Ramil (7912) 800 80 03
+Hf +7 (912) 800 80 04
+SELECT * FROM t1 WHERE phone='+7(912)800-80-01';
+name phone
+Bar +7-912-800-80-01
+SELECT * FROM t1 WHERE phone='79128008001';
+name phone
+Bar +7-912-800-80-01
+SELECT * FROM t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1';
+name phone
+Bar +7-912-800-80-01
+DROP TABLE t1;
show collation like 'utf8_test_ci';
Collation Charset Id Default Compiled Sortlen
utf8_test_ci utf8 353 8
@@ -320,6 +349,7 @@ The following tests check that two-byte
select * from information_schema.collations where id>256 order by id;
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
utf16_test_ci utf16 327 8
+utf8_phone_ci utf8 352 8
utf8_test_ci utf8 353 8
ucs2_test_ci ucs2 358 8
ucs2_vn_ci ucs2 359 8
=== modified file 'mysql-test/std_data/Index.xml'
--- a/mysql-test/std_data/Index.xml 2008-09-15 10:11:54 +0000
+++ b/mysql-test/std_data/Index.xml 2008-10-31 12:25:42 +0000
@@ -1,10 +1,20 @@
<charsets>
<charset name="utf8">
+ <collation name="utf8_phone_ci" id="352">
+ <rules>
+ <reset>\u0000</reset>
+ <i>\u0020</i> <!-- space -->
+ <i>\u0028</i> <!-- left parenthesis -->
+ <i>\u0029</i> <!-- right parenthesis -->
+ <i>\u002B</i> <!-- plus -->
+ <i>\u002D</i> <!-- hyphen -->
+ </rules>
+ </collation>
<collation name="utf8_test_ci" id="353">
<rules>
<reset>a</reset>
- <s>b</s>
+ <i>b</i>
</rules>
</collation>
=== modified file 'mysql-test/t/ctype_ldml.test'
--- a/mysql-test/t/ctype_ldml.test 2008-07-24 11:33:35 +0000
+++ b/mysql-test/t/ctype_ldml.test 2008-10-31 12:25:42 +0000
@@ -15,6 +15,22 @@ set names utf8;
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
show variables like 'character_sets_dir%';
+show collation like 'utf8_phone_ci';
+CREATE TABLE t1 (
+ name VARCHAR(64),
+ phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci
+);
+INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02');
+INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04');
+INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01');
+INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03');
+INSERT INTO t1 VALUES ('Sanja','+380 (912) 8008005');
+SELECT * FROM t1 ORDER BY phone;
+SELECT * FROM t1 WHERE phone='+7(912)800-80-01';
+SELECT * FROM t1 WHERE phone='79128008001';
+SELECT * FROM t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1';
+DROP TABLE t1;
+
show collation like 'utf8_test_ci';
create table t1 (c1 char(1) character set utf8 collate utf8_test_ci);
insert into t1 values ('a');
=== modified file 'strings/ctype-uca.c'
--- a/strings/ctype-uca.c 2008-07-23 09:43:50 +0000
+++ b/strings/ctype-uca.c 2008-10-31 12:25:42 +0000
@@ -7684,6 +7684,13 @@ static my_coll_lexem_num my_coll_lexem_n
goto ex;
}
+ if (beg[0] == '=')
+ {
+ beg++;
+ rc= MY_COLL_LEXEM_DIFF;
+ goto ex;
+ }
+
if (beg[0] == '<')
{
for (beg++, lexem->diff= 1;
@@ -7844,6 +7851,10 @@ static int my_coll_rule_parse(MY_COLL_RU
item.diff[1]= 0;
item.diff[2]= 0;
}
+ else if (lexem.diff == 0)
+ {
+ item.diff[0]= item.diff[1]= item.diff[2]= 0;
+ }
if (nitems >= mitems)
{
my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");
=== modified file 'strings/ctype.c'
--- a/strings/ctype.c 2008-05-29 15:44:11 +0000
+++ b/strings/ctype.c 2008-10-31 12:25:42 +0000
@@ -74,6 +74,7 @@ struct my_cs_file_section_st
#define _CS_DIFF1 19
#define _CS_DIFF2 20
#define _CS_DIFF3 21
+#define _CS_IDENTICAL 22
static struct my_cs_file_section_st sec[] =
@@ -108,6 +109,7 @@ static struct my_cs_file_section_st sec[
{_CS_DIFF1, "charsets/charset/collation/rules/p"},
{_CS_DIFF2, "charsets/charset/collation/rules/s"},
{_CS_DIFF3, "charsets/charset/collation/rules/t"},
+ {_CS_IDENTICAL, "charsets/charset/collation/rules/i"},
{0, NULL}
};
@@ -269,6 +271,7 @@ static int cs_value(MY_XML_PARSER *st,co
case _CS_DIFF1:
case _CS_DIFF2:
case _CS_DIFF3:
+ case _CS_IDENTICAL:
{
/*
Convert collation description from
@@ -276,7 +279,7 @@ static int cs_value(MY_XML_PARSER *st,co
into ICU Collation Customization expression.
*/
char arg[16];
- const char *cmd[]= {"&","<","<<","<<<"};
+ const char *cmd[]= {"&","<","<<","<<<","="};
i->cs.tailoring= i->tailoring;
mstr(arg,attr,len,sizeof(arg)-1);
if (i->tailoring_length + 20 < sizeof(i->tailoring))
| Thread |
|---|
| • bzr commit into mysql-6.0 branch (bar:2898) Bug#37129 | Alexander Barkov | 31 Oct |