From: Date: October 31 2008 1:26pm Subject: bzr commit into mysql-6.0 branch (bar:2898) Bug#37129 List-Archive: http://lists.mysql.com/commits/57553 X-Bug: 37129 Message-Id: <200810311226.m9VCQLig029897@bar.myoffice.izhnet.ru> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit #At file:///home/bar/mysql-bzr/mysql-6.0.b37129/ 2898 Alexander Barkov 2008-10-31 Bug#37129 LDML lacks <i> rule Problem: LDML didn't understand '<i>' tag in character set definition file Index.xml. Manual incorrectly used '<s>' instead of '<i>' in: http://dev.mysql.com/doc/refman/5.1/en/adding-collation-unicode-uca.html Fix: - Adding support for '<i>' tag. Manual should be changed to use '<i>'. - Adding tests for the fixed version of the collation "utf8_phone_ci" (from the above manual article). ------------- This line and the following will be ignored -------------- modified: mysql-test/r/ctype_ldml.result mysql-test/std_data/Index.xml mysql-test/t/ctype_ldml.test strings/ctype-uca.c strings/ctype.c unknown: LOG nohup.out libmysql/probes.h@ libmysql_r/probes.h@ mysql-test/std_data/AAA modified: mysql-test/r/ctype_ldml.result mysql-test/std_data/Index.xml mysql-test/t/ctype_ldml.test strings/ctype-uca.c strings/ctype.c === modified file 'mysql-test/r/ctype_ldml.result' --- a/mysql-test/r/ctype_ldml.result 2008-09-15 10:11:54 +0000 +++ b/mysql-test/r/ctype_ldml.result 2008-10-31 12:25:42 +0000 @@ -6,6 +6,35 @@ set names utf8; show variables like 'character_sets_dir%'; Variable_name Value character_sets_dir MYSQL_TEST_DIR/std_data/ +show collation like 'utf8_phone_ci'; +Collation Charset Id Default Compiled Sortlen +utf8_phone_ci utf8 352 8 +CREATE TABLE t1 ( +name VARCHAR(64), +phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci +); +INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02'); +INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04'); +INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01'); +INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03'); +INSERT INTO t1 VALUES 
('Sanja','+380 (912) 8008005'); +SELECT * FROM t1 ORDER BY phone; +name phone +Sanja +380 (912) 8008005 +Bar +7-912-800-80-01 +Svoj +7 912 800 80 02 +Ramil (7912) 800 80 03 +Hf +7 (912) 800 80 04 +SELECT * FROM t1 WHERE phone='+7(912)800-80-01'; +name phone +Bar +7-912-800-80-01 +SELECT * FROM t1 WHERE phone='79128008001'; +name phone +Bar +7-912-800-80-01 +SELECT * FROM t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1'; +name phone +Bar +7-912-800-80-01 +DROP TABLE t1; show collation like 'utf8_test_ci'; Collation Charset Id Default Compiled Sortlen utf8_test_ci utf8 353 8 @@ -320,6 +349,7 @@ The following tests check that two-byte select * from information_schema.collations where id>256 order by id; COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN utf16_test_ci utf16 327 8 +utf8_phone_ci utf8 352 8 utf8_test_ci utf8 353 8 ucs2_test_ci ucs2 358 8 ucs2_vn_ci ucs2 359 8 === modified file 'mysql-test/std_data/Index.xml' --- a/mysql-test/std_data/Index.xml 2008-09-15 10:11:54 +0000 +++ b/mysql-test/std_data/Index.xml 2008-10-31 12:25:42 +0000 @@ -1,10 +1,20 @@ + + + \u0000 + \u0020 + \u0028 + \u0029 + \u002B + \u002D + + a - b + b === modified file 'mysql-test/t/ctype_ldml.test' --- a/mysql-test/t/ctype_ldml.test 2008-07-24 11:33:35 +0000 +++ b/mysql-test/t/ctype_ldml.test 2008-10-31 12:25:42 +0000 @@ -15,6 +15,22 @@ set names utf8; --replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR show variables like 'character_sets_dir%'; +show collation like 'utf8_phone_ci'; +CREATE TABLE t1 ( + name VARCHAR(64), + phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci +); +INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02'); +INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04'); +INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01'); +INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03'); +INSERT INTO t1 VALUES ('Sanja','+380 (912) 8008005'); +SELECT * FROM t1 ORDER BY phone; +SELECT * FROM t1 WHERE phone='+7(912)800-80-01'; +SELECT * FROM t1 WHERE phone='79128008001'; +SELECT * FROM 
t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1'; +DROP TABLE t1; + show collation like 'utf8_test_ci'; create table t1 (c1 char(1) character set utf8 collate utf8_test_ci); insert into t1 values ('a'); === modified file 'strings/ctype-uca.c' --- a/strings/ctype-uca.c 2008-07-23 09:43:50 +0000 +++ b/strings/ctype-uca.c 2008-10-31 12:25:42 +0000 @@ -7684,6 +7684,13 @@ static my_coll_lexem_num my_coll_lexem_n goto ex; } + if (beg[0] == '=') + { + beg++; + rc= MY_COLL_LEXEM_DIFF; + goto ex; + } + if (beg[0] == '<') { for (beg++, lexem->diff= 1; @@ -7844,6 +7851,10 @@ static int my_coll_rule_parse(MY_COLL_RU item.diff[1]= 0; item.diff[2]= 0; } + else if (lexem.diff == 0) + { + item.diff[0]= item.diff[1]= item.diff[2]= 0; + } if (nitems >= mitems) { my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules"); === modified file 'strings/ctype.c' --- a/strings/ctype.c 2008-05-29 15:44:11 +0000 +++ b/strings/ctype.c 2008-10-31 12:25:42 +0000 @@ -74,6 +74,7 @@ struct my_cs_file_section_st #define _CS_DIFF1 19 #define _CS_DIFF2 20 #define _CS_DIFF3 21 +#define _CS_IDENTICAL 22 static struct my_cs_file_section_st sec[] = @@ -108,6 +109,7 @@ static struct my_cs_file_section_st sec[ {_CS_DIFF1, "charsets/charset/collation/rules/p"}, {_CS_DIFF2, "charsets/charset/collation/rules/s"}, {_CS_DIFF3, "charsets/charset/collation/rules/t"}, + {_CS_IDENTICAL, "charsets/charset/collation/rules/i"}, {0, NULL} }; @@ -269,6 +271,7 @@ static int cs_value(MY_XML_PARSER *st,co case _CS_DIFF1: case _CS_DIFF2: case _CS_DIFF3: + case _CS_IDENTICAL: { /* Convert collation description from @@ -276,7 +279,7 @@ static int cs_value(MY_XML_PARSER *st,co into ICU Collation Customization expression. */ char arg[16]; - const char *cmd[]= {"&","<","<<","<<<"}; + const char *cmd[]= {"&","<","<<","<<<","="}; i->cs.tailoring= i->tailoring; mstr(arg,attr,len,sizeof(arg)-1); if (i->tailoring_length + 20 < sizeof(i->tailoring))