List:Commits« Previous MessageNext Message »
From:Alexander Barkov Date:October 31 2008 12:26pm
Subject:bzr commit into mysql-6.0 branch (bar:2898) Bug#37129
View as plain text  
#At file:///home/bar/mysql-bzr/mysql-6.0.b37129/

 2898 Alexander Barkov	2008-10-31
      Bug#37129 LDML lacks <i> rule
      Problem: The LDML parser did not recognize the '<i>' (identical) tag
      in the character set definition file Index.xml.
      The manual incorrectly used '<s>' instead of '<i>' in:
       http://dev.mysql.com/doc/refman/5.1/en/adding-collation-unicode-uca.html
      Fix:
      - Added support for the '<i>' tag. The manual should be changed to use '<i>'.
      - Added tests for the corrected version of the "utf8_phone_ci" collation
        (from the manual article referenced above).
      
      ------------- This line and the following will be ignored --------------
      
      modified:
        mysql-test/r/ctype_ldml.result
        mysql-test/std_data/Index.xml
        mysql-test/t/ctype_ldml.test
        strings/ctype-uca.c
        strings/ctype.c
      unknown:
        LOG
        nohup.out
        libmysql/probes.h@
        libmysql_r/probes.h@
        mysql-test/std_data/AAA
modified:
  mysql-test/r/ctype_ldml.result
  mysql-test/std_data/Index.xml
  mysql-test/t/ctype_ldml.test
  strings/ctype-uca.c
  strings/ctype.c

=== modified file 'mysql-test/r/ctype_ldml.result'
--- a/mysql-test/r/ctype_ldml.result	2008-09-15 10:11:54 +0000
+++ b/mysql-test/r/ctype_ldml.result	2008-10-31 12:25:42 +0000
@@ -6,6 +6,35 @@ set names utf8;
 show variables like 'character_sets_dir%';
 Variable_name	Value
 character_sets_dir	MYSQL_TEST_DIR/std_data/
+show collation like 'utf8_phone_ci';
+Collation	Charset	Id	Default	Compiled	Sortlen
+utf8_phone_ci	utf8	352			8
+CREATE TABLE t1 (
+name VARCHAR(64),
+phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci
+);
+INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02');
+INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04');
+INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01');
+INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03');
+INSERT INTO t1 VALUES ('Sanja','+380 (912) 8008005');
+SELECT * FROM t1 ORDER BY phone;
+name	phone
+Sanja	+380 (912) 8008005
+Bar	+7-912-800-80-01
+Svoj	+7 912 800 80 02
+Ramil	(7912) 800 80 03
+Hf	+7 (912) 800 80 04
+SELECT * FROM t1 WHERE phone='+7(912)800-80-01';
+name	phone
+Bar	+7-912-800-80-01
+SELECT * FROM t1 WHERE phone='79128008001';
+name	phone
+Bar	+7-912-800-80-01
+SELECT * FROM t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1';
+name	phone
+Bar	+7-912-800-80-01
+DROP TABLE t1;
 show collation like 'utf8_test_ci';
 Collation	Charset	Id	Default	Compiled	Sortlen
 utf8_test_ci	utf8	353			8
@@ -320,6 +349,7 @@ The following tests check that two-byte 
 select * from information_schema.collations where id>256 order by id;
 COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
 utf16_test_ci	utf16	327			8
+utf8_phone_ci	utf8	352			8
 utf8_test_ci	utf8	353			8
 ucs2_test_ci	ucs2	358			8
 ucs2_vn_ci	ucs2	359			8

=== modified file 'mysql-test/std_data/Index.xml'
--- a/mysql-test/std_data/Index.xml	2008-09-15 10:11:54 +0000
+++ b/mysql-test/std_data/Index.xml	2008-10-31 12:25:42 +0000
@@ -1,10 +1,20 @@
 <charsets>
 
   <charset name="utf8">
+    <collation name="utf8_phone_ci" id="352">
+      <rules>
+        <reset>\u0000</reset>
+          <i>\u0020</i> <!-- space -->
+          <i>\u0028</i> <!-- left parenthesis -->
+          <i>\u0029</i> <!-- right parenthesis -->
+          <i>\u002B</i> <!-- plus -->
+          <i>\u002D</i> <!-- hyphen -->
+      </rules>
+    </collation>
     <collation name="utf8_test_ci" id="353">
       <rules>
         <reset>a</reset>
-        <s>b</s>
+        <i>b</i>
       </rules>
     </collation>
 

=== modified file 'mysql-test/t/ctype_ldml.test'
--- a/mysql-test/t/ctype_ldml.test	2008-07-24 11:33:35 +0000
+++ b/mysql-test/t/ctype_ldml.test	2008-10-31 12:25:42 +0000
@@ -15,6 +15,22 @@ set names utf8;
 --replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
 show variables like 'character_sets_dir%';
 
+show collation like 'utf8_phone_ci';
+CREATE TABLE t1 (
+ name VARCHAR(64),
+ phone VARCHAR(64) CHARACTER SET utf8 COLLATE utf8_phone_ci
+);
+INSERT INTO t1 VALUES ('Svoj','+7 912 800 80 02');
+INSERT INTO t1 VALUES ('Hf','+7 (912) 800 80 04');
+INSERT INTO t1 VALUES ('Bar','+7-912-800-80-01');
+INSERT INTO t1 VALUES ('Ramil','(7912) 800 80 03');
+INSERT INTO t1 VALUES ('Sanja','+380 (912) 8008005');
+SELECT * FROM t1 ORDER BY phone;
+SELECT * FROM t1 WHERE phone='+7(912)800-80-01';
+SELECT * FROM t1 WHERE phone='79128008001';
+SELECT * FROM t1 WHERE phone='7 9 1 2 8 0 0 8 0 0 1';
+DROP TABLE t1;
+
 show collation like 'utf8_test_ci';
 create table t1 (c1 char(1) character set utf8 collate utf8_test_ci);
 insert into t1 values ('a');

=== modified file 'strings/ctype-uca.c'
--- a/strings/ctype-uca.c	2008-07-23 09:43:50 +0000
+++ b/strings/ctype-uca.c	2008-10-31 12:25:42 +0000
@@ -7684,6 +7684,13 @@ static my_coll_lexem_num my_coll_lexem_n
       goto ex;
     }
     
+    if (beg[0] == '=')
+    {
+      beg++;
+      rc= MY_COLL_LEXEM_DIFF;
+      goto ex;
+    }
+    
     if (beg[0] == '<')
     {
       for (beg++, lexem->diff= 1;
@@ -7844,6 +7851,10 @@ static int my_coll_rule_parse(MY_COLL_RU
           item.diff[1]= 0;
           item.diff[2]= 0;
         }
+        else if (lexem.diff == 0)
+        {
+          item.diff[0]= item.diff[1]= item.diff[2]= 0;
+        }
         if (nitems >= mitems)
         {
           my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");

=== modified file 'strings/ctype.c'
--- a/strings/ctype.c	2008-05-29 15:44:11 +0000
+++ b/strings/ctype.c	2008-10-31 12:25:42 +0000
@@ -74,6 +74,7 @@ struct my_cs_file_section_st
 #define	_CS_DIFF1	19
 #define	_CS_DIFF2	20
 #define	_CS_DIFF3	21
+#define	_CS_IDENTICAL	22
 
 
 static struct my_cs_file_section_st sec[] =
@@ -108,6 +109,7 @@ static struct my_cs_file_section_st sec[
   {_CS_DIFF1,		"charsets/charset/collation/rules/p"},
   {_CS_DIFF2,		"charsets/charset/collation/rules/s"},
   {_CS_DIFF3,		"charsets/charset/collation/rules/t"},
+  {_CS_IDENTICAL,	"charsets/charset/collation/rules/i"},
   {0,	NULL}
 };
 
@@ -269,6 +271,7 @@ static int cs_value(MY_XML_PARSER *st,co
   case _CS_DIFF1:
   case _CS_DIFF2:
   case _CS_DIFF3:
+  case _CS_IDENTICAL:
     {
       /*
         Convert collation description from
@@ -276,7 +279,7 @@ static int cs_value(MY_XML_PARSER *st,co
         into ICU Collation Customization expression.
       */
       char arg[16];
-      const char *cmd[]= {"&","<","<<","<<<"};
+      const char *cmd[]= {"&","<","<<","<<<","="};
       i->cs.tailoring= i->tailoring;
       mstr(arg,attr,len,sizeof(arg)-1);
       if (i->tailoring_length + 20 < sizeof(i->tailoring))

Thread
bzr commit into mysql-6.0 branch (bar:2898) Bug#37129Alexander Barkov31 Oct