List:Commits« Previous MessageNext Message »
From:Chad MILLER Date:May 31 2007 8:11pm
Subject:bk commit into 5.1 tree (cmiller:1.2474) BUG#26474
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of cmiller. When cmiller does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-05-31 16:11:49-04:00, cmiller@stripped +5 -0
  Contributed patch from Harshula Jayasuriya,  CLA signed 21 Feb 2007.
  
  Bug#26474: Add Sinhala script collation
  
  The Default Unicode Collation Element Table (DUCET) and the Unicode 
  binary order are insufficient to properly sort Sinhala script.
  
  A standard Sinhala collation order has been established as part of 
  SLS1134 (SCCII Part 1 : Collation Sequence).

  config/ac-macros/character_sets.m4@stripped, 2007-05-31 16:11:47-04:00, cmiller@stripped +2 -2
    Add Sinhala collation reference.

  mysql-test/r/ctype_utf8.result@stripped, 2007-05-31 16:11:47-04:00, cmiller@stripped +110 -0
    Verify collation order matches predictions.
    ---
    Include the hexadecimal, as requested by submitter.

  mysql-test/t/ctype_utf8.test@stripped, 2007-05-31 16:11:47-04:00, cmiller@stripped +13 -0
    Test predicted collation order.
    ---
    Include the hexadecimal, as requested by submitter.

  mysys/charset-def.c@stripped, 2007-05-31 16:11:47-04:00, cmiller@stripped +4 -0
    Insert new collation.

  strings/ctype-uca.c@stripped, 2007-05-31 16:11:47-04:00, cmiller@stripped +94 -1
    Add new utf8 and ucs2 collations for Sinhala.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	cmiller
# Host:	zippy.cornsilk.net
# Root:	/home/cmiller/work/mysql/mysql-5.2-maint--bug26474

--- 1.43/strings/ctype-uca.c	2007-01-22 07:10:42 -05:00
+++ 1.44/strings/ctype-uca.c	2007-05-31 16:11:47 -04:00
@@ -6712,6 +6712,34 @@ static const char hungarian[]=
     "&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150"
     "&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170";
 
+/*
+  Sinhala tailoring rules, after SCCII Part 1 : Collation Sequence (SLS1134)
+  2006/11/24
+  Harshula Jayasuriya <harshula at gmail dot com>
+  Language Technology Research Lab, University of Colombo / ICTA
+*/
+#if 0 /* disabled: short form with four separate '&' resets, never compiled */
+static const char sinhala[]=
+    "& \\u0D96 < \\u0D82 < \\u0D83"
+    "& \\u0DA5 < \\u0DA4"
+    "& \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3"
+    "& \\u0DDE < \\u0DCA";
+#else /* compiled form: a single '<' chain that starts at the \u0D96 reset */
+static const char sinhala[]=
+    "& \\u0D96 < \\u0D82 < \\u0D83 < \\u0D9A < \\u0D9B < \\u0D9C < \\u0D9D"
+              "< \\u0D9E < \\u0D9F < \\u0DA0 < \\u0DA1 < \\u0DA2 < \\u0DA3"
+              "< \\u0DA5 < \\u0DA4 < \\u0DA6"
+              "< \\u0DA7 < \\u0DA8 < \\u0DA9 < \\u0DAA < \\u0DAB < \\u0DAC"
+              "< \\u0DAD < \\u0DAE < \\u0DAF < \\u0DB0 < \\u0DB1"
+              "< \\u0DB3 < \\u0DB4 < \\u0DB5 < \\u0DB6 < \\u0DB7 < \\u0DB8"
+              "< \\u0DB9 < \\u0DBA < \\u0DBB < \\u0DBD < \\u0DC0 < \\u0DC1"
+              "< \\u0DC2 < \\u0DC3 < \\u0DC4 < \\u0DC5 < \\u0DC6"
+              "< \\u0DCF"
+              "< \\u0DD0 < \\u0DD1 < \\u0DD2 < \\u0DD3 < \\u0DD4 < \\u0DD6"
+              "< \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3 < \\u0DD9 < \\u0DDA"
+              "< \\u0DDB < \\u0DDC < \\u0DDD < \\u0DDE < \\u0DCA";
+#endif /* 0 */
+
 
 /*
   Unicode Collation Algorithm:
@@ -7441,7 +7469,7 @@ int my_wildcmp_uca(CHARSET_INFO *cs,
 /*
   Collation language is implemented according to
   subset of ICU Collation Customization (tailorings):
-  http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
+  http://icu.sourceforge.net/userguide/Collate_Customization.html
   
   Collation language elements:
   Delimiters:
@@ -8668,6 +8696,39 @@ CHARSET_INFO my_charset_ucs2_hungarian_u
 };
 
 
+CHARSET_INFO my_charset_ucs2_sinhala_uca_ci= /* ucs2 collation using the sinhala tailoring */
+{
+    147,0,0,		/* number       */
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, /* flags */
+    "ucs2",		/* cs name    */
+    "ucs2_sinhala_ci",	/* name         */
+    "",			/* comment      */
+    sinhala,		/* tailoring    */
+    NULL,		/* ctype        */
+    NULL,		/* to_lower     */
+    NULL,		/* to_upper     */
+    NULL,		/* sort_order   */
+    NULL,		/* contractions */
+    NULL,		/* sort_order_big*/
+    NULL,		/* tab_to_uni   */
+    NULL,		/* tab_from_uni */
+    my_unicase_default, /* caseinfo     */
+    NULL,		/* state_map    */
+    NULL,		/* ident_map    */
+    8,			/* strxfrm_multiply */
+    1,                  /* caseup_multiply  */
+    1,                  /* casedn_multiply  */
+    2,			/* mbminlen     */
+    2,			/* mbmaxlen     */
+    9,			/* min_sort_char */
+    0xFFFF,		/* max_sort_char */
+    ' ',                /* pad char      */
+    0,                  /* escape_with_backslash_is_dangerous */
+    &my_charset_ucs2_handler, /* charset handler */
+    &my_collation_ucs2_uca_handler /* collation handler */
+};
+
+
 #endif
 
 
@@ -9316,6 +9377,38 @@ CHARSET_INFO my_charset_utf8_hungarian_u
     1,                  /* caseup_multiply  */
     1,                  /* casedn_multiply  */
     1,			/* mbminlen     */
+    3,			/* mbmaxlen     */
+    9,			/* min_sort_char */
+    0xFFFF,		/* max_sort_char */
+    ' ',                /* pad char      */
+    0,                  /* escape_with_backslash_is_dangerous */
+    &my_charset_utf8_handler,
+    &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8_sinhala_uca_ci= /* utf8 collation using the sinhala tailoring */
+{
+    211,0,0,		/* number       */
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, /* flags */
+    "utf8",		/* cs name    */
+    "utf8_sinhala_ci",	/* name         */
+    "",			/* comment      */
+    sinhala,		/* tailoring    */
+    ctype_utf8,		/* ctype        */
+    NULL,		/* to_lower     */
+    NULL,		/* to_upper     */
+    NULL,		/* sort_order   */
+    NULL,		/* contractions */
+    NULL,		/* sort_order_big*/
+    NULL,		/* tab_to_uni   */
+    NULL,		/* tab_from_uni */
+    my_unicase_default, /* caseinfo     */
+    NULL,		/* state_map    */
+    NULL,		/* ident_map    */
+    8,			/* strxfrm_multiply */
+    1,                  /* caseup_multiply  */
+    1,                  /* casedn_multiply  */
+    1,			/* mbminlen: shortest utf8 sequence is one byte (was 3) */
     3,			/* mbmaxlen     */
     9,			/* min_sort_char */
     0xFFFF,		/* max_sort_char */

--- 1.113/mysql-test/r/ctype_utf8.result	2006-11-20 09:04:57 -05:00
+++ 1.114/mysql-test/r/ctype_utf8.result	2007-05-31 16:11:47 -04:00
@@ -1602,3 +1602,113 @@ colA	colB	colA	colB
 1	foo	1	foo
 2	foo bar	2	foo bar
 DROP TABLE t1, t2;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+predicted_order int NOT NULL,
+utf8_encoding VARCHAR(10) NOT NULL
+) CHARACTER SET utf8;
+INSERT INTO t1 VALUES (19, x'E0B696'), (30, x'E0B69AE0B798'), (61, x'E0B6AF'), (93, x'E0B799'), (52, x'E0B6A6'), (73, x'E0B6BBE0B78AE2808D'), (3, x'E0B686'), (56, x'E0B6AA'), (55, x'E0B6A9'), (70, x'E0B6B9'), (94, x'E0B79A'), (80, x'E0B785'), (25, x'E0B69AE0B791'), (48, x'E0B6A2'), (13, x'E0B690'), (86, x'E0B793'), (91, x'E0B79F'), (81, x'E0B786'), (79, x'E0B784'), (14, x'E0B691'), (99, x'E0B78A'), (8, x'E0B68B'), (68, x'E0B6B7'), (22, x'E0B69A'), (16, x'E0B693'), (33, x'E0B69AE0B7B3'), (38, x'E0B69AE0B79D'), (21, x'E0B683'), (11, x'E0B68E'), (77, x'E0B782'), (40, x'E0B69AE0B78A'), (101, x'E0B78AE2808DE0B6BB'), (35, x'E0B69AE0B79A'), (1, x'E0B7B4'), (9, x'E0B68C'), (96, x'E0B79C'), (6, x'E0B689'), (95, x'E0B79B'), (88, x'E0B796'), (64, x'E0B6B3'), (26, x'E0B69AE0B792'), (82, x'E0B78F'), (28, x'E0B69AE0B794'), (39, x'E0B69AE0B79E'), (97, x'E0B79D'), (2, x'E0B685'), (75, x'E0B780'), (34, x'E0B69AE0B799'), (69, x'E0B6B8'), (83, x'E0B790'), (18, x'E0B695'), (90, x'E0B7B2'), (17, x'E0B694'), (72, x'E0B6BB'), (66, x'E0B6B5'), (59, x'E0B6AD'), (44, x'E0B69E'), (15, x'E0B692'), (23, x'E0B69AE0B78F'), (65, x'E0B6B4'), (42, x'E0B69C'), (63, x'E0B6B1'), (85, x'E0B792'), (47, x'E0B6A1'), (49, x'E0B6A3'), (92, x'E0B7B3'), (78, x'E0B783'), (36, x'E0B69AE0B79B'), (4, x'E0B687'), (24, x'E0B69AE0B790'), (87, x'E0B794'), (37, x'E0B69AE0B79C'), (32, x'E0B69AE0B79F'), (29, x'E0B69AE0B796'), (43, x'E0B69D'), (62, x'E0B6B0'), (100, x'E0B78AE2808DE0B6BA'), (60, x'E0B6AE'), (45, x'E0B69F'), (12, x'E0B68F'), (46, x'E0B6A0'), (50, x'E0B6A5'), (51, x'E0B6A4'), (5, x'E0B688'), (76, x'E0B781'), (89, x'E0B798'), (74, x'E0B6BD'), (10, x'E0B68D'), (57, x'E0B6AB'), (71, x'E0B6BA'), (58, x'E0B6AC'), (27, x'E0B69AE0B793'), (54, x'E0B6A8'), (84, x'E0B791'), (31, x'E0B69AE0B7B2'), (98, x'E0B79E'), (53, x'E0B6A7'), (41, x'E0B69B'), (67, x'E0B6B6'), (7, x'E0B68A'), (20, x'E0B682');
+SELECT predicted_order, hex(utf8_encoding) FROM t1 ORDER BY utf8_encoding COLLATE utf8_sinhala_ci;
+predicted_order	hex(utf8_encoding)
+1	E0B7B4
+2	E0B685
+3	E0B686
+4	E0B687
+5	E0B688
+6	E0B689
+7	E0B68A
+8	E0B68B
+9	E0B68C
+10	E0B68D
+11	E0B68E
+12	E0B68F
+13	E0B690
+14	E0B691
+15	E0B692
+16	E0B693
+17	E0B694
+18	E0B695
+19	E0B696
+20	E0B682
+21	E0B683
+22	E0B69A
+23	E0B69AE0B78F
+24	E0B69AE0B790
+25	E0B69AE0B791
+26	E0B69AE0B792
+27	E0B69AE0B793
+28	E0B69AE0B794
+29	E0B69AE0B796
+30	E0B69AE0B798
+31	E0B69AE0B7B2
+32	E0B69AE0B79F
+33	E0B69AE0B7B3
+34	E0B69AE0B799
+35	E0B69AE0B79A
+36	E0B69AE0B79B
+37	E0B69AE0B79C
+38	E0B69AE0B79D
+39	E0B69AE0B79E
+40	E0B69AE0B78A
+41	E0B69B
+42	E0B69C
+43	E0B69D
+44	E0B69E
+45	E0B69F
+46	E0B6A0
+47	E0B6A1
+48	E0B6A2
+49	E0B6A3
+50	E0B6A5
+51	E0B6A4
+52	E0B6A6
+53	E0B6A7
+54	E0B6A8
+55	E0B6A9
+56	E0B6AA
+57	E0B6AB
+58	E0B6AC
+59	E0B6AD
+60	E0B6AE
+61	E0B6AF
+62	E0B6B0
+63	E0B6B1
+64	E0B6B3
+65	E0B6B4
+66	E0B6B5
+67	E0B6B6
+68	E0B6B7
+69	E0B6B8
+70	E0B6B9
+71	E0B6BA
+72	E0B6BB
+73	E0B6BBE0B78AE2808D
+74	E0B6BD
+75	E0B780
+76	E0B781
+77	E0B782
+78	E0B783
+79	E0B784
+80	E0B785
+81	E0B786
+82	E0B78F
+83	E0B790
+84	E0B791
+85	E0B792
+86	E0B793
+87	E0B794
+88	E0B796
+89	E0B798
+90	E0B7B2
+91	E0B79F
+92	E0B7B3
+93	E0B799
+94	E0B79A
+95	E0B79B
+96	E0B79C
+97	E0B79D
+98	E0B79E
+99	E0B78A
+100	E0B78AE2808DE0B6BA
+101	E0B78AE2808DE0B6BB
+DROP TABLE t1;

--- 1.103/mysql-test/t/ctype_utf8.test	2007-01-22 12:08:44 -05:00
+++ 1.104/mysql-test/t/ctype_utf8.test	2007-05-31 16:11:47 -04:00
@@ -1296,3 +1296,16 @@ INSERT INTO t2 (colA, colB) VALUES (1, '
 SELECT * FROM t1 JOIN t2 ON t1.colA=t2.colA AND t1.colB=t2.colB
 WHERE t1.colA < 3;
 DROP TABLE t1, t2;
+#
+# Bug#26474: Add Sinhala script (Sri Lanka) collation to MySQL
+#
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+CREATE TABLE t1 (
+        predicted_order int NOT NULL,
+        utf8_encoding VARCHAR(10) NOT NULL
+) CHARACTER SET utf8;
+INSERT INTO t1 VALUES (19, x'E0B696'), (30, x'E0B69AE0B798'), (61, x'E0B6AF'), (93, x'E0B799'), (52, x'E0B6A6'), (73, x'E0B6BBE0B78AE2808D'), (3, x'E0B686'), (56, x'E0B6AA'), (55, x'E0B6A9'), (70, x'E0B6B9'), (94, x'E0B79A'), (80, x'E0B785'), (25, x'E0B69AE0B791'), (48, x'E0B6A2'), (13, x'E0B690'), (86, x'E0B793'), (91, x'E0B79F'), (81, x'E0B786'), (79, x'E0B784'), (14, x'E0B691'), (99, x'E0B78A'), (8, x'E0B68B'), (68, x'E0B6B7'), (22, x'E0B69A'), (16, x'E0B693'), (33, x'E0B69AE0B7B3'), (38, x'E0B69AE0B79D'), (21, x'E0B683'), (11, x'E0B68E'), (77, x'E0B782'), (40, x'E0B69AE0B78A'), (101, x'E0B78AE2808DE0B6BB'), (35, x'E0B69AE0B79A'), (1, x'E0B7B4'), (9, x'E0B68C'), (96, x'E0B79C'), (6, x'E0B689'), (95, x'E0B79B'), (88, x'E0B796'), (64, x'E0B6B3'), (26, x'E0B69AE0B792'), (82, x'E0B78F'), (28, x'E0B69AE0B794'), (39, x'E0B69AE0B79E'), (97, x'E0B79D'), (2, x'E0B685'), (75, x'E0B780'), (34, x'E0B69AE0B799'), (69, x'E0B6B8'), (83, x'E0B790'), (18, x'E0B695'), (90, x'E0B7B2'), (17, x'E0B694'), (72, x'E0B6BB'), (66, x'E0B6B5'), (59, x'E0B6AD'), (44, x'E0B69E'), (15, x'E0B692'), (23, x'E0B69AE0B78F'), (65, x'E0B6B4'), (42, x'E0B69C'), (63, x'E0B6B1'), (85, x'E0B792'), (47, x'E0B6A1'), (49, x'E0B6A3'), (92, x'E0B7B3'), (78, x'E0B783'), (36, x'E0B69AE0B79B'), (4, x'E0B687'), (24, x'E0B69AE0B790'), (87, x'E0B794'), (37, x'E0B69AE0B79C'), (32, x'E0B69AE0B79F'), (29, x'E0B69AE0B796'), (43, x'E0B69D'), (62, x'E0B6B0'), (100, x'E0B78AE2808DE0B6BA'), (60, x'E0B6AE'), (45, x'E0B69F'), (12, x'E0B68F'), (46, x'E0B6A0'), (50, x'E0B6A5'), (51, x'E0B6A4'), (5, x'E0B688'), (76, x'E0B781'), (89, x'E0B798'), (74, x'E0B6BD'), (10, x'E0B68D'), (57, x'E0B6AB'), (71, x'E0B6BA'), (58, x'E0B6AC'), (27, x'E0B69AE0B793'), (54, x'E0B6A8'), (84, x'E0B791'), (31, x'E0B69AE0B7B2'), (98, x'E0B79E'), (53, x'E0B6A7'), (41, x'E0B69B'), (67, x'E0B6B6'), (7, x'E0B68A'), (20, x'E0B682');
+SELECT predicted_order, hex(utf8_encoding) FROM t1 ORDER BY utf8_encoding COLLATE utf8_sinhala_ci;
+DROP TABLE t1;

--- 1.8/config/ac-macros/character_sets.m4	2005-09-12 10:35:24 -04:00
+++ 1.9/config/ac-macros/character_sets.m4	2007-05-31 16:11:47 -04:00
@@ -346,7 +346,7 @@ case $default_charset in 
       define(UCSC3, ucs2_esperanto_ci ucs2_estonian_ci ucs2_icelandic_ci)
       define(UCSC4, ucs2_latvian_ci ucs2_lithuanian_ci)
       define(UCSC5, ucs2_persian_ci ucs2_polish_ci ucs2_romanian_ci)
-      define(UCSC6, ucs2_slovak_ci ucs2_slovenian_ci)
+      define(UCSC6, ucs2_sinhala_ci ucs2_slovak_ci ucs2_slovenian_ci)
       define(UCSC7, ucs2_spanish2_ci ucs2_spanish_ci)
       define(UCSC8, ucs2_swedish_ci ucs2_turkish_ci)
       define(UCSC9, ucs2_unicode_ci)
@@ -370,7 +370,7 @@ case $default_charset in 
         define(UTFC3, utf8_esperanto_ci utf8_estonian_ci utf8_icelandic_ci)
         define(UTFC4, utf8_latvian_ci utf8_lithuanian_ci)
         define(UTFC5, utf8_persian_ci utf8_polish_ci utf8_romanian_ci)
-        define(UTFC6, utf8_slovak_ci utf8_slovenian_ci)
+        define(UTFC6, utf8_sinhala_ci utf8_slovak_ci utf8_slovenian_ci)
         define(UTFC7, utf8_spanish2_ci utf8_spanish_ci)
         define(UTFC8, utf8_swedish_ci utf8_turkish_ci)
         define(UTFC9, utf8_unicode_ci)

--- 1.17/mysys/charset-def.c	2006-12-23 14:19:45 -05:00
+++ 1.18/mysys/charset-def.c	2007-05-31 16:11:47 -04:00
@@ -43,6 +43,7 @@ extern CHARSET_INFO my_charset_ucs2_roma
 extern CHARSET_INFO my_charset_ucs2_persian_uca_ci;
 extern CHARSET_INFO my_charset_ucs2_esperanto_uca_ci;
 extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci;
 #endif
 
 #ifdef HAVE_CHARSET_utf8
@@ -65,6 +66,7 @@ extern CHARSET_INFO my_charset_utf8_roma
 extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
 extern CHARSET_INFO my_charset_utf8_esperanto_uca_ci;
 extern CHARSET_INFO my_charset_utf8_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_utf8_sinhala_uca_ci;
 #ifdef HAVE_UTF8_GENERAL_CS
 extern CHARSET_INFO my_charset_utf8_general_cs;
 #endif
@@ -154,6 +156,7 @@ my_bool init_compiled_charsets(myf flags
   add_compiled_collation(&my_charset_ucs2_persian_uca_ci);
   add_compiled_collation(&my_charset_ucs2_esperanto_uca_ci);
   add_compiled_collation(&my_charset_ucs2_hungarian_uca_ci);
+  add_compiled_collation(&my_charset_ucs2_sinhala_uca_ci);
 #endif
 #endif
 
@@ -188,6 +191,7 @@ my_bool init_compiled_charsets(myf flags
   add_compiled_collation(&my_charset_utf8_persian_uca_ci);
   add_compiled_collation(&my_charset_utf8_esperanto_uca_ci);
   add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
+  add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
 #endif
 #endif
 
Thread
bk commit into 5.1 tree (cmiller:1.2474) BUG#24674Chad MILLER31 May