Below is the list of changes that have just been committed into a local
5.2 repository of tsmith. When tsmith does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-08-30 16:41:07-06:00, tsmith@stripped +5 -0
Contributed patch from Harshula Jayasuriya, CLA signed 21 Feb 2007.
Bug#26474: Add Sinhala script collation
The Default Unicode Collation Element Table (DUCET) and the Unicode
binary order are insufficient to properly sort Sinhala script.
A standard Sinhala collation order has been established as part of
SLS1134 (SCCII Part 1 : Collation Sequence).
config/ac-macros/character_sets.m4@stripped, 2007-08-30 16:41:06-06:00,
tsmith@stripped +2 -2
Add Sinhala collation reference.
mysql-test/r/ctype_utf8.result@stripped, 2007-08-30 16:41:06-06:00,
tsmith@stripped +111 -0
Verify collation order matches predictions.
---
Include the hexadecimal, as requested by submitter.
mysql-test/t/ctype_utf8.test@stripped, 2007-08-30 16:41:06-06:00, tsmith@stripped
+15 -0
Test predicted collation order.
---
Include the hexadecimal, as requested by submitter.
mysys/charset-def.c@stripped, 2007-08-30 16:41:06-06:00, tsmith@stripped +4 -0
Insert new collation.
strings/ctype-uca.c@stripped, 2007-08-30 16:41:06-06:00, tsmith@stripped +98 -1
Add new utf8 and ucs2 collations for Sinhala.
diff -Nrup a/config/ac-macros/character_sets.m4 b/config/ac-macros/character_sets.m4
--- a/config/ac-macros/character_sets.m4 2007-03-27 04:06:38 -06:00
+++ b/config/ac-macros/character_sets.m4 2007-08-30 16:41:06 -06:00
@@ -346,7 +346,7 @@ case $default_charset in
define(UCSC3, ucs2_esperanto_ci ucs2_estonian_ci ucs2_icelandic_ci)
define(UCSC4, ucs2_latvian_ci ucs2_lithuanian_ci)
define(UCSC5, ucs2_persian_ci ucs2_polish_ci ucs2_romanian_ci)
- define(UCSC6, ucs2_slovak_ci ucs2_slovenian_ci)
+ define(UCSC6, ucs2_sinhala_ci ucs2_slovak_ci ucs2_slovenian_ci)
define(UCSC7, ucs2_spanish2_ci ucs2_spanish_ci)
define(UCSC8, ucs2_swedish_ci ucs2_turkish_ci)
define(UCSC9, ucs2_unicode_ci)
@@ -370,7 +370,7 @@ case $default_charset in
define(UTFC3, utf8_esperanto_ci utf8_estonian_ci utf8_icelandic_ci)
define(UTFC4, utf8_latvian_ci utf8_lithuanian_ci)
define(UTFC5, utf8_persian_ci utf8_polish_ci utf8_romanian_ci)
- define(UTFC6, utf8_slovak_ci utf8_slovenian_ci)
+ define(UTFC6, utf8_sinhala_ci utf8_slovak_ci utf8_slovenian_ci)
define(UTFC7, utf8_spanish2_ci utf8_spanish_ci)
define(UTFC8, utf8_swedish_ci utf8_turkish_ci)
define(UTFC9, utf8_unicode_ci)
diff -Nrup a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
--- a/mysql-test/r/ctype_utf8.result 2007-08-24 17:19:51 -06:00
+++ b/mysql-test/r/ctype_utf8.result 2007-08-30 16:41:06 -06:00
@@ -1854,3 +1854,114 @@ b
c
drop table t1;
set max_sort_length=default;
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (
+predicted_order int NOT NULL,
+utf8_encoding VARCHAR(10) NOT NULL
+) CHARACTER SET utf8;
+INSERT INTO t1 VALUES (19, x'E0B696'), (30, x'E0B69AE0B798'), (61, x'E0B6AF'), (93,
x'E0B799'), (52, x'E0B6A6'), (73, x'E0B6BBE0B78AE2808D'), (3, x'E0B686'), (56,
x'E0B6AA'), (55, x'E0B6A9'), (70, x'E0B6B9'), (94, x'E0B79A'), (80, x'E0B785'), (25,
x'E0B69AE0B791'), (48, x'E0B6A2'), (13, x'E0B690'), (86, x'E0B793'), (91, x'E0B79F'),
(81, x'E0B786'), (79, x'E0B784'), (14, x'E0B691'), (99, x'E0B78A'), (8, x'E0B68B'), (68,
x'E0B6B7'), (22, x'E0B69A'), (16, x'E0B693'), (33, x'E0B69AE0B7B3'), (38,
x'E0B69AE0B79D'), (21, x'E0B683'), (11, x'E0B68E'), (77, x'E0B782'), (40,
x'E0B69AE0B78A'), (101, x'E0B78AE2808DE0B6BB'), (35, x'E0B69AE0B79A'), (1, x'E0B7B4'),
(9, x'E0B68C'), (96, x'E0B79C'), (6, x'E0B689'), (95, x'E0B79B'), (88, x'E0B796'), (64,
x'E0B6B3'), (26, x'E0B69AE0B792'), (82, x'E0B78F'), (28, x'E0B69AE0B794'), (39,
x'E0B69AE0B79E'), (97, x'E0B79D'), (2, x'E0B685'), (75, x'E0B780'), (34,
x'E0B69AE0B799'), (69, x'E0B6B8'), (83, x'E0B790'), (18, x'E0B695'), (90, x'E0B7B2'),
(17,
x'E0B694'), (72, x'E0B6BB'), (66, x'E0B6B5'), (59, x'E0B6AD'), (44, x'E0B69E'), (15,
x'E0B692'), (23, x'E0B69AE0B78F'), (65, x'E0B6B4'), (42, x'E0B69C'), (63, x'E0B6B1'),
(85, x'E0B792'), (47, x'E0B6A1'), (49, x'E0B6A3'), (92, x'E0B7B3'), (78, x'E0B783'), (36,
x'E0B69AE0B79B'), (4, x'E0B687'), (24, x'E0B69AE0B790'), (87, x'E0B794'), (37,
x'E0B69AE0B79C'), (32, x'E0B69AE0B79F'), (29, x'E0B69AE0B796'), (43, x'E0B69D'), (62,
x'E0B6B0'), (100, x'E0B78AE2808DE0B6BA'), (60, x'E0B6AE'), (45, x'E0B69F'), (12,
x'E0B68F'), (46, x'E0B6A0'), (50, x'E0B6A5'), (51, x'E0B6A4'), (5, x'E0B688'), (76,
x'E0B781'), (89, x'E0B798'), (74, x'E0B6BD'), (10, x'E0B68D'), (57, x'E0B6AB'), (71,
x'E0B6BA'), (58, x'E0B6AC'), (27, x'E0B69AE0B793'), (54, x'E0B6A8'), (84, x'E0B791'),
(31, x'E0B69AE0B7B2'), (98, x'E0B79E'), (53, x'E0B6A7'), (41, x'E0B69B'), (67,
x'E0B6B6'), (7, x'E0B68A'), (20, x'E0B682');
+SELECT predicted_order, hex(utf8_encoding) FROM t1 ORDER BY utf8_encoding COLLATE
utf8_sinhala_ci;
+predicted_order hex(utf8_encoding)
+1 E0B7B4
+2 E0B685
+3 E0B686
+4 E0B687
+5 E0B688
+6 E0B689
+7 E0B68A
+8 E0B68B
+9 E0B68C
+10 E0B68D
+11 E0B68E
+12 E0B68F
+13 E0B690
+14 E0B691
+15 E0B692
+16 E0B693
+17 E0B694
+18 E0B695
+19 E0B696
+20 E0B682
+21 E0B683
+22 E0B69A
+23 E0B69AE0B78F
+24 E0B69AE0B790
+25 E0B69AE0B791
+26 E0B69AE0B792
+27 E0B69AE0B793
+28 E0B69AE0B794
+29 E0B69AE0B796
+30 E0B69AE0B798
+31 E0B69AE0B7B2
+32 E0B69AE0B79F
+33 E0B69AE0B7B3
+34 E0B69AE0B799
+35 E0B69AE0B79A
+36 E0B69AE0B79B
+37 E0B69AE0B79C
+38 E0B69AE0B79D
+39 E0B69AE0B79E
+40 E0B69AE0B78A
+41 E0B69B
+42 E0B69C
+43 E0B69D
+44 E0B69E
+45 E0B69F
+46 E0B6A0
+47 E0B6A1
+48 E0B6A2
+49 E0B6A3
+50 E0B6A5
+51 E0B6A4
+52 E0B6A6
+53 E0B6A7
+54 E0B6A8
+55 E0B6A9
+56 E0B6AA
+57 E0B6AB
+58 E0B6AC
+59 E0B6AD
+60 E0B6AE
+61 E0B6AF
+62 E0B6B0
+63 E0B6B1
+64 E0B6B3
+65 E0B6B4
+66 E0B6B5
+67 E0B6B6
+68 E0B6B7
+69 E0B6B8
+70 E0B6B9
+71 E0B6BA
+72 E0B6BB
+73 E0B6BBE0B78AE2808D
+74 E0B6BD
+75 E0B780
+76 E0B781
+77 E0B782
+78 E0B783
+79 E0B784
+80 E0B785
+81 E0B786
+82 E0B78F
+83 E0B790
+84 E0B791
+85 E0B792
+86 E0B793
+87 E0B794
+88 E0B796
+89 E0B798
+90 E0B7B2
+91 E0B79F
+92 E0B7B3
+93 E0B799
+94 E0B79A
+95 E0B79B
+96 E0B79C
+97 E0B79D
+98 E0B79E
+99 E0B78A
+100 E0B78AE2808DE0B6BA
+101 E0B78AE2808DE0B6BB
+DROP TABLE t1;
+End of tests
diff -Nrup a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
--- a/mysql-test/t/ctype_utf8.test 2007-08-24 17:19:52 -06:00
+++ b/mysql-test/t/ctype_utf8.test 2007-08-30 16:41:06 -06:00
@@ -1426,3 +1426,18 @@ alter table t1 modify a varchar(128) cha
select * from t1 order by a;
drop table t1;
set max_sort_length=default;
+#
+# Bug#26474: Add Sinhala script (Sri Lanka) collation to MySQL
+#
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+CREATE TABLE t1 (
+ predicted_order int NOT NULL,
+ utf8_encoding VARCHAR(10) NOT NULL
+) CHARACTER SET utf8;
+INSERT INTO t1 VALUES (19, x'E0B696'), (30, x'E0B69AE0B798'), (61, x'E0B6AF'), (93,
x'E0B799'), (52, x'E0B6A6'), (73, x'E0B6BBE0B78AE2808D'), (3, x'E0B686'), (56,
x'E0B6AA'), (55, x'E0B6A9'), (70, x'E0B6B9'), (94, x'E0B79A'), (80, x'E0B785'), (25,
x'E0B69AE0B791'), (48, x'E0B6A2'), (13, x'E0B690'), (86, x'E0B793'), (91, x'E0B79F'),
(81, x'E0B786'), (79, x'E0B784'), (14, x'E0B691'), (99, x'E0B78A'), (8, x'E0B68B'), (68,
x'E0B6B7'), (22, x'E0B69A'), (16, x'E0B693'), (33, x'E0B69AE0B7B3'), (38,
x'E0B69AE0B79D'), (21, x'E0B683'), (11, x'E0B68E'), (77, x'E0B782'), (40,
x'E0B69AE0B78A'), (101, x'E0B78AE2808DE0B6BB'), (35, x'E0B69AE0B79A'), (1, x'E0B7B4'),
(9, x'E0B68C'), (96, x'E0B79C'), (6, x'E0B689'), (95, x'E0B79B'), (88, x'E0B796'), (64,
x'E0B6B3'), (26, x'E0B69AE0B792'), (82, x'E0B78F'), (28, x'E0B69AE0B794'), (39,
x'E0B69AE0B79E'), (97, x'E0B79D'), (2, x'E0B685'), (75, x'E0B780'), (34,
x'E0B69AE0B799'), (69, x'E0B6B8'), (83, x'E0B790'), (18, x'E0B695'), (90, x'E0B7B2'),
(17,
x'E0B694'), (72, x'E0B6BB'), (66, x'E0B6B5'), (59, x'E0B6AD'), (44, x'E0B69E'), (15,
x'E0B692'), (23, x'E0B69AE0B78F'), (65, x'E0B6B4'), (42, x'E0B69C'), (63, x'E0B6B1'),
(85, x'E0B792'), (47, x'E0B6A1'), (49, x'E0B6A3'), (92, x'E0B7B3'), (78, x'E0B783'), (36,
x'E0B69AE0B79B'), (4, x'E0B687'), (24, x'E0B69AE0B790'), (87, x'E0B794'), (37,
x'E0B69AE0B79C'), (32, x'E0B69AE0B79F'), (29, x'E0B69AE0B796'), (43, x'E0B69D'), (62,
x'E0B6B0'), (100, x'E0B78AE2808DE0B6BA'), (60, x'E0B6AE'), (45, x'E0B69F'), (12,
x'E0B68F'), (46, x'E0B6A0'), (50, x'E0B6A5'), (51, x'E0B6A4'), (5, x'E0B688'), (76,
x'E0B781'), (89, x'E0B798'), (74, x'E0B6BD'), (10, x'E0B68D'), (57, x'E0B6AB'), (71,
x'E0B6BA'), (58, x'E0B6AC'), (27, x'E0B69AE0B793'), (54, x'E0B6A8'), (84, x'E0B791'),
(31, x'E0B69AE0B7B2'), (98, x'E0B79E'), (53, x'E0B6A7'), (41, x'E0B69B'), (67,
x'E0B6B6'), (7, x'E0B68A'), (20, x'E0B682');
+SELECT predicted_order, hex(utf8_encoding) FROM t1 ORDER BY utf8_encoding COLLATE
utf8_sinhala_ci;
+DROP TABLE t1;
+
+--echo End of tests
diff -Nrup a/mysys/charset-def.c b/mysys/charset-def.c
--- a/mysys/charset-def.c 2007-06-21 13:02:09 -06:00
+++ b/mysys/charset-def.c 2007-08-30 16:41:06 -06:00
@@ -42,6 +42,7 @@ extern CHARSET_INFO my_charset_ucs2_roma
extern CHARSET_INFO my_charset_ucs2_persian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_esperanto_uca_ci;
extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci;
#endif
#ifdef HAVE_CHARSET_utf8
@@ -63,6 +64,7 @@ extern CHARSET_INFO my_charset_utf8_roma
extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
extern CHARSET_INFO my_charset_utf8_esperanto_uca_ci;
extern CHARSET_INFO my_charset_utf8_hungarian_uca_ci;
+extern CHARSET_INFO my_charset_utf8_sinhala_uca_ci;
#ifdef HAVE_UTF8_GENERAL_CS
extern CHARSET_INFO my_charset_utf8_general_cs;
#endif
@@ -152,6 +154,7 @@ my_bool init_compiled_charsets(myf flags
add_compiled_collation(&my_charset_ucs2_persian_uca_ci);
add_compiled_collation(&my_charset_ucs2_esperanto_uca_ci);
add_compiled_collation(&my_charset_ucs2_hungarian_uca_ci);
+ add_compiled_collation(&my_charset_ucs2_sinhala_uca_ci);
#endif
#endif
@@ -186,6 +189,7 @@ my_bool init_compiled_charsets(myf flags
add_compiled_collation(&my_charset_utf8_persian_uca_ci);
add_compiled_collation(&my_charset_utf8_esperanto_uca_ci);
add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
+ add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
#endif
#endif
diff -Nrup a/strings/ctype-uca.c b/strings/ctype-uca.c
--- a/strings/ctype-uca.c 2007-07-06 04:19:48 -06:00
+++ b/strings/ctype-uca.c 2007-08-30 16:41:06 -06:00
@@ -6712,6 +6712,34 @@ static const char hungarian[]=
"&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150"
"&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170";
+/*
+ SCCII Part 1 : Collation Sequence (SLS1134)
+ 2006/11/24
+ Harshula Jayasuriya <harshula at gmail dot com>
+ Language Technology Research Lab, University of Colombo / ICTA
+*/
+#if 0
+static const char sinhala[]=
+ "& \\u0D96 < \\u0D82 < \\u0D83"
+ "& \\u0DA5 < \\u0DA4"
+ "& \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3"
+ "& \\u0DDE < \\u0DCA";
+#else
+static const char sinhala[]=
+ "& \\u0D96 < \\u0D82 < \\u0D83 < \\u0D9A < \\u0D9B < \\u0D9C <
\\u0D9D"
+ "< \\u0D9E < \\u0D9F < \\u0DA0 < \\u0DA1 < \\u0DA2 <
\\u0DA3"
+ "< \\u0DA5 < \\u0DA4 < \\u0DA6"
+ "< \\u0DA7 < \\u0DA8 < \\u0DA9 < \\u0DAA < \\u0DAB <
\\u0DAC"
+ "< \\u0DAD < \\u0DAE < \\u0DAF < \\u0DB0 < \\u0DB1"
+ "< \\u0DB3 < \\u0DB4 < \\u0DB5 < \\u0DB6 < \\u0DB7 <
\\u0DB8"
+ "< \\u0DB9 < \\u0DBA < \\u0DBB < \\u0DBD < \\u0DC0 <
\\u0DC1"
+ "< \\u0DC2 < \\u0DC3 < \\u0DC4 < \\u0DC5 < \\u0DC6"
+ "< \\u0DCF"
+ "< \\u0DD0 < \\u0DD1 < \\u0DD2 < \\u0DD3 < \\u0DD4 <
\\u0DD6"
+ "< \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3 < \\u0DD9 <
\\u0DDA"
+ "< \\u0DDB < \\u0DDC < \\u0DDD < \\u0DDE < \\u0DCA";
+#endif
+
/*
Unicode Collation Algorithm:
@@ -7464,7 +7492,7 @@ int my_wildcmp_uca(CHARSET_INFO *cs,
/*
Collation language is implemented according to
subset of ICU Collation Customization (tailorings):
- http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
+ http://icu.sourceforge.net/userguide/Collate_Customization.html
Collation language elements:
Delimiters:
@@ -8738,6 +8766,41 @@ CHARSET_INFO my_charset_ucs2_hungarian_u
};
+CHARSET_INFO my_charset_ucs2_sinhala_uca_ci=
+{
+ 147,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+ "ucs2", /* cs name */
+ "ucs2_sinhala_ci", /* name */
+ "", /* comment */
+ sinhala, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 2, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ 1, /* levels_for_compare */
+ 1, /* levels_for_order */
+ &my_charset_ucs2_handler,
+ &my_collation_ucs2_uca_handler
+};
+
+
#endif
@@ -9388,6 +9451,40 @@ CHARSET_INFO my_charset_utf8_esperanto_u
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
+ 3, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ 1, /* levels_for_compare */
+ 1, /* levels_for_order */
+ &my_charset_utf8_handler,
+ &my_collation_any_uca_handler
+};
+
+CHARSET_INFO my_charset_utf8_sinhala_uca_ci=
+{
+ 211,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+ "utf8", /* cs name */
+ "utf8_sinhala_ci", /* name */
+ "", /* comment */
+ sinhala, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* contractions */
+ NULL, /* sort_order_big*/
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 3, /* mbminlen */
3, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
| Thread |
|---|
| • bk commit into 5.2 tree (tsmith:1.2591) BUG#26474 | tim | 31 Aug |