Below is the list of changes that have just been committed into a local
6.0 repository of bar. When bar does a push these changes
will be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-03-17 17:45:57+04:00, bar@stripped +5 -0
Bug#33077 Character sets: weight of supplementary characters is not 0xfffd
Problem: interpretation of 0xFFFC was wrong.
weight_string returned 0x0DC6 as weight for supplementary
characters (which is weight for character U+FFFC)
Fix: return 0xFFFC instead of 0x0DC6.
mysql-test/r/ctype_uca.result@stripped, 2008-03-17 17:45:53+04:00, bar@stripped +3 -0
Adding test
mysql-test/r/ctype_utf16_uca.result@stripped, 2008-03-17 17:45:53+04:00, bar@stripped +2 -2
Fixing test
mysql-test/r/ctype_utf32_uca.result@stripped, 2008-03-17 17:45:53+04:00, bar@stripped +2 -2
fixing test
mysql-test/t/ctype_uca.test@stripped, 2008-03-17 17:45:54+04:00, bar@stripped +4 -0
Adding test
strings/ctype-uca.c@stripped, 2008-03-17 17:45:54+04:00, bar@stripped +4 -2
Return 0xFFFD as weight for all supplementary characters.
diff -Nrup a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result
--- a/mysql-test/r/ctype_uca.result 2008-03-08 02:10:25 +04:00
+++ b/mysql-test/r/ctype_uca.result 2008-03-17 17:45:53 +04:00
@@ -3057,3 +3057,6 @@ hex(weight_string(cast(_latin1 0xDF6368
select hex(weight_string(cast(_latin1 0xDF6368 as char) as char(4)));
hex(weight_string(cast(_latin1 0xDF6368 as char) as char(4)))
0FEA0FEA0EE20209
+select hex(weight_string(_utf8 0xF0908080 /* U+10000 */ collate utf8_unicode_ci));
+hex(weight_string(_utf8 0xF0908080 /* U+10000 */ collate utf8_unicode_ci))
+FFFD
diff -Nrup a/mysql-test/r/ctype_utf16_uca.result b/mysql-test/r/ctype_utf16_uca.result
--- a/mysql-test/r/ctype_utf16_uca.result 2008-03-09 09:36:48 +04:00
+++ b/mysql-test/r/ctype_utf16_uca.result 2008-03-17 17:45:53 +04:00
@@ -2346,10 +2346,10 @@ hex(weight_string('abc' as char(5)))
0E330E4A0E6002090209
select hex(weight_string(_utf16 0xD800DC00 collate utf16_unicode_ci));
hex(weight_string(_utf16 0xD800DC00 collate utf16_unicode_ci))
-0DC6
+FFFD
select hex(weight_string(_utf16 0xD800DC01 collate utf16_unicode_ci));
hex(weight_string(_utf16 0xD800DC01 collate utf16_unicode_ci))
-0DC6
+FFFD
select @@collation_connection;
@@collation_connection
utf16_unicode_ci
diff -Nrup a/mysql-test/r/ctype_utf32_uca.result b/mysql-test/r/ctype_utf32_uca.result
--- a/mysql-test/r/ctype_utf32_uca.result 2008-03-09 09:36:49 +04:00
+++ b/mysql-test/r/ctype_utf32_uca.result 2008-03-17 17:45:53 +04:00
@@ -2346,10 +2346,10 @@ hex(weight_string('abc' as char(5)))
0E330E4A0E6002090209
select hex(weight_string(_utf32 0x10000 collate utf32_unicode_ci));
hex(weight_string(_utf32 0x10000 collate utf32_unicode_ci))
-0DC6
+FFFD
select hex(weight_string(_utf32 0x10001 collate utf32_unicode_ci));
hex(weight_string(_utf32 0x10001 collate utf32_unicode_ci))
-0DC6
+FFFD
select @@collation_connection;
@@collation_connection
utf32_unicode_ci
diff -Nrup a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test
--- a/mysql-test/t/ctype_uca.test 2007-12-19 18:03:39 +04:00
+++ b/mysql-test/t/ctype_uca.test 2008-03-17 17:45:54 +04:00
@@ -559,3 +559,7 @@ set @@collation_connection=utf8_czech_ci
set @@collation_connection=ucs2_czech_ci;
--source include/weight_string_chde.inc
+#
+# Bug#33077 weight of supplementary characters is not 0xfffd
+#
+select hex(weight_string(_utf8 0xF0908080 /* U+10000 */ collate utf8_unicode_ci));
diff -Nrup a/strings/ctype-uca.c b/strings/ctype-uca.c
--- a/strings/ctype-uca.c 2007-10-22 17:02:45 +05:00
+++ b/strings/ctype-uca.c 2008-03-17 17:45:54 +04:00
@@ -6961,6 +6961,7 @@ static void my_uca_scanner_init_any(my_u
static int my_uca_scanner_next_any(my_uca_scanner *scanner)
{
+ static uint16 end_of_line= 0;
/*
Check if the weights for the previous character have been
@@ -6986,8 +6987,9 @@ static int my_uca_scanner_next_any(my_uc
if (wc > 0xFFFF)
{
/* Replacement character for non-BMP */
- scanner->page= 0xFF;
- scanner->code= 0xFD;
+ scanner->wbeg= &end_of_line;
+ scanner->sbeg+= mb_len;
+ return 0xFFFD;
}
else
{