List:Commits« Previous MessageNext Message »
From:bar Date:December 6 2007 8:52am
Subject:bk commit into 6.0 tree (bar:1.2700) BUG#32393
View as plain text  
Below is the list of changes that have just been committed into a local
6.0 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-12-06 12:52:29+04:00, bar@stripped +3 -0
  Bug#32393 Character sets: illegal characters in utf16 columns
  Problem: a utf16 column accepted invalid Unicode characters
  (surrogate code points) when data was converted from another
  Unicode character set.
  Fix: Reject surrogate code points (0xD800..0xDFFF) during conversion.

  mysql-test/r/ctype_utf16.result@stripped, 2007-12-06 12:52:27+04:00, bar@stripped +18 -0
    Adding test

  mysql-test/t/ctype_utf16.test@stripped, 2007-12-06 12:52:27+04:00, bar@stripped +23 -0
    Adding test

  strings/ctype-ucs2.c@stripped, 2007-12-06 12:52:27+04:00, bar@stripped +3 -1
    Don't allow Unicode characters in the range 0xD800..0xDFFF to be
    stored - they are forbidden as characters in utf16
    (they are reserved for a special purpose - surrogate parts).

diff -Nrup a/mysql-test/r/ctype_utf16.result b/mysql-test/r/ctype_utf16.result
--- a/mysql-test/r/ctype_utf16.result	2007-12-06 11:42:16 +04:00
+++ b/mysql-test/r/ctype_utf16.result	2007-12-06 12:52:27 +04:00
@@ -941,6 +941,24 @@ D800DCFF
 DBFFDC00
 DBFFDCFF
 drop table t1;
+create table t1 (s1 varchar(50) character set ucs2);
+insert into t1 values (0xdf84);
+alter table t1 modify column s1 varchar(50) character set utf16;
+Warnings:
+Warning	1366	Incorrect string value: '\xDF\x84' for column 's1' at row 1
+select hex(s1) from t1;
+hex(s1)
+003F
+drop table t1;
+create table t1 (s1 varchar(5) character set ucs2, s2 varchar(5) character set utf16);
+insert into t1 (s1) values (0xdf84);
+update t1 set s2 = s1;
+Warnings:
+Warning	1366	Incorrect string value: '\xDF\x84' for column 's2' at row 1
+select hex(s2) from t1;
+hex(s2)
+003F
+drop table t1;
 create table t1 (a char(10)) character set utf16;
 insert into t1 values ('a   ');
 select hex(a) from t1;
diff -Nrup a/mysql-test/t/ctype_utf16.test b/mysql-test/t/ctype_utf16.test
--- a/mysql-test/t/ctype_utf16.test	2007-12-06 11:42:16 +04:00
+++ b/mysql-test/t/ctype_utf16.test	2007-12-06 12:52:27 +04:00
@@ -564,6 +564,29 @@ select hex(a) from t1;
 drop table t1;
 
 #
+# Bug#32393 Character sets: illegal characters in utf16 columns
+#
+# Tests that cs->cset->wc_mb() doesn't accept surrogate parts
+#
+# via alter
+#
+create table t1 (s1 varchar(50) character set ucs2);
+insert into t1 values (0xdf84);
+alter table t1 modify column s1 varchar(50) character set utf16;
+select hex(s1) from t1;
+drop table t1;
+#
+# via update
+#
+create table t1 (s1 varchar(5) character set ucs2, s2 varchar(5) character set utf16);
+insert into t1 (s1) values (0xdf84);
+update t1 set s2 = s1;
+select hex(s2) from t1;
+drop table t1;
+
+
+
+#
 # Testing cs->cset->lengthsp()
 #
 create table t1 (a char(10)) character set utf16;
diff -Nrup a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
--- a/strings/ctype-ucs2.c	2007-10-24 12:27:44 +05:00
+++ b/strings/ctype-ucs2.c	2007-12-06 12:52:27 +04:00
@@ -1038,7 +1038,7 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attri
 
 #define MY_UTF16_HIGH_HEAD(x)  ((((uchar) (x)) & 0xFC) == 0xD8)
 #define MY_UTF16_LOW_HEAD(x)   ((((uchar) (x)) & 0xFC) == 0xDC)
-
+#define MY_UTF16_SURROGATE(x)  (((x) & 0xF800) == 0xD800)
 
 static int
 my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
@@ -1090,6 +1090,8 @@ my_uni_utf16(CHARSET_INFO *cs __attribut
   {
     if (s + 2 > e)
       return MY_CS_TOOSMALL2;
+    if (MY_UTF16_SURROGATE(wc))
+      return MY_CS_ILUNI;
     *s++= (uchar) (wc >> 8);
     *s= (uchar) (wc & 0xFF);
     return 2;
Thread
bk commit into 6.0 tree (bar:1.2700) BUG#32393bar6 Dec