List:Commits« Previous MessageNext Message »
From:Alexander Barkov Date:March 1 2011 12:10pm
Subject:bzr commit into mysql-5.5 branch (alexander.barkov:3360) Bug#44793
Bug#11753363
View as plain text  
#At file:///home/bar/mysql-bzr/mysql-5.5.b44793/ based on revid:magne.mahre@stripped

 3360 Alexander Barkov	2011-03-01
      Bug#11753363 (bug#44793) CHARACTER SETS: CASE CLAUSE, UCS2 OR UTF32, FAILURE
      
      Problem: in case of string CASE/WHEN arguments with different
      character sets, Item_func_case::find_item() called the comparator
      cmp_items[x] on mixed character set Items, so an 8-bit value could
      be erroneously treated as a utf16/utf32 value,
      which led to a crash on DBUG_ASSERT() because of the wrong value length.
      This was wrong, as the string comparator expects arguments in the same
      character set.
      
      Fix: modify Item_func_case's argument list after calling
      agg_arg_charsets_for_comparison() - put the Items in "agg" array
      back to "args", because some of the Items in the "agg" array might
      have been changed to character set converters:
      - to Item_func_conv_charset for non-constant items
      - to Item_string for constant items
      
      In other words, perform the same substitution that is done in
      all other string comparison or string result operations:
      
      Replace
        CASE         latin1_item              WHEN utf16_item THEN ... END
      with
        CASE CONVERT(latin1_item USING utf16) WHEN utf16_item THEN ... END
      
      Replace
        CASE utf16_item WHEN         latin1_item              THEN ... END
      with
        CASE utf16_item WHEN CONVERT(latin1_item USING utf16) THEN ... END
      
      
        @ mysql-test/r/ctype_utf16.result
        @ mysql-test/r/ctype_utf32.result
        @ mysql-test/t/ctype_utf16.test
        @ mysql-test/t/ctype_utf32.test
        Adding tests
      
        @ sql/item_cmpfunc.cc
        Put "agg" back to "args".
      
        @ sql/sql_string.cc
        Backporting a fix for String::set_or_copy_aligned() from 5.6,
        for better test coverage:
        "SELECT _utf16 0x61" should expand the string to 0x0061 rather
        than to 0x000061.
        This fix was made in 5.6 under terms of "WL#4616 Implement UTF16-LE".

    modified:
      mysql-test/r/ctype_utf16.result
      mysql-test/r/ctype_utf32.result
      mysql-test/t/ctype_utf16.test
      mysql-test/t/ctype_utf32.test
      sql/item_cmpfunc.cc
      sql/sql_string.cc
=== modified file 'mysql-test/r/ctype_utf16.result'
--- a/mysql-test/r/ctype_utf16.result	2010-11-24 14:52:57 +0000
+++ b/mysql-test/r/ctype_utf16.result	2011-03-01 12:09:37 +0000
@@ -30,13 +30,13 @@ binary 'a  a' > 'a'	binary 'a  \0' > 'a'
 1	1	1
 select hex(_utf16 0x44);
 hex(_utf16 0x44)
-00000044
+0044
 select hex(_utf16 0x3344);
 hex(_utf16 0x3344)
 3344
 select hex(_utf16 0x113344);
 hex(_utf16 0x113344)
-000000113344
+00113344
 CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16;
 INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
 SELECT hex(word) FROM t1 ORDER BY word;
@@ -434,10 +434,10 @@ aardvarz
 DROP TABLE t1;
 SELECT hex(cast(0xAA as char character set utf16));
 hex(cast(0xAA as char character set utf16))
-000000AA
+00AA
 SELECT hex(convert(0xAA using utf16));
 hex(convert(0xAA using utf16))
-000000AA
+00AA
 CREATE TABLE t1 (a char(10) character set utf16);
 INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
 SELECT HEX(a) FROM t1;
@@ -1102,5 +1102,20 @@ t2	CREATE TABLE `t2` (
 ) ENGINE=MyISAM DEFAULT CHARSET=latin1
 DROP TABLE t1, t2;
 #
+# Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
+#
+SELECT CASE _latin1'a' WHEN _utf16'a' THEN 'A' END;
+CASE _latin1'a' WHEN _utf16'a' THEN 'A' END
+A
+SELECT CASE _utf16'a' WHEN _latin1'a' THEN 'A' END;
+CASE _utf16'a' WHEN _latin1'a' THEN 'A' END
+A
+CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf16);
+INSERT INTO t1 VALUES ('a');
+SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
+CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END
+b
+DROP TABLE t1;
+#
 # End of 5.5 tests
 #

=== modified file 'mysql-test/r/ctype_utf32.result'
--- a/mysql-test/r/ctype_utf32.result	2010-09-28 15:15:58 +0000
+++ b/mysql-test/r/ctype_utf32.result	2011-03-01 12:09:37 +0000
@@ -1152,5 +1152,20 @@ d
 f
 DROP TABLE t1;
 #
+# Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
+#
+SELECT CASE _latin1'a' WHEN _utf32'a' THEN 'A' END;
+CASE _latin1'a' WHEN _utf32'a' THEN 'A' END
+A
+SELECT CASE _utf32'a' WHEN _latin1'a' THEN 'A' END;
+CASE _utf32'a' WHEN _latin1'a' THEN 'A' END
+A
+CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf32);
+INSERT INTO t1 VALUES ('a');
+SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
+CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END
+b
+DROP TABLE t1;
+#
 # End of 5.5 tests
 #

=== modified file 'mysql-test/t/ctype_utf16.test'
--- a/mysql-test/t/ctype_utf16.test	2010-09-28 15:15:58 +0000
+++ b/mysql-test/t/ctype_utf16.test	2011-03-01 12:09:37 +0000
@@ -745,6 +745,15 @@ CREATE TABLE t2 AS SELECT CONCAT(s1) FRO
 SHOW CREATE TABLE t2;
 DROP TABLE t1, t2;
 
+--echo #
+--echo # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
+--echo #
+SELECT CASE _latin1'a' WHEN _utf16'a' THEN 'A' END;
+SELECT CASE _utf16'a' WHEN _latin1'a' THEN 'A' END;
+CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf16);
+INSERT INTO t1 VALUES ('a');
+SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
+DROP TABLE t1;
 
 #
 ## TODO: add tests for all engines

=== modified file 'mysql-test/t/ctype_utf32.test'
--- a/mysql-test/t/ctype_utf32.test	2010-09-28 15:15:58 +0000
+++ b/mysql-test/t/ctype_utf32.test	2011-03-01 12:09:37 +0000
@@ -831,5 +831,15 @@ SELECT * FROM t1 WHERE b BETWEEN 'a' AND
 DROP TABLE t1;
 
 --echo #
+--echo # Bug#11753363 (Bug#44793) Character sets: case clause, ucs2 or utf32, failure
+--echo #
+SELECT CASE _latin1'a' WHEN _utf32'a' THEN 'A' END;
+SELECT CASE _utf32'a' WHEN _latin1'a' THEN 'A' END;
+CREATE TABLE t1 (s1 CHAR(5) CHARACTER SET utf32);
+INSERT INTO t1 VALUES ('a');
+SELECT CASE s1 WHEN 'a' THEN 'b' ELSE 'c' END FROM t1;
+DROP TABLE t1;
+
+--echo #
 --echo # End of 5.5 tests
 --echo #

=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc	2011-02-18 07:32:40 +0000
+++ b/sql/item_cmpfunc.cc	2011-03-01 12:09:37 +0000
@@ -3054,20 +3054,59 @@ void Item_func_case::fix_length_and_dec(
     agg[0]= args[first_expr_num];
     left_result_type= agg[0]->result_type();
 
+    /*
+      As the first expression and WHEN expressions
+      are intermixed in args[] array THEN and ELSE items,
+      extract the first expression and all WHEN expressions into 
+      a temporary array, to process them easier.
+    */
     for (nagg= 0; nagg < ncases/2 ; nagg++)
       agg[nagg+1]= args[nagg*2];
     nagg++;
     if (!(found_types= collect_cmp_types(agg, nagg)))
       return;
+    if (found_types & (1 << STRING_RESULT))
+    {
+      /*
+        If we'll do string comparison, we also need to aggregate
+        character set and collation for first/WHEN items and
+        install converters for some of them to cmp_collation when necessary.
+        This is done because cmp_item compatators cannot compare
+        strings in two different character sets.
+        Some examples when we install converters:
+
+        1. Converter installed for the first expression:
+
+           CASE         latin1_item              WHEN utf16_item THEN ... END
+
+        is replaced to:
+
+           CASE CONVERT(latin1_item USING utf16) WHEN utf16_item THEN ... END
+
+        2. Converter installed for the left WHEN item:
 
+          CASE utf16_item WHEN         latin1_item              THEN ... END
+
+        is replaced to:
+
+           CASE utf16_item WHEN CONVERT(latin1_item USING utf16) THEN ... END
+      */
+      if (agg_arg_charsets_for_comparison(cmp_collation, agg, nagg))
+        return;
+      /*
+        Now copy first expression and all WHEN expressions back to args[]
+        arrray, because some of the items might have been changed to converters
+        (e.g. Item_func_conv_charset, or Item_string for constants).
+      */
+      args[first_expr_num]= agg[0];
+      for (nagg= 0; nagg < ncases / 2; nagg++)
+        args[nagg * 2]= agg[nagg + 1];
+    }
     for (i= 0; i <= (uint)DECIMAL_RESULT; i++)
     {
       if (found_types & (1 << i) && !cmp_items[i])
       {
         DBUG_ASSERT((Item_result)i != ROW_RESULT);
-        if ((Item_result)i == STRING_RESULT &&
-            agg_arg_charsets_for_comparison(cmp_collation, agg, nagg))
-          return;
         if (!(cmp_items[i]=
             cmp_item::get_comparator((Item_result)i,
                                      cmp_collation.collation)))

=== modified file 'sql/sql_string.cc'
--- a/sql/sql_string.cc	2011-01-13 08:07:21 +0000
+++ b/sql/sql_string.cc	2011-03-01 12:09:37 +0000
@@ -252,8 +252,8 @@ bool String::copy_aligned(const char *st
 			  CHARSET_INFO *cs)
 {
   /* How many bytes are in incomplete character */
-  offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
-  DBUG_ASSERT(offset && offset != cs->mbmaxlen);
+  offset= cs->mbminlen - offset; /* How many zeros we should prepend */
+  DBUG_ASSERT(offset && offset != cs->mbminlen);
 
   uint32 aligned_length= arg_length + offset;
   if (alloc(aligned_length))


Attachment: [text/bzr-bundle] bzr/alexander.barkov@oracle.com-20110301120937-k4bscs7hb2diqa7k.bundle
Thread
bzr commit into mysql-5.5 branch (alexander.barkov:3360) Bug#44793Bug#11753363Alexander Barkov1 Mar