List: Internals « Previous Message | Next Message »
From: antony  Date: August 17 2005 1:27am
Subject: bk commit into 5.0 tree (acurtis:1.1986) BUG#12600
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of antony. When antony does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.1986 05/08/17 02:27:28 acurtis@stripped +3 -0
  Bug#12600
    "Inserting into LONGTEXT gives strange error message"
    Failed to parse long UTF8 sequences

  strings/ctype-utf8.c
    1.96 05/08/17 02:27:04 acurtis@stripped +34 -27
    always be able to parse up to 6 byte UTF8 sequences, even when
    only handling 16bit unicode output.

  mysql-test/t/ctype_utf8.test
    1.66 05/08/17 02:27:04 acurtis@stripped +15 -0
    Test for bug 12600

  mysql-test/r/ctype_utf8.result
    1.70 05/08/17 02:27:03 acurtis@stripped +7 -0
    Test for bug 12600

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	acurtis
# Host:	ltantony.xiphis.org
# Root:	/usr/home/antony/work2/p2-bug12600

--- 1.69/mysql-test/r/ctype_utf8.result	2005-07-22 10:47:04 +01:00
+++ 1.70/mysql-test/r/ctype_utf8.result	2005-08-17 02:27:03 +01:00
@@ -1023,3 +1023,10 @@
 xxx
 yyy
 DROP TABLE t1;
+SET @@SQL_MODE="ANSI_QUOTES,NO_BACKSLASH_ESCAPES,TRADITIONAL,IGNORE_SPACE";
+CREATE TABLE t1 (a longtext);
+INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81);
+ALTER TABLE t1 MODIFY a longtext character set utf8;
+INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81);
+DROP TABLE t1;
+SET @@SQL_MODE="";

--- 1.65/mysql-test/t/ctype_utf8.test	2005-07-28 15:09:48 +01:00
+++ 1.66/mysql-test/t/ctype_utf8.test	2005-08-17 02:27:04 +01:00
@@ -857,3 +857,18 @@
 SELECT DISTINCT id FROM t1 ORDER BY id;
 
 DROP TABLE t1;
+
+#
+# Bug#12600 Inserting into LONGTEXT gives strange error message
+#
+# ef87b1 f48fbfbf f0908080 c3bc d098 e4b880 ec9a81
+# string includes illegal UTF8 sequences.
+#
+SET @@SQL_MODE="ANSI_QUOTES,NO_BACKSLASH_ESCAPES,TRADITIONAL,IGNORE_SPACE";
+CREATE TABLE t1 (a longtext);
+INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81);
+ALTER TABLE t1 MODIFY a longtext character set utf8;
+INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81);
+DROP TABLE t1;
+SET @@SQL_MODE="";
+

--- 1.95/strings/ctype-utf8.c	2005-07-26 12:43:35 +01:00
+++ 1.96/strings/ctype-utf8.c	2005-08-17 02:27:04 +01:00
@@ -1945,6 +1945,8 @@
                        my_wc_t * pwc, const uchar *s, const uchar *e)
 {
   unsigned char c;
+  unsigned int wc;
+  int result;
 
   if (s >= e)
     return MY_CS_TOOFEW(0);
@@ -1976,14 +1978,13 @@
     if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0)))
       return MY_CS_ILSEQ;
 
-    *pwc = ((my_wc_t) (c & 0x0f) << 12)   |
-           ((my_wc_t) (s[1] ^ 0x80) << 6) |
-            (my_wc_t) (s[2] ^ 0x80);
+    wc = ((uint) (c & 0x0f) << 12)   |
+         ((uint) (s[1] ^ 0x80) << 6) |
+          (uint) (s[2] ^ 0x80);
 
-    return 3;
+    result= 3;
   }
-#ifdef UNICODE_32BIT
-  else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32)
+  else if (c < 0xf8)
   {
     if (s+4 > e) /* We need 4 characters */
       return MY_CS_TOOFEW(0);
@@ -1994,14 +1995,14 @@
           (c >= 0xf1 || s[1] >= 0x90)))
       return MY_CS_ILSEQ;
 
-    *pwc = ((my_wc_t) (c & 0x07) << 18)    |
-           ((my_wc_t) (s[1] ^ 0x80) << 12) |
-           ((my_wc_t) (s[2] ^ 0x80) << 6)  |
-            (my_wc_t) (s[3] ^ 0x80);
+    wc = ((uint) (c & 0x07) << 18)    |
+         ((uint) (s[1] ^ 0x80) << 12) |
+         ((uint) (s[2] ^ 0x80) << 6)  |
+          (uint) (s[3] ^ 0x80);
 
-    return 4;
+    result= 4;
   }
-   else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32)
+  else if (c < 0xfc)
   {
     if (s+5 >e) /* We need 5 characters */
       return MY_CS_TOOFEW(0);
@@ -2013,14 +2014,14 @@
           (c >= 0xf9 || s[1] >= 0x88)))
       return MY_CS_ILSEQ;
 
-    *pwc = ((my_wc_t) (c & 0x03) << 24) |
-           ((my_wc_t) (s[1] ^ 0x80) << 18) |
-           ((my_wc_t) (s[2] ^ 0x80) << 12) |
-           ((my_wc_t) (s[3] ^ 0x80) << 6) |
-            (my_wc_t) (s[4] ^ 0x80);
-    return 5;
+    wc = ((uint) (c & 0x03) << 24) |
+         ((uint) (s[1] ^ 0x80) << 18) |
+         ((uint) (s[2] ^ 0x80) << 12) |
+         ((uint) (s[3] ^ 0x80) << 6) |
+          (uint) (s[4] ^ 0x80);
+    result= 5;
   }
-  else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32)
+  else if (c < 0xfe)
   {
     if ( s+6 >e ) /* We need 6 characters */
       return MY_CS_TOOFEW(0);
@@ -2033,16 +2034,22 @@
           (c >= 0xfd || s[1] >= 0x84)))
       return MY_CS_ILSEQ;
 
-    *pwc = ((my_wc_t) (c & 0x01) << 30)
-      | ((my_wc_t) (s[1] ^ 0x80) << 24)
-      | ((my_wc_t) (s[2] ^ 0x80) << 18)
-      | ((my_wc_t) (s[3] ^ 0x80) << 12)
-      | ((my_wc_t) (s[4] ^ 0x80) << 6)
-      | (my_wc_t) (s[5] ^ 0x80);
-    return 6;
+    wc = ((uint) (c & 0x01) << 30)
+       | ((uint) (s[1] ^ 0x80) << 24)
+       | ((uint) (s[2] ^ 0x80) << 18)
+       | ((uint) (s[3] ^ 0x80) << 12)
+       | ((uint) (s[4] ^ 0x80) << 6)
+       | (uint) (s[5] ^ 0x80);
+    result= 6;
   }
+  else
+    return MY_CS_ILSEQ;
+#ifndef UNICODE_32BIT
+  if (wc > 0xffff)
+    wc= 0xfffd;
 #endif
-  return MY_CS_ILSEQ;
+  *pwc = (my_wc_t) wc;
+  return result;
 }
 
 static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
Thread
bk commit into 5.0 tree (acurtis:1.1986) BUG#12600 | antony | 17 Aug