Below is the list of changes that have just been committed into a local
5.0 repository of antony. When antony does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.1986 05/08/17 02:27:28 acurtis@stripped +3 -0
Bug#12600
"Inserting into LONGTEXT gives strange error message"
Failed to parse long UTF8 sequences
strings/ctype-utf8.c
1.96 05/08/17 02:27:04 acurtis@stripped +34 -27
Always parse UTF-8 sequences of up to 6 bytes, even when only
16-bit Unicode output is supported; in that case code points above
0xFFFF are replaced with 0xFFFD.
mysql-test/t/ctype_utf8.test
1.66 05/08/17 02:27:04 acurtis@stripped +15 -0
Test for bug 12600
mysql-test/r/ctype_utf8.result
1.70 05/08/17 02:27:03 acurtis@stripped +7 -0
Test for bug 12600
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: acurtis
# Host: ltantony.xiphis.org
# Root: /usr/home/antony/work2/p2-bug12600
--- 1.69/mysql-test/r/ctype_utf8.result 2005-07-22 10:47:04 +01:00
+++ 1.70/mysql-test/r/ctype_utf8.result 2005-08-17 02:27:03 +01:00
@@ -1023,3 +1023,10 @@
xxx
yyy
DROP TABLE t1;
+SET @@SQL_MODE="ANSI_QUOTES,NO_BACKSLASH_ESCAPES,TRADITIONAL,IGNORE_SPACE";
+CREATE TABLE t1 (a longtext);
+INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81);
+ALTER TABLE t1 MODIFY a longtext character set utf8;
+INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81);
+DROP TABLE t1;
+SET @@SQL_MODE="";
--- 1.65/mysql-test/t/ctype_utf8.test 2005-07-28 15:09:48 +01:00
+++ 1.66/mysql-test/t/ctype_utf8.test 2005-08-17 02:27:04 +01:00
@@ -857,3 +857,18 @@
SELECT DISTINCT id FROM t1 ORDER BY id;
DROP TABLE t1;
+
+#
+# Bug#12600 Inserting into LONGTEXT gives strange error message
+#
+# ef87b1 f48fbfbf f0908080 c3bc d098 e4b880 ec9a81
+# string includes illegal UTF8 sequences.
+#
+SET @@SQL_MODE="ANSI_QUOTES,NO_BACKSLASH_ESCAPES,TRADITIONAL,IGNORE_SPACE";
+CREATE TABLE t1 (a longtext);
+INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81);
+ALTER TABLE t1 MODIFY a longtext character set utf8;
+INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81);
+DROP TABLE t1;
+SET @@SQL_MODE="";
+
--- 1.95/strings/ctype-utf8.c 2005-07-26 12:43:35 +01:00
+++ 1.96/strings/ctype-utf8.c 2005-08-17 02:27:04 +01:00
@@ -1945,6 +1945,8 @@
my_wc_t * pwc, const uchar *s, const uchar *e)
{
unsigned char c;
+ unsigned int wc;
+ int result;
if (s >= e)
return MY_CS_TOOFEW(0);
@@ -1976,14 +1978,13 @@
if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >=
0xe1 || s[1] >= 0xa0)))
return MY_CS_ILSEQ;
- *pwc = ((my_wc_t) (c & 0x0f) << 12) |
- ((my_wc_t) (s[1] ^ 0x80) << 6) |
- (my_wc_t) (s[2] ^ 0x80);
+ wc = ((uint) (c & 0x0f) << 12) |
+ ((uint) (s[1] ^ 0x80) << 6) |
+ (uint) (s[2] ^ 0x80);
- return 3;
+ result= 3;
}
-#ifdef UNICODE_32BIT
- else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32)
+ else if (c < 0xf8)
{
if (s+4 > e) /* We need 4 characters */
return MY_CS_TOOFEW(0);
@@ -1994,14 +1995,14 @@
(c >= 0xf1 || s[1] >= 0x90)))
return MY_CS_ILSEQ;
- *pwc = ((my_wc_t) (c & 0x07) << 18) |
- ((my_wc_t) (s[1] ^ 0x80) << 12) |
- ((my_wc_t) (s[2] ^ 0x80) << 6) |
- (my_wc_t) (s[3] ^ 0x80);
+ wc = ((uint) (c & 0x07) << 18) |
+ ((uint) (s[1] ^ 0x80) << 12) |
+ ((uint) (s[2] ^ 0x80) << 6) |
+ (uint) (s[3] ^ 0x80);
- return 4;
+ result= 4;
}
- else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32)
+ else if (c < 0xfc)
{
if (s+5 >e) /* We need 5 characters */
return MY_CS_TOOFEW(0);
@@ -2013,14 +2014,14 @@
(c >= 0xf9 || s[1] >= 0x88)))
return MY_CS_ILSEQ;
- *pwc = ((my_wc_t) (c & 0x03) << 24) |
- ((my_wc_t) (s[1] ^ 0x80) << 18) |
- ((my_wc_t) (s[2] ^ 0x80) << 12) |
- ((my_wc_t) (s[3] ^ 0x80) << 6) |
- (my_wc_t) (s[4] ^ 0x80);
- return 5;
+ wc = ((uint) (c & 0x03) << 24) |
+ ((uint) (s[1] ^ 0x80) << 18) |
+ ((uint) (s[2] ^ 0x80) << 12) |
+ ((uint) (s[3] ^ 0x80) << 6) |
+ (uint) (s[4] ^ 0x80);
+ result= 5;
}
- else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32)
+ else if (c < 0xfe)
{
if ( s+6 >e ) /* We need 6 characters */
return MY_CS_TOOFEW(0);
@@ -2033,16 +2034,22 @@
(c >= 0xfd || s[1] >= 0x84)))
return MY_CS_ILSEQ;
- *pwc = ((my_wc_t) (c & 0x01) << 30)
- | ((my_wc_t) (s[1] ^ 0x80) << 24)
- | ((my_wc_t) (s[2] ^ 0x80) << 18)
- | ((my_wc_t) (s[3] ^ 0x80) << 12)
- | ((my_wc_t) (s[4] ^ 0x80) << 6)
- | (my_wc_t) (s[5] ^ 0x80);
- return 6;
+ wc = ((uint) (c & 0x01) << 30)
+ | ((uint) (s[1] ^ 0x80) << 24)
+ | ((uint) (s[2] ^ 0x80) << 18)
+ | ((uint) (s[3] ^ 0x80) << 12)
+ | ((uint) (s[4] ^ 0x80) << 6)
+ | (uint) (s[5] ^ 0x80);
+ result= 6;
}
+ else
+ return MY_CS_ILSEQ;
+#ifndef UNICODE_32BIT
+ if (wc > 0xffff)
+ wc= 0xfffd;
#endif
- return MY_CS_ILSEQ;
+ *pwc = (my_wc_t) wc;
+ return result;
}
static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
| Thread | | |
|---|---|---|
| • bk commit into 5.0 tree (acurtis:1.1986) BUG#12600 | antony | 17 Aug |