From:
Date: August 17 2005 3:27am
Subject: bk commit into 5.0 tree (acurtis:1.1986) BUG#12600
List-Archive: http://lists.mysql.com/internals/28363
X-Bug: 12600
Message-Id: <200508170127.j7H1Rifv060721@ltantony.xiphis.org>

Below is the list of changes that have just been committed into a local
5.0 repository of antony. When antony does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.1986 05/08/17 02:27:28 acurtis@stripped +3 -0
  Bug#12600
    "Inserting into LONGTEXT gives strange error message"
    Failed to parse long UTF8 sequences

  strings/ctype-utf8.c
    1.96 05/08/17 02:27:04 acurtis@stripped +34 -27
    always be able to parse up to 6 byte UTF8 sequences, even when only
    handling 16bit unicode output.

  mysql-test/t/ctype_utf8.test
    1.66 05/08/17 02:27:04 acurtis@stripped +15 -0
    Test for bug 12600

  mysql-test/r/ctype_utf8.result
    1.70 05/08/17 02:27:03 acurtis@stripped +7 -0
    Test for bug 12600

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: acurtis # Host: ltantony.xiphis.org # Root: /usr/home/antony/work2/p2-bug12600 --- 1.69/mysql-test/r/ctype_utf8.result 2005-07-22 10:47:04 +01:00 +++ 1.70/mysql-test/r/ctype_utf8.result 2005-08-17 02:27:03 +01:00 @@ -1023,3 +1023,10 @@ xxx yyy DROP TABLE t1; +SET @@SQL_MODE="ANSI_QUOTES,NO_BACKSLASH_ESCAPES,TRADITIONAL,IGNORE_SPACE"; +CREATE TABLE t1 (a longtext); +INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81); +ALTER TABLE t1 MODIFY a longtext character set utf8; +INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81); +DROP TABLE t1; +SET @@SQL_MODE=""; --- 1.65/mysql-test/t/ctype_utf8.test 2005-07-28 15:09:48 +01:00 +++ 1.66/mysql-test/t/ctype_utf8.test 2005-08-17 02:27:04 +01:00 @@ -857,3 +857,18 @@ SELECT DISTINCT id FROM t1 ORDER BY id; DROP TABLE t1; + +# +# Bug#12600 Inserting into LONGTEXT gives strange error message +# +# ef87b1 f48fbfbf f0908080 c3bc d098 e4b880 ec9a81 +# string includes illegal UTF8 sequences. +# +SET @@SQL_MODE="ANSI_QUOTES,NO_BACKSLASH_ESCAPES,TRADITIONAL,IGNORE_SPACE"; +CREATE TABLE t1 (a longtext); +INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81); +ALTER TABLE t1 MODIFY a longtext character set utf8; +INSERT INTO t1 VALUES (0xef87b1f48fbfbff0908080c3bcd098e4b880ec9a81); +DROP TABLE t1; +SET @@SQL_MODE=""; + --- 1.95/strings/ctype-utf8.c 2005-07-26 12:43:35 +01:00 +++ 1.96/strings/ctype-utf8.c 2005-08-17 02:27:04 +01:00 @@ -1945,6 +1945,8 @@ my_wc_t * pwc, const uchar *s, const uchar *e) { unsigned char c; + unsigned int wc; + int result; if (s >= e) return MY_CS_TOOFEW(0); @@ -1976,14 +1978,13 @@ if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0))) return MY_CS_ILSEQ; - *pwc = ((my_wc_t) (c & 0x0f) << 12) | - ((my_wc_t) (s[1] ^ 0x80) << 6) | - (my_wc_t) (s[2] ^ 0x80); + wc = ((uint) (c & 0x0f) << 12) | + ((uint) (s[1] ^ 0x80) << 6) | + (uint) (s[2] ^ 0x80); - return 3; + result= 3; } -#ifdef UNICODE_32BIT - else if (c < 0xf8 && 
sizeof(my_wc_t)*8 >= 32) + else if (c < 0xf8) { if (s+4 > e) /* We need 4 characters */ return MY_CS_TOOFEW(0); @@ -1994,14 +1995,14 @@ (c >= 0xf1 || s[1] >= 0x90))) return MY_CS_ILSEQ; - *pwc = ((my_wc_t) (c & 0x07) << 18) | - ((my_wc_t) (s[1] ^ 0x80) << 12) | - ((my_wc_t) (s[2] ^ 0x80) << 6) | - (my_wc_t) (s[3] ^ 0x80); + wc = ((uint) (c & 0x07) << 18) | + ((uint) (s[1] ^ 0x80) << 12) | + ((uint) (s[2] ^ 0x80) << 6) | + (uint) (s[3] ^ 0x80); - return 4; + result= 4; } - else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32) + else if (c < 0xfc) { if (s+5 >e) /* We need 5 characters */ return MY_CS_TOOFEW(0); @@ -2013,14 +2014,14 @@ (c >= 0xf9 || s[1] >= 0x88))) return MY_CS_ILSEQ; - *pwc = ((my_wc_t) (c & 0x03) << 24) | - ((my_wc_t) (s[1] ^ 0x80) << 18) | - ((my_wc_t) (s[2] ^ 0x80) << 12) | - ((my_wc_t) (s[3] ^ 0x80) << 6) | - (my_wc_t) (s[4] ^ 0x80); - return 5; + wc = ((uint) (c & 0x03) << 24) | + ((uint) (s[1] ^ 0x80) << 18) | + ((uint) (s[2] ^ 0x80) << 12) | + ((uint) (s[3] ^ 0x80) << 6) | + (uint) (s[4] ^ 0x80); + result= 5; } - else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32) + else if (c < 0xfe) { if ( s+6 >e ) /* We need 6 characters */ return MY_CS_TOOFEW(0); @@ -2033,16 +2034,22 @@ (c >= 0xfd || s[1] >= 0x84))) return MY_CS_ILSEQ; - *pwc = ((my_wc_t) (c & 0x01) << 30) - | ((my_wc_t) (s[1] ^ 0x80) << 24) - | ((my_wc_t) (s[2] ^ 0x80) << 18) - | ((my_wc_t) (s[3] ^ 0x80) << 12) - | ((my_wc_t) (s[4] ^ 0x80) << 6) - | (my_wc_t) (s[5] ^ 0x80); - return 6; + wc = ((uint) (c & 0x01) << 30) + | ((uint) (s[1] ^ 0x80) << 24) + | ((uint) (s[2] ^ 0x80) << 18) + | ((uint) (s[3] ^ 0x80) << 12) + | ((uint) (s[4] ^ 0x80) << 6) + | (uint) (s[5] ^ 0x80); + result= 6; } + else + return MY_CS_ILSEQ; +#ifndef UNICODE_32BIT + if (wc > 0xffff) + wc= 0xfffd; #endif - return MY_CS_ILSEQ; + *pwc = (my_wc_t) wc; + return result; } static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,