From: bar Date: March 23 2006 10:14am Subject: bk commit into 5.1 tree (bar:1.2212) List-Archive: http://lists.mysql.com/commits/4058 Message-Id: <200603231014.k2NAEmwK090227@bar.intranet.mysql.r18.ru> Below is the list of changes that have just been committed into a local 5.1 repository of bar. When bar does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2212 06/03/23 14:14:32 bar@stripped +17 -0 Merge mysql.com:/usr/home/bar/mysql-5.0 into mysql.com:/usr/home/bar/mysql-5.1-new include/m_ctype.h 1.120 06/03/23 14:14:13 bar@stripped +0 -4 After merge fix. strings/ctype-utf8.c 1.101 06/03/23 14:07:40 bar@stripped +0 -0 Auto merged strings/ctype-ujis.c 1.71 06/03/23 14:07:39 bar@stripped +0 -0 Auto merged strings/ctype-ucs2.c 1.59 06/03/23 14:07:39 bar@stripped +0 -0 Auto merged strings/ctype-tis620.c 1.93 06/03/23 14:07:39 bar@stripped +0 -0 Auto merged strings/ctype-sjis.c 1.90 06/03/23 14:07:39 bar@stripped +0 -0 Auto merged strings/ctype-simple.c 1.77 06/03/23 14:07:39 bar@stripped +0 -0 Auto merged strings/ctype-latin1.c 1.50 06/03/23 14:07:39 bar@stripped +0 -0 Auto merged strings/ctype-gbk.c 1.80 06/03/23 14:07:39 bar@stripped +0 -0 Auto merged strings/ctype-gb2312.c 1.64 06/03/23 14:07:38 bar@stripped +0 -0 Auto merged strings/ctype-eucjpms.c 1.14 06/03/23 14:07:38 bar@stripped +0 -0 Auto merged strings/ctype-euc_kr.c 1.67 06/03/23 14:07:37 bar@stripped +0 -0 Auto merged strings/ctype-cp932.c 1.13 06/03/23 14:07:37 bar@stripped +0 -0 Auto merged strings/ctype-bin.c 1.69 06/03/23 14:07:37 bar@stripped +0 -0 Auto merged strings/ctype-big5.c 1.89 06/03/23 14:07:37 bar@stripped +0 -0 Auto merged mysql-test/t/ctype_ujis.test 1.19 06/03/23 14:07:36 bar@stripped +0 -0 Auto merged mysql-test/r/ctype_ujis.result 1.19 06/03/23 14:07:36 bar@stripped +0 -0 Auto 
merged # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: bar # Host: bar.intranet.mysql.r18.ru # Root: /usr/home/bar/mysql-5.1-new/RESYNC --- 1.119/include/m_ctype.h 2006-03-23 10:19:45 +04:00 +++ 1.120/include/m_ctype.h 2006-03-23 14:14:13 +04:00 @@ -44,6 +44,7 @@ uint16 sort; } MY_UNICASE_INFO; + extern MY_UNICASE_INFO *my_unicase_default[256]; extern MY_UNICASE_INFO *my_unicase_turkish[256]; @@ -56,10 +57,18 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; -#define MY_CS_ILSEQ 0 -#define MY_CS_ILUNI 0 -#define MY_CS_TOOSMALL -1 -#define MY_CS_TOOFEW(n) (-1-(n)) +/* mb_wc and wc_mb return codes */ +#define MY_CS_ILSEQ 0 /* Wrong byte sequence: mb_wc */ +#define MY_CS_ILUNI 0 /* Cannot encode Unicode to charset: wc_mb */ +#define MY_CS_TOOSMALL -101 /* Need at least one byte: wc_mb and mb_wc */ +#define MY_CS_TOOSMALL2 -102 /* Need at least two bytes: wc_mb and mb_wc */ +#define MY_CS_TOOSMALL3 -103 /* Need at least three bytes: wc_mb and mb_wc */ +/* The following three are currently not really used */ +#define MY_CS_TOOSMALL4 -104 /* Need at least 4 bytes: wc_mb and mb_wc */ +#define MY_CS_TOOSMALL5 -105 /* Need at least 5 bytes: wc_mb and mb_wc */ +#define MY_CS_TOOSMALL6 -106 /* Need at least 6 bytes: wc_mb and mb_wc */ +/* A helper macro for "need at least n bytes" */ +#define MY_CS_TOOSMALLN(n) (-100-(n)) #define MY_SEQ_INTTAIL 1 #define MY_SEQ_SPACES 2 --- 1.88/strings/ctype-big5.c 2006-02-02 09:58:55 +04:00 +++ 1.89/strings/ctype-big5.c 2006-03-23 14:07:37 +04:00 @@ -6275,7 +6275,7 @@ int hi=s[0]; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; if (hi<0x80) { @@ -6284,10 +6284,10 @@ } if (s+2>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; if (!(pwc[0]=func_big5_uni_onechar((hi<<8)+s[1]))) - return MY_CS_ILSEQ; + return -2; return 2; } --- 1.66/strings/ctype-euc_kr.c 2006-02-02 09:59:12 
+04:00 +++ 1.67/strings/ctype-euc_kr.c 2006-03-23 14:07:37 +04:00 @@ -8601,7 +8601,7 @@ return MY_CS_ILUNI; if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; s[0]=code>>8; s[1]=code&0xFF; @@ -8617,7 +8617,7 @@ int hi=s[0]; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; if (hi<0x80) { @@ -8626,10 +8626,10 @@ } if (s+2>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; if (!(pwc[0]=func_ksc5601_uni_onechar((hi<<8)+s[1]))) - return MY_CS_ILSEQ; + return -2; return 2; } --- 1.63/strings/ctype-gb2312.c 2006-02-02 09:59:15 +04:00 +++ 1.64/strings/ctype-gb2312.c 2006-03-23 14:07:38 +04:00 @@ -5651,7 +5651,7 @@ return MY_CS_ILUNI; if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; code|=0x8080; s[0]=code>>8; @@ -5668,7 +5668,7 @@ hi=(int) s[0]; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; if (hi<0x80) { @@ -5677,10 +5677,10 @@ } if (s+2>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; if (!(pwc[0]=func_gb2312_uni_onechar(((hi<<8)+s[1])&0x7F7F))) - return MY_CS_ILSEQ; + return -2; return 2; } --- 1.79/strings/ctype-gbk.c 2006-02-02 09:59:17 +04:00 +++ 1.80/strings/ctype-gbk.c 2006-03-23 14:07:39 +04:00 @@ -9902,7 +9902,7 @@ return MY_CS_ILUNI; if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; s[0]=code>>8; s[1]=code&0xFF; @@ -9916,7 +9916,7 @@ int hi; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; hi=s[0]; @@ -9927,10 +9927,10 @@ } if (s+2>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1]))) - return MY_CS_ILSEQ; + return -2; return 2; --- 1.89/strings/ctype-sjis.c 2006-02-02 09:59:25 +04:00 +++ 1.90/strings/ctype-sjis.c 2006-03-23 14:07:39 +04:00 @@ -4516,7 +4516,7 @@ mb: if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; s[0]=code>>8; s[1]=code&0xFF; @@ -4530,7 +4530,7 @@ int hi=s[0]; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; if (hi < 0x80) { @@ -4545,10 +4545,10 @@ } if (s+2>e) - return 
MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; if (!(pwc[0]=func_sjis_uni_onechar((hi<<8)+s[1]))) - return MY_CS_ILSEQ; + return -2; return 2; } --- 1.92/strings/ctype-tis620.c 2006-02-02 09:59:27 +04:00 +++ 1.93/strings/ctype-tis620.c 2006-03-23 14:07:39 +04:00 @@ -827,10 +827,10 @@ const unsigned char *end __attribute__((unused))) { if (str >= end) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; *wc=cs_to_uni[*str]; - return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1; + return (!wc[0] && str[0]) ? -1 : 1; } static --- 1.70/strings/ctype-ujis.c 2006-02-02 09:59:30 +04:00 +++ 1.71/strings/ctype-ujis.c 2006-03-23 14:07:39 +04:00 @@ -242,7 +242,7 @@ const uchar *e __attribute__((unused))) { wc[0]=tab_jisx0201_uni[*s]; - return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1; + return (!wc[0] && s[0]) ? -1 : 1; } @@ -8341,7 +8341,7 @@ int c1,c2,c3; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; c1=s[0]; @@ -8353,7 +8353,7 @@ } if (s+2>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; c2=s[1]; @@ -8368,7 +8368,7 @@ { pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80)); if (!pwc[0]) - return MY_CS_ILSEQ; + return -2; } else { @@ -8388,7 +8388,7 @@ ret = my_mb_wc_jisx0201(cs,pwc,s+1,e); if (ret!=1) - return ret; + return -2; return 2; } @@ -8399,7 +8399,7 @@ return MY_CS_ILSEQ; if (s+3>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL3; c3=s[2]; if (c3 < 0xA1 || c3>=0xFF) @@ -8408,8 +8408,8 @@ if (c2<0xF5) { pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80)); - if (!pwc) - return MY_CS_ILSEQ; + if (!pwc[0]) + return -3; } else { @@ -8440,7 +8440,7 @@ if ((jp=my_uni_jisx0208_onechar(wc))) { if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; jp+=0x8080; s[0]=jp>>8; @@ -8452,7 +8452,7 @@ if (my_wc_mb_jisx0201(c,wc,s,e) == 1) { if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; s[1]= s[0]; s[0]= 0x8E; return 2; @@ -8462,7 +8462,7 @@ if ((jp=my_uni_jisx0212_onechar(wc))) { if (s+3>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL3; 
jp+=0x8080; s[0]=0x8F; @@ -8476,7 +8476,7 @@ if (wc>=0xE000 && wc<0xE3AC) { if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; c1=((unsigned)(wc-0xE000)/94)+0xF5; s[0]=c1; @@ -8490,7 +8490,7 @@ if (wc>=0xE3AC && wc<0xE758) { if (s+3>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL3; s[0]=0x8F; c1=((unsigned)(wc-0xE3AC)/94)+0xF5; --- 1.68/strings/ctype-bin.c 2006-03-23 10:19:45 +04:00 +++ 1.69/strings/ctype-bin.c 2006-03-23 14:07:37 +04:00 @@ -246,7 +246,7 @@ const unsigned char *end __attribute__((unused))) { if (str >= end) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; *wc=str[0]; return 1; --- 1.49/strings/ctype-latin1.c 2006-02-02 09:59:19 +04:00 +++ 1.50/strings/ctype-latin1.c 2006-03-23 14:07:39 +04:00 @@ -363,10 +363,10 @@ const unsigned char *end __attribute__((unused))) { if (str >= end) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; *wc=cs_to_uni[*str]; - return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1; + return (!wc[0] && str[0]) ? -1 : 1; } static --- 1.58/strings/ctype-ucs2.c 2006-02-02 09:59:28 +04:00 +++ 1.59/strings/ctype-ucs2.c 2006-03-23 14:07:39 +04:00 @@ -94,7 +94,7 @@ my_wc_t * pwc, const uchar *s, const uchar *e) { if (s+2 > e) /* Need 2 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; *pwc= ((unsigned char)s[0]) * 256 + ((unsigned char)s[1]); return 2; @@ -104,7 +104,7 @@ my_wc_t wc, uchar *r, uchar *e) { if ( r+2 > e ) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; r[0]= (uchar) (wc >> 8); r[1]= (uchar) (wc & 0xFF); --- 1.76/strings/ctype-simple.c 2006-02-02 09:59:23 +04:00 +++ 1.77/strings/ctype-simple.c 2006-03-23 14:07:39 +04:00 @@ -239,10 +239,10 @@ const unsigned char *end __attribute__((unused))) { if (str >= end) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; *wc=cs->tab_to_uni[*str]; - return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1; + return (!wc[0] && str[0]) ? 
-1 : 1; } int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, --- 1.100/strings/ctype-utf8.c 2006-02-02 09:59:32 +04:00 +++ 1.101/strings/ctype-utf8.c 2006-03-23 14:07:40 +04:00 @@ -1949,7 +1949,7 @@ unsigned char c; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; c= s[0]; if (c < 0x80) @@ -1962,7 +1962,7 @@ else if (c < 0xe0) { if (s+2 > e) /* We need 2 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; if (!((s[1] ^ 0x80) < 0x40)) return MY_CS_ILSEQ; @@ -1973,7 +1973,7 @@ else if (c < 0xf0) { if (s+3 > e) /* We need 3 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL3; if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0))) return MY_CS_ILSEQ; @@ -1988,7 +1988,7 @@ else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32) { if (s+4 > e) /* We need 4 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL4; if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && @@ -2006,7 +2006,7 @@ else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32) { if (s+5 >e) /* We need 5 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL5; if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && @@ -2025,7 +2025,7 @@ else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32) { if ( s+6 >e ) /* We need 6 characters */ - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL6; if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && @@ -2076,7 +2076,7 @@ Because of it (r+count > e), not (r+count-1 >e ) */ if ( r+count > e ) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALLN(count); switch (count) { /* Fall through all cases!!! 
*/ --- 1.18/mysql-test/r/ctype_ujis.result 2006-02-22 13:09:48 +04:00 +++ 1.19/mysql-test/r/ctype_ujis.result 2006-03-23 14:07:36 +04:00 @@ -2307,6 +2307,12 @@ c2h ab_def drop table t1; +select hex(convert(_ujis 0xA5FE41 using ucs2)); +hex(convert(_ujis 0xA5FE41 using ucs2)) +003F0041 +select hex(convert(_ujis 0x8FABF841 using ucs2)); +hex(convert(_ujis 0x8FABF841 using ucs2)) +003F0041 DROP TABLE IF EXISTS t1, t2; DROP PROCEDURE IF EXISTS sp1; set names ujis; --- 1.18/mysql-test/t/ctype_ujis.test 2006-02-13 01:26:23 +04:00 +++ 1.19/mysql-test/t/ctype_ujis.test 2006-03-23 14:07:36 +04:00 @@ -1152,6 +1152,21 @@ -- source include/ctype_innodb_like.inc -- source include/ctype_like_escape.inc +# +# Bugs#15375: Unassigned multibyte codes are broken +# into parts when converting to Unicode. +# This query should return 0x003F0041. I.e. it should +# scan unassigned double-byte character 0xA5FE, convert +# it as QUESTION MARK 0x003F and then scan the next +# character, which is a single byte character 0x41. +# +select hex(convert(_ujis 0xA5FE41 using ucs2)); +# This one should return 0x003F0041: +# scan unassigned three-byte character 0x8FABF8, +# convert it as QUESTION MARK 0x003F and then scan +# the next character, which is a single byte character 0x41. 
+select hex(convert(_ujis 0x8FABF841 using ucs2)); + # End of 4.1 tests --disable_warnings DROP TABLE IF EXISTS t1, t2; --- 1.12/strings/ctype-cp932.c 2006-02-02 09:59:10 +04:00 +++ 1.13/strings/ctype-cp932.c 2006-03-23 14:07:37 +04:00 @@ -5355,7 +5355,7 @@ int hi=s[0]; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; if (hi < 0x80) { @@ -5370,10 +5370,10 @@ } if (s+2>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1]))) - return MY_CS_ILSEQ; + return -2; return 2; } --- 1.13/strings/ctype-eucjpms.c 2006-02-02 09:59:13 +04:00 +++ 1.14/strings/ctype-eucjpms.c 2006-03-23 14:07:38 +04:00 @@ -243,7 +243,7 @@ const uchar *e __attribute__((unused))) { wc[0]=tab_jisx0201_uni[*s]; - return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1; + return (!wc[0] && s[0]) ? -1 : 1; } @@ -8473,7 +8473,7 @@ int c1,c2,c3; if (s >= e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL; c1=s[0]; @@ -8485,7 +8485,7 @@ } if (s+2>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL2; c2=s[1]; @@ -8500,7 +8500,7 @@ { pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80)); if (!pwc[0]) - return MY_CS_ILSEQ; + return -2; } else { @@ -8520,7 +8520,7 @@ ret = my_mb_wc_jisx0201(cs,pwc,s+1,e); if (ret!=1) - return ret; + return -2; return 2; } @@ -8531,7 +8531,7 @@ return MY_CS_ILSEQ; if (s+3>e) - return MY_CS_TOOFEW(0); + return MY_CS_TOOSMALL3; c3=s[2]; if (c3 < 0xA1 || c3>=0xFF) @@ -8540,8 +8540,8 @@ if (c2<0xF5) { pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80)); - if (!pwc) - return MY_CS_ILSEQ; + if (!pwc[0]) + return -3; } else { @@ -8572,7 +8572,7 @@ if ((jp=my_uni_jisx0208_onechar(wc))) { if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; jp+=0x8080; s[0]=jp>>8; @@ -8584,7 +8584,7 @@ if (my_wc_mb_jisx0201(c,wc,s,e) == 1) { if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; s[1]= s[0]; s[0]= 0x8E; return 2; @@ -8594,7 +8594,7 @@ if ((jp=my_uni_jisx0212_onechar(wc))) { if (s+3>e) - return 
MY_CS_TOOSMALL; + return MY_CS_TOOSMALL3; jp+=0x8080; s[0]=0x8F; @@ -8608,7 +8608,7 @@ if (wc>=0xE000 && wc<0xE3AC) { if (s+2>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL2; c1=((unsigned)(wc-0xE000)/94)+0xF5; s[0]=c1; @@ -8622,7 +8622,7 @@ if (wc>=0xE3AC && wc<0xE758) { if (s+3>e) - return MY_CS_TOOSMALL; + return MY_CS_TOOSMALL3; s[0]=0x8F; c1=((unsigned)(wc-0xE3AC)/94)+0xF5;