Below is the list of changes that have just been committed into a local
5.0 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2107 06/03/23 12:41:28 bar@stripped +17 -0
Merge mysql.com:/usr/home/bar/mysql-4.1.b15376
into mysql.com:/usr/home/bar/mysql-5.0
mysql-test/r/ctype_ujis.result
1.17 06/03/23 12:41:15 bar@stripped +6 -6
After merge fix
include/m_ctype.h
1.116 06/03/23 12:40:36 bar@stripped +5 -0
After merge fix.
strings/ctype-utf8.c
1.98 06/03/23 12:37:51 bar@stripped +0 -0
Auto merged
strings/ctype-ujis.c
1.70 06/03/23 12:37:51 bar@stripped +0 -0
Auto merged
strings/ctype-ucs2.c
1.59 06/03/23 12:37:51 bar@stripped +0 -0
Auto merged
strings/ctype-tis620.c
1.92 06/03/23 12:37:50 bar@stripped +0 -0
Auto merged
strings/ctype-sjis.c
1.89 06/03/23 12:37:50 bar@stripped +0 -0
Auto merged
strings/ctype-simple.c
1.76 06/03/23 12:37:50 bar@stripped +0 -0
Auto merged
strings/ctype-latin1.c
1.49 06/03/23 12:37:50 bar@stripped +0 -0
Auto merged
strings/ctype-gbk.c
1.79 06/03/23 12:37:50 bar@stripped +0 -0
Auto merged
strings/ctype-gb2312.c
1.63 06/03/23 12:37:50 bar@stripped +0 -0
Auto merged
strings/ctype-euc_kr.c
1.66 06/03/23 12:37:49 bar@stripped +0 -0
Auto merged
strings/ctype-bin.c
1.68 06/03/23 12:37:49 bar@stripped +0 -0
Auto merged
strings/ctype-big5.c
1.89 06/03/23 12:37:49 bar@stripped +0 -0
Auto merged
sql/sql_string.cc
1.91 06/03/23 12:37:49 bar@stripped +0 -0
Auto merged
mysql-test/t/ctype_ujis.test
1.18 06/03/23 12:37:49 bar@stripped +0 -0
Auto merged
BitKeeper/deleted/.del-ctype-cp932.c
1.11 06/03/23 12:37:49 bar@stripped +0 -0
Auto merged
BitKeeper/deleted/.del-ctype-cp932.c
1.1.8.2 06/03/23 12:37:48 bar@stripped +0 -0
Merge rename: strings/ctype-cp932.c -> BitKeeper/deleted/.del-ctype-cp932.c
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: bar
# Host: bar.intranet.mysql.r18.ru
# Root: /usr/home/bar/mysql-5.0/RESYNC
--- 1.115/include/m_ctype.h 2006-03-23 10:17:22 +04:00
+++ 1.116/include/m_ctype.h 2006-03-23 12:40:36 +04:00
@@ -44,6 +44,7 @@
uint16 sort;
} MY_UNICASE_INFO;
+
extern MY_UNICASE_INFO *my_unicase_default[256];
extern MY_UNICASE_INFO *my_unicase_turkish[256];
@@ -51,6 +52,19 @@
#define MY_CS_ILUNI 0
#define MY_CS_TOOSMALL -1
#define MY_CS_TOOFEW(n) (-1-(n))
+
+/* mb_wc and wc_mb return codes */
+#define MY_CS_ILSEQ 0 /* Wrong byte sequence: mb_wc */
+#define MY_CS_ILUNI 0 /* Cannot encode Unicode to charset: wc_mb */
+#define MY_CS_TOOSMALL -101 /* Need at least one byte: wc_mb and mb_wc */
+#define MY_CS_TOOSMALL2 -102 /* Need at least two bytes: wc_mb and mb_wc */
+#define MY_CS_TOOSMALL3 -103 /* Need at least three bytes: wc_mb and mb_wc */
+/* The following three are currently not really used */
+#define MY_CS_TOOSMALL4 -104 /* Need at least 4 bytes: wc_mb and mb_wc */
+#define MY_CS_TOOSMALL5 -105 /* Need at least 5 bytes: wc_mb and mb_wc */
+#define MY_CS_TOOSMALL6 -106 /* Need at least 6 bytes: wc_mb and mb_wc */
+/* A helper macro for "need at least n bytes" */
+#define MY_CS_TOOSMALLN(n) (-100-(n))
#define MY_SEQ_INTTAIL 1
#define MY_SEQ_SPACES 2
--- 1.90/sql/sql_string.cc 2005-11-20 22:47:02 +04:00
+++ 1.91/sql/sql_string.cc 2006-03-23 12:37:49 +04:00
@@ -819,8 +819,18 @@
from++;
wc= '?';
}
+ else if (cnvres > MY_CS_TOOSMALL)
+ {
+ /*
+ A correct multibyte sequence detected
+ But it doesn't have Unicode mapping.
+ */
+ error_count++;
+ from+= (-cnvres);
+ wc= '?';
+ }
else
- break; // Impossible char.
+ break; // Not enough characters
outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
--- 1.88/strings/ctype-big5.c 2005-10-13 21:32:59 +05:00
+++ 1.89/strings/ctype-big5.c 2006-03-23 12:37:49 +04:00
@@ -6275,7 +6275,7 @@
int hi=s[0];
if (s >= e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
if (hi<0x80)
{
@@ -6284,10 +6284,10 @@
}
if (s+2>e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL2;
if (!(pwc[0]=func_big5_uni_onechar((hi<<8)+s[1])))
- return MY_CS_ILSEQ;
+ return -2;
return 2;
}
--- 1.65/strings/ctype-euc_kr.c 2006-01-13 16:39:24 +04:00
+++ 1.66/strings/ctype-euc_kr.c 2006-03-23 12:37:49 +04:00
@@ -8601,7 +8601,7 @@
return MY_CS_ILUNI;
if (s+2>e)
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL2;
s[0]=code>>8;
s[1]=code&0xFF;
@@ -8617,7 +8617,7 @@
int hi=s[0];
if (s >= e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
if (hi<0x80)
{
@@ -8626,10 +8626,10 @@
}
if (s+2>e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL2;
if (!(pwc[0]=func_ksc5601_uni_onechar((hi<<8)+s[1])))
- return MY_CS_ILSEQ;
+ return -2;
return 2;
}
--- 1.62/strings/ctype-gb2312.c 2006-01-13 16:39:24 +04:00
+++ 1.63/strings/ctype-gb2312.c 2006-03-23 12:37:50 +04:00
@@ -5651,7 +5651,7 @@
return MY_CS_ILUNI;
if (s+2>e)
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL2;
code|=0x8080;
s[0]=code>>8;
@@ -5668,7 +5668,7 @@
hi=(int) s[0];
if (s >= e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
if (hi<0x80)
{
@@ -5677,10 +5677,10 @@
}
if (s+2>e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL2;
if (!(pwc[0]=func_gb2312_uni_onechar(((hi<<8)+s[1])&0x7F7F)))
- return MY_CS_ILSEQ;
+ return -2;
return 2;
}
--- 1.78/strings/ctype-gbk.c 2005-10-13 19:10:01 +05:00
+++ 1.79/strings/ctype-gbk.c 2006-03-23 12:37:50 +04:00
@@ -9902,7 +9902,7 @@
return MY_CS_ILUNI;
if (s+2>e)
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL2;
s[0]=code>>8;
s[1]=code&0xFF;
@@ -9916,7 +9916,7 @@
int hi;
if (s >= e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
hi=s[0];
@@ -9927,10 +9927,10 @@
}
if (s+2>e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL2;
if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1])))
- return MY_CS_ILSEQ;
+ return -2;
return 2;
--- 1.88/strings/ctype-sjis.c 2005-10-13 19:10:06 +05:00
+++ 1.89/strings/ctype-sjis.c 2006-03-23 12:37:50 +04:00
@@ -4516,7 +4516,7 @@
mb:
if (s+2>e)
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL2;
s[0]=code>>8;
s[1]=code&0xFF;
@@ -4530,7 +4530,7 @@
int hi=s[0];
if (s >= e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
if (hi < 0x80)
{
@@ -4545,10 +4545,10 @@
}
if (s+2>e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL2;
if (!(pwc[0]=func_sjis_uni_onechar((hi<<8)+s[1])))
- return MY_CS_ILSEQ;
+ return -2;
return 2;
}
--- 1.91/strings/ctype-tis620.c 2005-10-13 21:32:59 +05:00
+++ 1.92/strings/ctype-tis620.c 2006-03-23 12:37:50 +04:00
@@ -827,10 +827,10 @@
const unsigned char *end __attribute__((unused)))
{
if (str >= end)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
*wc=cs_to_uni[*str];
- return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
+ return (!wc[0] && str[0]) ? -1 : 1;
}
static
--- 1.69/strings/ctype-ujis.c 2005-10-13 19:10:13 +05:00
+++ 1.70/strings/ctype-ujis.c 2006-03-23 12:37:51 +04:00
@@ -242,7 +242,7 @@
const uchar *e __attribute__((unused)))
{
wc[0]=tab_jisx0201_uni[*s];
- return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1;
+ return (!wc[0] && s[0]) ? -1 : 1;
}
@@ -8341,7 +8341,7 @@
int c1,c2,c3;
if (s >= e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
c1=s[0];
@@ -8353,7 +8353,7 @@
}
if (s+2>e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL2;
c2=s[1];
@@ -8368,7 +8368,7 @@
{
pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80));
if (!pwc[0])
- return MY_CS_ILSEQ;
+ return -2;
}
else
{
@@ -8388,7 +8388,7 @@
ret = my_mb_wc_jisx0201(cs,pwc,s+1,e);
if (ret!=1)
- return ret;
+ return -2;
return 2;
}
@@ -8399,7 +8399,7 @@
return MY_CS_ILSEQ;
if (s+3>e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL3;
c3=s[2];
if (c3 < 0xA1 || c3>=0xFF)
@@ -8408,8 +8408,8 @@
if (c2<0xF5)
{
pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80));
- if (!pwc)
- return MY_CS_ILSEQ;
+ if (!pwc[0])
+ return -3;
}
else
{
@@ -8440,7 +8440,7 @@
if ((jp=my_uni_jisx0208_onechar(wc)))
{
if (s+2>e)
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL2;
jp+=0x8080;
s[0]=jp>>8;
@@ -8452,7 +8452,7 @@
if (my_wc_mb_jisx0201(c,wc,s,e) == 1)
{
if (s+2>e)
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL2;
s[1]= s[0];
s[0]= 0x8E;
return 2;
@@ -8462,7 +8462,7 @@
if ((jp=my_uni_jisx0212_onechar(wc)))
{
if (s+3>e)
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL3;
jp+=0x8080;
s[0]=0x8F;
@@ -8476,7 +8476,7 @@
if (wc>=0xE000 && wc<0xE3AC)
{
if (s+2>e)
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL2;
c1=((unsigned)(wc-0xE000)/94)+0xF5;
s[0]=c1;
@@ -8490,7 +8490,7 @@
if (wc>=0xE3AC && wc<0xE758)
{
if (s+3>e)
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL3;
s[0]=0x8F;
c1=((unsigned)(wc-0xE3AC)/94)+0xF5;
--- 1.67/strings/ctype-bin.c 2006-03-23 10:17:22 +04:00
+++ 1.68/strings/ctype-bin.c 2006-03-23 12:37:49 +04:00
@@ -246,7 +246,7 @@
const unsigned char *end __attribute__((unused)))
{
if (str >= end)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
*wc=str[0];
return 1;
--- 1.48/strings/ctype-latin1.c 2005-10-14 14:02:14 +05:00
+++ 1.49/strings/ctype-latin1.c 2006-03-23 12:37:50 +04:00
@@ -363,10 +363,10 @@
const unsigned char *end __attribute__((unused)))
{
if (str >= end)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
*wc=cs_to_uni[*str];
- return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
+ return (!wc[0] && str[0]) ? -1 : 1;
}
static
--- 1.58/strings/ctype-ucs2.c 2005-12-28 18:23:05 +04:00
+++ 1.59/strings/ctype-ucs2.c 2006-03-23 12:37:51 +04:00
@@ -94,7 +94,7 @@
my_wc_t * pwc, const uchar *s, const uchar *e)
{
if (s+2 > e) /* Need 2 characters */
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL2;
*pwc= ((unsigned char)s[0]) * 256 + ((unsigned char)s[1]);
return 2;
@@ -104,7 +104,7 @@
my_wc_t wc, uchar *r, uchar *e)
{
if ( r+2 > e )
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALL2;
r[0]= (uchar) (wc >> 8);
r[1]= (uchar) (wc & 0xFF);
--- 1.75/strings/ctype-simple.c 2005-10-20 01:47:02 +05:00
+++ 1.76/strings/ctype-simple.c 2006-03-23 12:37:50 +04:00
@@ -239,10 +239,10 @@
const unsigned char *end __attribute__((unused)))
{
if (str >= end)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
*wc=cs->tab_to_uni[*str];
- return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
+ return (!wc[0] && str[0]) ? -1 : 1;
}
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
--- 1.97/strings/ctype-utf8.c 2005-10-13 19:10:14 +05:00
+++ 1.98/strings/ctype-utf8.c 2006-03-23 12:37:51 +04:00
@@ -1947,7 +1947,7 @@
unsigned char c;
if (s >= e)
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL;
c= s[0];
if (c < 0x80)
@@ -1960,7 +1960,7 @@
else if (c < 0xe0)
{
if (s+2 > e) /* We need 2 characters */
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL2;
if (!((s[1] ^ 0x80) < 0x40))
return MY_CS_ILSEQ;
@@ -1971,7 +1971,7 @@
else if (c < 0xf0)
{
if (s+3 > e) /* We need 3 characters */
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL3;
if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >=
0xe1 || s[1] >= 0xa0)))
return MY_CS_ILSEQ;
@@ -1986,7 +1986,7 @@
else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32)
{
if (s+4 > e) /* We need 4 characters */
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL4;
if (!((s[1] ^ 0x80) < 0x40 &&
(s[2] ^ 0x80) < 0x40 &&
@@ -2004,7 +2004,7 @@
else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32)
{
if (s+5 >e) /* We need 5 characters */
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL5;
if (!((s[1] ^ 0x80) < 0x40 &&
(s[2] ^ 0x80) < 0x40 &&
@@ -2023,7 +2023,7 @@
else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32)
{
if ( s+6 >e ) /* We need 6 characters */
- return MY_CS_TOOFEW(0);
+ return MY_CS_TOOSMALL6;
if (!((s[1] ^ 0x80) < 0x40 &&
(s[2] ^ 0x80) < 0x40 &&
@@ -2074,7 +2074,7 @@
Because of it (r+count > e), not (r+count-1 >e )
*/
if ( r+count > e )
- return MY_CS_TOOSMALL;
+ return MY_CS_TOOSMALLN(count);
switch (count) {
/* Fall through all cases!!! */
--- 1.16/mysql-test/r/ctype_ujis.result 2005-12-07 18:01:06 +04:00
+++ 1.17/mysql-test/r/ctype_ujis.result 2006-03-23 12:41:15 +04:00
@@ -2337,3 +2337,9 @@
set names default;
set character_set_database=default;
set character_set_server=default;
+select hex(convert(_ujis 0xA5FE41 using ucs2));
+hex(convert(_ujis 0xA5FE41 using ucs2))
+003F0041
+select hex(convert(_ujis 0x8FABF841 using ucs2));
+hex(convert(_ujis 0x8FABF841 using ucs2))
+003F0041
--- 1.17/mysql-test/t/ctype_ujis.test 2005-12-07 18:01:06 +04:00
+++ 1.18/mysql-test/t/ctype_ujis.test 2006-03-23 12:37:49 +04:00
@@ -1152,6 +1152,21 @@
-- source include/ctype_innodb_like.inc
-- source include/ctype_like_escape.inc
+#
+# Bug#15375: Unassigned multibyte codes are broken
+# into parts when converting to Unicode.
+# This query should return 0x003F0041. I.e. it should
+# scan unassigned double-byte character 0xA5FE, convert
+# it as QUESTION MARK 0x003F and then scan the next
+# character, which is a single byte character 0x41.
+#
+select hex(convert(_ujis 0xA5FE41 using ucs2));
+# This one should return 0x003F0041:
+# scan unassigned three-byte character 0x8FABF8,
+# convert it as QUESTION MARK 0x003F and then scan
+# the next character, which is a single byte character 0x41.
+select hex(convert(_ujis 0x8FABF841 using ucs2));
+
# End of 4.1 tests
--disable_warnings
DROP TABLE IF EXISTS t1, t2;
| Thread |
|---|
| • bk commit into 5.0 tree (bar:1.2107) | bar | 23 Mar |