List:Commits« Previous MessageNext Message »
From:bar Date:December 12 2005 5:42pm
Subject:bk commit into 4.1 tree (bar:1.2488) BUG#15375
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2488 05/12/12 21:42:09 bar@stripped +21 -0
    Bug#15375 Unassigned multibyte codes are broken
    into parts when converting to Unicode.
  m_ctype.h:
    Reorganizing mb_wc return codes to be able
    to return "an unassigned N-byte-long character".
  sql_string.cc:
    Adding code to detect and properly handle
    unassigned characters (i.e. those characters
    which are correctly formed according to the
    character set specification, but don't have a
    Unicode mapping).
  Many files:
    Fixing conversion function to return new codes.
  ctype_ujis.test, ctype_gbk.test, ctype_big5.test:
    Adding a test case.
  ctype_ujis.result, ctype_gbk.result, ctype_big5.result:
    Fixing results accordingly.

  strings/ctype-utf8.c
    1.89 05/12/12 21:39:22 bar@stripped +7 -7
    Fixing conversion function to return new codes.

  strings/ctype-ujis.c
    1.65 05/12/12 21:39:20 bar@stripped +13 -13
    Fixing conversion function to return new codes.

  strings/ctype-ucs2.c
    1.46 05/12/12 21:39:18 bar@stripped +2 -2
    Fixing conversion function to return new codes.

  strings/ctype-tis620.c
    1.84 05/12/12 21:39:16 bar@stripped +2 -2
    Fixing conversion function to return new codes.

  strings/ctype-sjis.c
    1.82 05/12/12 21:39:14 bar@stripped +4 -4
    Fixing conversion function to return new codes.

  strings/ctype-simple.c
    1.68 05/12/12 21:39:13 bar@stripped +2 -2
    Fixing conversion function to return new codes.

  strings/ctype-latin1.c
    1.44 05/12/12 21:39:11 bar@stripped +2 -2
    Fixing conversion function to return new codes.

  strings/ctype-gbk.c
    1.73 05/12/12 21:39:09 bar@stripped +4 -4
    Fixing conversion function to return new codes.

  strings/ctype-gb2312.c
    1.58 05/12/12 21:39:07 bar@stripped +4 -4
    Fixing conversion function to return new codes.

  strings/ctype-euc_kr.c
    1.61 05/12/12 21:39:06 bar@stripped +4 -4
    Fixing conversion function to return new codes.

  strings/ctype-cp932.c
    1.8 05/12/12 21:39:04 bar@stripped +3 -3
    Fixing conversion function to return new codes.

  strings/ctype-bin.c
    1.59 05/12/12 21:38:59 bar@stripped +1 -1
    Fixing conversion function to return new codes.

  strings/ctype-big5.c
    1.80 05/12/12 21:38:37 bar@stripped +3 -3
    Fixing conversion function to return new codes.

  sql/sql_string.cc
    1.94 05/12/12 21:37:01 bar@stripped +11 -1
    Adding code to detect and properly handle
    unassigned characters (i.e. those characters
    which are correctly formed according to the
    character set specification, but don't have a
    Unicode mapping).

  mysql-test/t/ctype_ujis.test
    1.16 05/12/12 21:36:56 bar@stripped +15 -0
    Adding a test case.

  mysql-test/t/ctype_gbk.test
    1.4 05/12/12 21:36:52 bar@stripped +10 -0
    Adding a test case.

  mysql-test/t/ctype_big5.test
    1.11 05/12/12 21:36:47 bar@stripped +10 -0
    Adding a test case.

  mysql-test/r/ctype_ujis.result
    1.13 05/12/12 21:36:44 bar@stripped +6 -0
    Fixing results accordingly.

  mysql-test/r/ctype_gbk.result
    1.3 05/12/12 21:36:39 bar@stripped +3 -0
    Fixing results accordingly.

  mysql-test/r/ctype_big5.result
    1.11 05/12/12 21:36:21 bar@stripped +3 -0
    Fixing results accordingly.

  include/m_ctype.h
    1.106 05/12/12 21:32:42 bar@stripped +13 -4
    Reorganizing mb_wc return codes to be able
    to return "an unassigned N-byte-long character".
    Bug#15375 Unassigned multibyte codes are broken into parts when converting to Unicode.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	bar
# Host:	bar.intranet.mysql.r18.ru
# Root:	/usr/home/bar/mysql-4.1.b15376

--- 1.105/include/m_ctype.h	2005-08-17 13:26:26 +05:00
+++ 1.106/include/m_ctype.h	2005-12-12 21:32:42 +04:00
@@ -44,10 +44,19 @@
   uint16 sort;
 } MY_UNICASE_INFO;
 
-#define MY_CS_ILSEQ	0
-#define MY_CS_ILUNI	0
-#define MY_CS_TOOSMALL	-1
-#define MY_CS_TOOFEW(n)	(-1-(n))
+
+/* wm_wc and wc_mb return codes */
+#define MY_CS_ILSEQ	0     /* Wrong by sequence: wb_wc                   */
+#define MY_CS_ILUNI	0     /* Cannot encode Unicode to charset: wc_mb    */
+#define MY_CS_TOOSMALL  -101  /* Need at least one byte:    wc_mb and mb_wc */
+#define MY_CS_TOOSMALL2 -102  /* Need at least two bytes:   wc_mb and mb_wc */
+#define MY_CS_TOOSMALL3 -103  /* Need at least three bytes: wc_mb and mb_wc */
+/* These following three are currently not really used */
+#define MY_CS_TOOSMALL4 -104  /* Need at least 4 bytes: wc_mb and mb_wc */
+#define MY_CS_TOOSMALL5 -105  /* Need at least 5 bytes: wc_mb and mb_wc */
+#define MY_CS_TOOSMALL6 -106  /* Need at least 6 bytes: wc_mb and mb_wc */
+/* A helper macros for "need at least n bytes" */
+#define MY_CS_TOOSMALLN(n)    (-100-(n))
 
 #define MY_SEQ_INTTAIL	1
 #define MY_SEQ_SPACES	2

--- 1.93/sql/sql_string.cc	2005-06-05 22:38:42 +05:00
+++ 1.94/sql/sql_string.cc	2005-12-12 21:37:01 +04:00
@@ -806,8 +806,18 @@
       from++;
       wc= '?';
     }
+    else if (cnvres > MY_CS_TOOSMALL)
+    {
+      /*
+        A correct multibyte sequence detected
+        But it doesn't have Unicode mapping.
+      */
+      error_count++;
+      from+= (-cnvres);
+      wc= '?';
+    }
     else
-      break;					// Impossible char.
+      break;  // Not enough characters
 
 outp:
     if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)

--- 1.79/strings/ctype-big5.c	2005-10-05 19:19:19 +05:00
+++ 1.80/strings/ctype-big5.c	2005-12-12 21:38:37 +04:00
@@ -6259,7 +6259,7 @@
   int hi=s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi<0x80)
   {
@@ -6268,10 +6268,10 @@
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
 
   if (!(pwc[0]=func_big5_uni_onechar((hi<<8)+s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }

--- 1.60/strings/ctype-euc_kr.c	2005-08-17 13:26:28 +05:00
+++ 1.61/strings/ctype-euc_kr.c	2005-12-12 21:39:06 +04:00
@@ -8601,7 +8601,7 @@
     return MY_CS_ILUNI;
   
   if (s+2>e)
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
   
   s[0]=code>>8;
   s[1]=code&0xFF;
@@ -8617,7 +8617,7 @@
   int hi=s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi<0x80)
   {
@@ -8626,10 +8626,10 @@
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   if (!(pwc[0]=func_ksc5601_uni_onechar((hi<<8)+s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }

--- 1.57/strings/ctype-gb2312.c	2005-08-17 13:26:28 +05:00
+++ 1.58/strings/ctype-gb2312.c	2005-12-12 21:39:07 +04:00
@@ -5651,7 +5651,7 @@
     return MY_CS_ILUNI;
   
   if (s+2>e)
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
   
   code|=0x8080;
   s[0]=code>>8;
@@ -5668,7 +5668,7 @@
   hi=(int) s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi<0x80)
   {
@@ -5677,10 +5677,10 @@
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   if (!(pwc[0]=func_gb2312_uni_onechar(((hi<<8)+s[1])&0x7F7F)))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }

--- 1.72/strings/ctype-gbk.c	2005-09-21 22:12:12 +05:00
+++ 1.73/strings/ctype-gbk.c	2005-12-12 21:39:09 +04:00
@@ -9889,7 +9889,7 @@
     return MY_CS_ILUNI;
   
   if (s+2>e)
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
     
   s[0]=code>>8;
   s[1]=code&0xFF;
@@ -9903,7 +9903,7 @@
   int hi;
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   hi=s[0];
   
@@ -9914,10 +9914,10 @@
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
     
   if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
   

--- 1.81/strings/ctype-sjis.c	2005-09-21 22:12:18 +05:00
+++ 1.82/strings/ctype-sjis.c	2005-12-12 21:39:14 +04:00
@@ -4501,7 +4501,7 @@
 
 mb:
   if (s+2>e)
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
   
   s[0]=code>>8;
   s[1]=code&0xFF;
@@ -4515,7 +4515,7 @@
   int hi=s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi < 0x80)
   {
@@ -4530,10 +4530,10 @@
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   if (!(pwc[0]=func_sjis_uni_onechar((hi<<8)+s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }

--- 1.83/strings/ctype-tis620.c	2005-10-13 13:25:00 +05:00
+++ 1.84/strings/ctype-tis620.c	2005-12-12 21:39:16 +04:00
@@ -820,10 +820,10 @@
 		  const unsigned char *end __attribute__((unused)))
 {
   if (str >= end)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   *wc=cs_to_uni[*str];
-  return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
+  return (!wc[0] && str[0]) ? -1 : 1;
 }
 
 static

--- 1.64/strings/ctype-ujis.c	2005-08-17 13:26:29 +05:00
+++ 1.65/strings/ctype-ujis.c	2005-12-12 21:39:20 +04:00
@@ -242,7 +242,7 @@
 		  const uchar *e __attribute__((unused)))
 {
   wc[0]=tab_jisx0201_uni[*s];
-  return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1;
+  return (!wc[0] && s[0]) ? -1 : 1;
 }
 
 
@@ -8341,7 +8341,7 @@
   int c1,c2,c3;
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   c1=s[0];
   
@@ -8353,7 +8353,7 @@
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
     
   c2=s[1];
   
@@ -8368,7 +8368,7 @@
     {
       pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80));
       if (!pwc[0])
-        return MY_CS_ILSEQ;
+        return -2;
     }
     else
     {
@@ -8388,7 +8388,7 @@
     
     ret = my_mb_wc_jisx0201(cs,pwc,s+1,e);
     if (ret!=1)
-      return ret;
+      return -2;
     return 2;
   }
   
@@ -8399,7 +8399,7 @@
       return MY_CS_ILSEQ;
     
     if (s+3>e)
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL3;
     
     c3=s[2];
     if (c3 < 0xA1 || c3>=0xFF)
@@ -8408,8 +8408,8 @@
     if (c2<0xF5)
     {
       pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80));
-      if (!pwc)
-        return MY_CS_ILSEQ;
+      if (!pwc[0])
+        return -3;
     }
     else
     {
@@ -8440,7 +8440,7 @@
   if ((jp=my_uni_jisx0208_onechar(wc)))
   {
     if (s+2>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL2;
       
     jp+=0x8080;
     s[0]=jp>>8;
@@ -8452,7 +8452,7 @@
   if (my_wc_mb_jisx0201(c,wc,s,e) == 1)
   {
     if (s+2>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL2;
     s[1]= s[0];
     s[0]= 0x8E;
     return 2;
@@ -8462,7 +8462,7 @@
   if ((jp=my_uni_jisx0212_onechar(wc)))
   {
     if (s+3>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL3;
       
     jp+=0x8080;
     s[0]=0x8F;
@@ -8476,7 +8476,7 @@
   if (wc>=0xE000 && wc<0xE3AC)
   { 
     if (s+2>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL2;
       
     c1=((unsigned)(wc-0xE000)/94)+0xF5;
     s[0]=c1;
@@ -8490,7 +8490,7 @@
   if (wc>=0xE3AC && wc<0xE758)
   {
     if (s+3>e)
-      return MY_CS_TOOSMALL;
+      return MY_CS_TOOSMALL3;
       
     s[0]=0x8F;
     c1=((unsigned)(wc-0xE3AC)/94)+0xF5;

--- 1.10/mysql-test/r/ctype_big5.result	2005-10-05 19:20:19 +05:00
+++ 1.11/mysql-test/r/ctype_big5.result	2005-12-12 21:36:21 +04:00
@@ -189,3 +189,6 @@
 hex(a)
 E5ABBA
 drop table t1;
+select hex(convert(_big5 0xC84041 using ucs2));
+hex(convert(_big5 0xC84041 using ucs2))
+003F0041

--- 1.10/mysql-test/t/ctype_big5.test	2005-10-05 19:20:08 +05:00
+++ 1.11/mysql-test/t/ctype_big5.test	2005-12-12 21:36:47 +04:00
@@ -53,4 +53,14 @@
 select hex(a) from t1 where a = _big5 0xF9DC;
 drop table t1;
 
+#
+# Bugs#15375: Unassigned multibyte codes are broken
+# into parts when converting to Unicode.
+# This query should return 0x003F0041. I.e. it should
+# scan unassigned double-byte character 0xC840, convert
+# it as QUESTION MARK 0x003F and then scan the next
+# character, which is a single byte character 0x41.
+#
+select hex(convert(_big5 0xC84041 using ucs2));
+
 # End of 4.1 tests

--- 1.58/strings/ctype-bin.c	2005-08-17 13:26:27 +05:00
+++ 1.59/strings/ctype-bin.c	2005-12-12 21:38:59 +04:00
@@ -220,7 +220,7 @@
 			const unsigned char *end __attribute__((unused)))
 {
   if (str >= end)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   *wc=str[0];
   return 1;

--- 1.43/strings/ctype-latin1.c	2005-09-16 14:19:45 +05:00
+++ 1.44/strings/ctype-latin1.c	2005-12-12 21:39:11 +04:00
@@ -363,10 +363,10 @@
 		    const unsigned char *end __attribute__((unused)))
 {
   if (str >= end)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   *wc=cs_to_uni[*str];
-  return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
+  return (!wc[0] && str[0]) ? -1 : 1;
 }
 
 static

--- 1.45/strings/ctype-ucs2.c	2005-10-18 20:03:21 +05:00
+++ 1.46/strings/ctype-ucs2.c	2005-12-12 21:39:18 +04:00
@@ -95,7 +95,7 @@
 		       my_wc_t * pwc, const uchar *s, const uchar *e)
 {
   if (s+2 > e) /* Need 2 characters */
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   *pwc= ((unsigned char)s[0]) * 256  + ((unsigned char)s[1]);
   return 2;
@@ -105,7 +105,7 @@
 		       my_wc_t wc, uchar *r, uchar *e)
 {
   if ( r+2 > e ) 
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALL2;
   
   r[0]= (uchar) (wc >> 8);
   r[1]= (uchar) (wc & 0xFF);

--- 1.67/strings/ctype-simple.c	2005-10-18 20:03:20 +05:00
+++ 1.68/strings/ctype-simple.c	2005-12-12 21:39:13 +04:00
@@ -207,10 +207,10 @@
 		  const unsigned char *end __attribute__((unused)))
 {
   if (str >= end)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   *wc=cs->tab_to_uni[*str];
-  return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1;
+  return (!wc[0] && str[0]) ? -1 : 1;
 }
 
 int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,

--- 1.88/strings/ctype-utf8.c	2005-08-17 13:26:29 +05:00
+++ 1.89/strings/ctype-utf8.c	2005-12-12 21:39:22 +04:00
@@ -1765,7 +1765,7 @@
   unsigned char c;
 
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
 
   c= s[0];
   if (c < 0x80)
@@ -1778,7 +1778,7 @@
   else if (c < 0xe0)
   {
     if (s+2 > e) /* We need 2 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL2;
 
     if (!((s[1] ^ 0x80) < 0x40))
       return MY_CS_ILSEQ;
@@ -1789,7 +1789,7 @@
   else if (c < 0xf0)
   {
     if (s+3 > e) /* We need 3 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL3;
 
     if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0)))
       return MY_CS_ILSEQ;
@@ -1804,7 +1804,7 @@
   else if (c < 0xf8 && sizeof(my_wc_t)*8 >= 32)
   {
     if (s+4 > e) /* We need 4 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL4;
 
     if (!((s[1] ^ 0x80) < 0x40 &&
           (s[2] ^ 0x80) < 0x40 &&
@@ -1822,7 +1822,7 @@
    else if (c < 0xfc && sizeof(my_wc_t)*8 >= 32)
   {
     if (s+5 >e) /* We need 5 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL5;
 
     if (!((s[1] ^ 0x80) < 0x40 &&
           (s[2] ^ 0x80) < 0x40 &&
@@ -1841,7 +1841,7 @@
   else if (c < 0xfe && sizeof(my_wc_t)*8 >= 32)
   {
     if ( s+6 >e ) /* We need 6 characters */
-      return MY_CS_TOOFEW(0);
+      return MY_CS_TOOSMALL6;
 
     if (!((s[1] ^ 0x80) < 0x40   &&
           (s[2] ^ 0x80) < 0x40   &&
@@ -1892,7 +1892,7 @@
     Because of it (r+count > e), not (r+count-1 >e )
    */
   if ( r+count > e )
-    return MY_CS_TOOSMALL;
+    return MY_CS_TOOSMALLN(count);
 
   switch (count) {
     /* Fall through all cases!!! */

--- 1.2/mysql-test/r/ctype_gbk.result	2005-09-21 22:17:25 +05:00
+++ 1.3/mysql-test/r/ctype_gbk.result	2005-12-12 21:36:39 +04:00
@@ -165,3 +165,6 @@
 A1A1
 A3A0
 DROP TABLE t1;
+select hex(convert(_gbk 0xA14041 using ucs2));
+hex(convert(_gbk 0xA14041 using ucs2))
+003F0041

--- 1.3/mysql-test/t/ctype_gbk.test	2005-09-21 22:13:32 +05:00
+++ 1.4/mysql-test/t/ctype_gbk.test	2005-12-12 21:36:52 +04:00
@@ -31,4 +31,14 @@
 SELECT hex(a) FROM t1 ORDER BY a;
 DROP TABLE t1;
 
+#
+# Bugs#15375: Unassigned multibyte codes are broken
+# into parts when converting to Unicode.
+# This query should return 0x003F0041. I.e. it should
+# scan unassigned double-byte character 0xA140, convert
+# it as QUESTION MARK 0x003F and then scan the next
+# character, which is a single byte character 0x41.
+#
+select hex(convert(_gbk 0xA14041 using ucs2));
+
 # End of 4.1 tests

--- 1.12/mysql-test/r/ctype_ujis.result	2005-09-21 22:17:30 +05:00
+++ 1.13/mysql-test/r/ctype_ujis.result	2005-12-12 21:36:44 +04:00
@@ -2307,3 +2307,9 @@
 c2h
 ab_def
 drop table t1;
+select hex(convert(_ujis 0xA5FE41 using ucs2));
+hex(convert(_ujis 0xA5FE41 using ucs2))
+003F0041
+select hex(convert(_ujis 0x8FABF841 using ucs2));
+hex(convert(_ujis 0x8FABF841 using ucs2))
+003F0041

--- 1.15/mysql-test/t/ctype_ujis.test	2005-09-21 22:13:37 +05:00
+++ 1.16/mysql-test/t/ctype_ujis.test	2005-12-12 21:36:56 +04:00
@@ -1152,4 +1152,19 @@
 -- source include/ctype_innodb_like.inc
 -- source include/ctype_like_escape.inc
 
+#
+# Bugs#15375: Unassigned multibyte codes are broken
+# into parts when converting to Unicode.
+# This query should return 0x003F0041. I.e. it should
+# scan unassigned double-byte character 0xA5FE, convert
+# it as QUESTION MARK 0x003F and then scan the next
+# character, which is a single byte character 0x41.
+#
+select hex(convert(_ujis 0xA5FE41 using ucs2));
+# This one should return 0x003F0041:
+# scan unassigned three-byte character 0x8FABF8,
+# convert it as QUESTION MARK 0x003F and then scan
+# the next character, which is a single byte character 0x41.
+select hex(convert(_ujis 0x8FABF841 using ucs2));
+
 # End of 4.1 tests

--- 1.7/strings/ctype-cp932.c	2005-09-21 22:11:48 +05:00
+++ 1.8/strings/ctype-cp932.c	2005-12-12 21:39:04 +04:00
@@ -5355,7 +5355,7 @@
   int hi=s[0];
   
   if (s >= e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL;
   
   if (hi < 0x80)
   {
@@ -5370,10 +5370,10 @@
   }
   
   if (s+2>e)
-    return MY_CS_TOOFEW(0);
+    return MY_CS_TOOSMALL2;
   
   if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1])))
-    return MY_CS_ILSEQ;
+    return -2;
   
   return 2;
 }
Thread
bk commit into 4.1 tree (bar:1.2488) BUG#15375bar12 Dec