Below is the list of changes that have just been committed into a local
5.0 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2006-10-30 14:40:15+04:00, bar@stripped +9 -0
Bug#20404: SHOW CREATE TABLE fails with Turkish I
Problem: SHOW CREATE TABLE printed garbage in table
name for tables having TURKISH I
(i.e. LATIN CAPITAL LETTER I WITH DOT ABOVE)
when lower_case_table_names=1.
Reason: In some cases during lower/upper conversion in utf8,
the result string can be shorter than the original string
(as happens with the above letter). The old implementation of caseup_str()
and casedn_str() didn't handle the result length properly,
assuming that the length cannot change.
This fix changes the result type of cs->cset->casedn_str()
and cs->cset->caseup_str() from VOID to UINT, so that they return
the result length, and makes them put the '\0' terminator in the
proper place.
Also, my_caseup_str_utf8() and my_casedn_str_utf8() were
rewritten not to use strlen(), for performance purposes.
This was done by adding two new functions - my_utf8_uni_no_range()
and my_uni_utf8_no_range() - for null-terminated strings.
include/m_ctype.h@stripped, 2006-10-30 14:40:09+04:00, bar@stripped +6 -6
Changing return type from void to uint for caseup_str() and casedn_str()
mysql-test/r/lowercase_table.result@stripped, 2006-10-30 14:40:09+04:00, bar@stripped +24 -0
Adding test case
mysql-test/t/lowercase_table.test@stripped, 2006-10-30 14:40:09+04:00, bar@stripped +20 -0
Adding test case
sql/sql_parse.cc@stripped, 2006-10-30 14:40:09+04:00, bar@stripped +1 -1
Set table->table.length to result of my_casedn_str().
strings/ctype-bin.c@stripped, 2006-10-30 14:40:09+04:00, bar@stripped +2 -1
Changing return type from void to uint for caseup_str() and casedn_str()
strings/ctype-mb.c@stripped, 2006-10-30 14:40:09+04:00, bar@stripped +14 -10
Changing return type from void to uint for caseup_str() and casedn_str()
strings/ctype-simple.c@stripped, 2006-10-30 14:40:09+04:00, bar@stripped +12 -6
Changing return type from void to uint for caseup_str() and casedn_str()
strings/ctype-ucs2.c@stripped, 2006-10-30 14:40:09+04:00, bar@stripped +5 -3
Changing return type from void to uint for caseup_str() and casedn_str()
strings/ctype-utf8.c@stripped, 2006-10-30 14:40:09+04:00, bar@stripped +129 -6
Changing return type from void to uint for caseup_str() and casedn_str().
Optimization, to get rid of strlen():
Adding my_utf8_uni_no_range() and my_uni_utf8_no_range() - for null
terminated strings.
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: bar
# Host: bar.intranet.mysql.r18.ru
# Root: /usr/home/bar/mysql-5.0.b20404
--- 1.122/include/m_ctype.h 2006-10-30 14:40:25 +04:00
+++ 1.123/include/m_ctype.h 2006-10-30 14:40:25 +04:00
@@ -178,8 +178,8 @@
unsigned char *s,unsigned char *e);
/* Functions for case and sort convertion */
- void (*caseup_str)(struct charset_info_st *, char *);
- void (*casedn_str)(struct charset_info_st *, char *);
+ uint (*caseup_str)(struct charset_info_st *, char *);
+ uint (*casedn_str)(struct charset_info_st *, char *);
uint (*caseup)(struct charset_info_st *, char *src, uint srclen,
char *dst, uint dstlen);
uint (*casedn)(struct charset_info_st *, char *src, uint srclen,
@@ -311,8 +311,8 @@
/* Functions for 8bit */
-extern void my_caseup_str_8bit(CHARSET_INFO *, char *);
-extern void my_casedn_str_8bit(CHARSET_INFO *, char *);
+extern uint my_caseup_str_8bit(CHARSET_INFO *, char *);
+extern uint my_casedn_str_8bit(CHARSET_INFO *, char *);
extern uint my_caseup_8bit(CHARSET_INFO *, char *src, uint srclen,
char *dst, uint dstlen);
extern uint my_casedn_8bit(CHARSET_INFO *, char *src, uint srclen,
@@ -399,8 +399,8 @@
/* Functions for multibyte charsets */
-extern void my_caseup_str_mb(CHARSET_INFO *, char *);
-extern void my_casedn_str_mb(CHARSET_INFO *, char *);
+extern uint my_caseup_str_mb(CHARSET_INFO *, char *);
+extern uint my_casedn_str_mb(CHARSET_INFO *, char *);
extern uint my_caseup_mb(CHARSET_INFO *, char *src, uint srclen,
char *dst, uint dstlen);
extern uint my_casedn_mb(CHARSET_INFO *, char *src, uint srclen,
--- 1.582/sql/sql_parse.cc 2006-10-30 14:40:25 +04:00
+++ 1.583/sql/sql_parse.cc 2006-10-30 14:40:25 +04:00
@@ -6177,7 +6177,7 @@
ptr->alias= alias_str;
if (lower_case_table_names && table->table.length)
- my_casedn_str(files_charset_info, table->table.str);
+ table->table.length= my_casedn_str(files_charset_info, table->table.str);
ptr->table_name=table->table.str;
ptr->table_name_length=table->table.length;
ptr->lock_type= lock_type;
--- 1.70/strings/ctype-bin.c 2006-10-30 14:40:25 +04:00
+++ 1.71/strings/ctype-bin.c 2006-10-30 14:40:25 +04:00
@@ -211,9 +211,10 @@
/* This function is used for all conversion functions */
-static void my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
+static uint my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
char *str __attribute__((unused)))
{
+ return 0;
}
static uint my_case_bin(CHARSET_INFO *cs __attribute__((unused)),
--- 1.62/strings/ctype-ucs2.c 2006-10-30 14:40:25 +04:00
+++ 1.63/strings/ctype-ucs2.c 2006-10-30 14:40:25 +04:00
@@ -159,13 +159,13 @@
}
-static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
+static uint my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
char * s __attribute__((unused)))
{
+ return 0;
}
-
static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
char *dst __attribute__((unused)),
uint dstlen __attribute__((unused)))
@@ -188,9 +188,11 @@
return srclen;
}
-static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+
+static uint my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
char * s __attribute__((unused)))
{
+ return 0;
}
--- 1.51/strings/ctype-mb.c 2006-10-30 14:40:25 +04:00
+++ 1.52/strings/ctype-mb.c 2006-10-30 14:40:25 +04:00
@@ -21,40 +21,44 @@
#ifdef USE_MB
-void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
+uint my_caseup_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
- register uchar *map=cs->to_upper;
+ register uchar *map= cs->to_upper;
+ char *str_orig= str;
while (*str)
{
/* Pointing after the '\0' is safe here. */
- if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen)))
- str+=l;
+ if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
+ str+= l;
else
{
- *str=(char) map[(uchar)*str];
+ *str= (char) map[(uchar)*str];
str++;
}
}
+ return str - str_orig;
}
-void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
+uint my_casedn_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
- register uchar *map=cs->to_lower;
+ register uchar *map= cs->to_lower;
+ char *str_orig= str;
while (*str)
{
/* Pointing after the '\0' is safe here. */
- if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen)))
- str+=l;
+ if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
+ str+= l;
else
{
- *str=(char) map[(uchar)*str];
+ *str= (char) map[(uchar)*str];
str++;
}
}
+ return str - str_orig;
}
uint my_caseup_mb(CHARSET_INFO * cs, char *src, uint srclen,
--- 1.77/strings/ctype-simple.c 2006-10-30 14:40:25 +04:00
+++ 1.78/strings/ctype-simple.c 2006-10-30 14:40:25 +04:00
@@ -188,19 +188,25 @@
}
-void my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
+uint my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
{
- register uchar *map=cs->to_upper;
- while ((*str = (char) map[(uchar) *str]) != 0)
+ register uchar *map= cs->to_upper;
+ char *str_orig= str;
+ while ((*str= (char) map[(uchar) *str]) != 0)
str++;
+ return str - str_orig;
}
-void my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
+
+uint my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
{
- register uchar *map=cs->to_lower;
- while ((*str = (char) map[(uchar)*str]) != 0)
+ register uchar *map= cs->to_lower;
+ char *str_orig= str;
+ while ((*str= (char) map[(uchar) *str]) != 0)
str++;
+ return str - str_orig;
}
+
uint my_caseup_8bit(CHARSET_INFO * cs, char *src, uint srclen,
char *dst __attribute__((unused)),
--- 1.100/strings/ctype-utf8.c 2006-10-30 14:40:25 +04:00
+++ 1.101/strings/ctype-utf8.c 2006-10-30 14:40:25 +04:00
@@ -2045,6 +2045,52 @@
return MY_CS_ILSEQ;
}
+
+/*
+ The same as above, but without range check
+ for example, for a null-terminated string
+*/
+static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t * pwc, const uchar *s)
+{
+ unsigned char c;
+
+ c= s[0];
+ if (c < 0x80)
+ {
+ *pwc = c;
+ return 1;
+ }
+
+ if (c < 0xc2)
+ return MY_CS_ILSEQ;
+
+ if (c < 0xe0)
+ {
+ if (!((s[1] ^ 0x80) < 0x40))
+ return MY_CS_ILSEQ;
+
+ *pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80);
+ return 2;
+ }
+
+ if (c < 0xf0)
+ {
+ if (!((s[1] ^ 0x80) < 0x40 &&
+ (s[2] ^ 0x80) < 0x40 &&
+ (c >= 0xe1 || s[1] >= 0xa0)))
+ return MY_CS_ILSEQ;
+
+ *pwc= ((my_wc_t) (c & 0x0f) << 12) |
+ ((my_wc_t) (s[1] ^ 0x80) << 6) |
+ (my_wc_t) (s[2] ^ 0x80);
+
+ return 3;
+ }
+ return MY_CS_ILSEQ;
+}
+
+
static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
my_wc_t wc, uchar *r, uchar *e)
{
@@ -2091,6 +2137,34 @@
}
+/*
+ The same as above, but without range check.
+*/
+static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t wc, uchar *r)
+{
+ int count;
+
+ if (wc < 0x80)
+ count= 1;
+ else if (wc < 0x800)
+ count= 2;
+ else if (wc < 0x10000)
+ count= 3;
+ else
+ return MY_CS_ILUNI;
+
+ switch (count)
+ {
+ /* Fall through all cases!!! */
+ case 3: r[2]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x800;
+ case 2: r[1]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0xc0;
+ case 1: r[0]= (uchar) wc;
+ }
+ return count;
+}
+
+
static uint my_caseup_utf8(CHARSET_INFO *cs, char *src, uint srclen,
char *dst, uint dstlen)
{
@@ -2141,10 +2215,26 @@
}
-static void my_caseup_str_utf8(CHARSET_INFO * cs, char * s)
+static uint my_caseup_str_utf8(CHARSET_INFO *cs, char *src)
{
- uint len= (uint) strlen(s);
- my_caseup_utf8(cs, s, len, s, len);
+ my_wc_t wc;
+ int srcres, dstres;
+ char *dst= src, *dst0= src;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ DBUG_ASSERT(cs->caseup_multiply == 1);
+
+ while (*src &&
+ (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
+ {
+ int plane= (wc>>8) & 0xFF;
+ wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
+ if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
+ break;
+ src+= srcres;
+ dst+= dstres;
+ }
+ *dst= '\0';
+ return (uint) (dst - dst0);
}
@@ -2170,10 +2260,43 @@
return (uint) (dst - dst0);
}
-static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
+
+static uint my_casedn_str_utf8(CHARSET_INFO *cs, char *src)
{
- uint len= (uint) strlen(s);
- my_casedn_utf8(cs, s, len, s, len);
+ my_wc_t wc;
+ int srcres, dstres;
+ char *dst= src, *dst0= src;
+ MY_UNICASE_INFO **uni_plane= cs->caseinfo;
+ DBUG_ASSERT(cs->casedn_multiply == 1);
+
+ while (*src &&
+ (srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
+ {
+ int plane= (wc>>8) & 0xFF;
+ wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
+ if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
+ break;
+ src+= srcres;
+ dst+= dstres;
+ }
+
+ /*
+ In rare cases lower string can be shorter than
+ the original string, for example:
+
+ "U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE"
+ (which is 0xC4B0 in utf8, i.e. two bytes)
+
+ is converted into
+
+ "U+0069 LATIN SMALL LETTER I"
+ (which is 0x69 in utf8, i.e. one byte)
+
+ So, we need to put '\0' terminator after converting.
+ */
+
+ *dst= '\0';
+ return (uint) (dst - dst0);
}
--- 1.17/mysql-test/r/lowercase_table.result 2006-10-30 14:40:25 +04:00
+++ 1.18/mysql-test/r/lowercase_table.result 2006-10-30 14:40:25 +04:00
@@ -84,3 +84,27 @@
drop table t1, t2;
show tables;
Tables_in_test
+set names utf8;
+drop table if exists İ,İİ;
+create table İ (s1 int);
+show create table İ;
+Table Create Table
+İ CREATE TABLE `i` (
+ `s1` int(11) default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+show tables;
+Tables_in_test
+i
+drop table İ;
+create table İİ (s1 int);
+show create table İİ;
+Table Create Table
+İİ CREATE TABLE `ii` (
+ `s1` int(11) default NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+show tables;
+Tables_in_test
+ii
+drop table İİ;
+set names latin1;
+End of 5.0 tests
--- 1.22/mysql-test/t/lowercase_table.test 2006-10-30 14:40:25 +04:00
+++ 1.23/mysql-test/t/lowercase_table.test 2006-10-30 14:40:25 +04:00
@@ -85,3 +85,23 @@
show tables;
# End of 4.1 tests
+
+
+#
+# Bug#20404: SHOW CREATE TABLE fails with Turkish I
+#
+set names utf8;
+--disable_warnings
+drop table if exists İ,İİ;
+--enable_warnings
+create table İ (s1 int);
+show create table İ;
+show tables;
+drop table İ;
+create table İİ (s1 int);
+show create table İİ;
+show tables;
+drop table İİ;
+set names latin1;
+
+--echo End of 5.0 tests
| Thread |
|---|
| • bk commit into 5.0 tree (bar:1.2292) BUG#20404 | bar | 30 Oct |