Below is the list of changes that have just been committed into a local
5.0 repository of uchum. When uchum does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-11-16 23:44:55+04:00, gshchepa@stripped +4 -0
Fixed bug #31677.
1. Multi-byte characters were never escaped, because they
were always skipped before the escaping process.
2. 8-bit escape, terminating and enclosing characters
were silently ignored by the SELECT INTO OUTFILE query, but the
LOAD DATA INFILE algorithm is 8-bit clean, so data was corrupted
during encoding.
mysql-test/r/outfile_loaddata.result@stripped, 2007-11-16 23:44:50+04:00, gshchepa@stripped +59 -0
Added test case for bug #31677.
mysql-test/t/outfile_loaddata.test@stripped, 2007-11-16 23:44:49+04:00, gshchepa@stripped +89 -0
Added test case for bug #31677.
sql/sql_class.cc@stripped, 2007-11-16 23:44:41+04:00, gshchepa@stripped +21 -19
Fixed bug #31677.
1. Multi-byte skipping has been moved to the end of the data
escaping cycle to take into account the first bytes of
multi-byte characters.
2. SELECT INTO OUTFILE encoding was not 8-bit clean; it
has been fixed for symmetry with the LOAD DATA INFILE
decoding algorithm.
sql/sql_load.cc@stripped, 2007-11-16 23:44:47+04:00, gshchepa@stripped +27 -26
Fixed bug #31677.
Multi-byte skipping has been moved to the end of the decoding
cycle to un-escape the first bytes of multi-byte characters.
diff -Nrup a/mysql-test/r/outfile_loaddata.result b/mysql-test/r/outfile_loaddata.result
--- a/mysql-test/r/outfile_loaddata.result 2007-10-23 16:15:18 +05:00
+++ b/mysql-test/r/outfile_loaddata.result 2007-11-16 23:44:50 +04:00
@@ -82,4 +82,63 @@ c1 c2
-r- =raker=
DROP TABLE t2;
DROP TABLE t1;
+#
+# Bug#31677: SELECT INTO OUTFILE never escapes multibyte character
+#
+SET NAMES utf8;
+SET character_set_database= utf8;
+CREATE TABLE t1 (c1 VARCHAR(256), c2 VARCHAR(256)) CHARACTER SET utf8;
+INSERT INTO t1 VALUES ('Á', 'Á');
+SELECT * FROM t1;
+c1 c2
+Á Á
+# No FIELDS ENCLOSED/TERMINATED:
+SELECT * INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' FROM t1;
+TRUNCATE t1;
+SELECT HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'));
+HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'))
+C38109C3810A
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' INTO TABLE t1 ;
+SELECT * FROM t1;
+c1 c2
+Á Á
+# FIELDS ENCLOSED BY 0xC3
+SELECT * INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' FIELDS ENCLOSED BY 0xC3 FROM t1;
+TRUNCATE t1;
+SELECT HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'));
+HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'))
+C35CC381C309C35CC381C30A
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' INTO TABLE t1 FIELDS ENCLOSED BY 0xC3;
+SELECT * FROM t1;
+c1 c2
+Á Á
+# FIELDS ENCLOSED BY 0xC3 TERMINATED BY 0x81:
+SELECT * INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' FIELDS ENCLOSED BY 0xC3 TERMINATED BY 0x81 FROM t1;
+TRUNCATE t1;
+SELECT HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'));
+HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'))
+C35CC381C381C35CC381C30A
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' INTO TABLE t1 FIELDS ENCLOSED BY 0xC3 TERMINATED BY 0x81;
+SELECT * FROM t1;
+c1 c2
+Á Á
+DROP TABLE t1;
+SET character_set_database= default;
+SET NAMES default;
+# Single-byte 8bit chars enclosed by 8bit char (FIELDS ENCLOSED BY 0xC3):
+CREATE TABLE t1 (c1 VARCHAR(256));
+INSERT INTO t1 VALUES (0xC3);
+SELECT HEX(c1) FROM t1;
+HEX(c1)
+C3
+SELECT * INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' FIELDS ENCLOSED BY 0xC3 FROM t1;
+TRUNCATE t1;
+SELECT HEX(LOAD_FILE('file'));
+HEX(LOAD_FILE('file'))
+NULL
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' INTO TABLE t1 FIELDS ENCLOSED BY 0xC3;
+SELECT HEX(c1) FROM t1;
+HEX(c1)
+C3
+DROP TABLE t1;
# End of 5.0 tests.
diff -Nrup a/mysql-test/t/outfile_loaddata.test b/mysql-test/t/outfile_loaddata.test
--- a/mysql-test/t/outfile_loaddata.test 2007-10-23 16:15:18 +05:00
+++ b/mysql-test/t/outfile_loaddata.test 2007-11-16 23:44:49 +04:00
@@ -86,4 +86,93 @@ DROP TABLE t2;
DROP TABLE t1;
+--echo #
+--echo # Bug#31677: SELECT INTO OUTFILE never escapes multibyte character
+--echo #
+
+SET NAMES utf8;
+SET character_set_database= utf8;
+
+CREATE TABLE t1 (c1 VARCHAR(256), c2 VARCHAR(256)) CHARACTER SET utf8;
+INSERT INTO t1 VALUES ('Á', 'Á');
+SELECT * FROM t1;
+
+--let $file=$MYSQLTEST_VARDIR/tmp/bug31677.txt
+
+--let $clauses=
+--echo # No FIELDS ENCLOSED/TERMINATED:
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' $clauses FROM t1
+TRUNCATE t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT HEX(LOAD_FILE('$file'))
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t1 $clauses
+SELECT * FROM t1;
+
+--remove_file $file
+
+
+--let $clauses=FIELDS ENCLOSED BY 0xC3
+--echo # $clauses
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' $clauses FROM t1
+TRUNCATE t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT HEX(LOAD_FILE('$file'))
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t1 $clauses
+SELECT * FROM t1;
+
+--remove_file $file
+
+
+--let $clauses=FIELDS ENCLOSED BY 0xC3 TERMINATED BY 0x81
+--echo # $clauses:
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' $clauses FROM t1
+TRUNCATE t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT HEX(LOAD_FILE('$file'))
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t1 $clauses
+SELECT * FROM t1;
+
+--remove_file $file
+
+DROP TABLE t1;
+SET character_set_database= default;
+SET NAMES default;
+
+
+--let $clauses=FIELDS ENCLOSED BY 0xC3
+--echo # Single-byte 8bit chars enclosed by 8bit char ($clauses):
+
+CREATE TABLE t1 (c1 VARCHAR(256));
+INSERT INTO t1 VALUES (0xC3);
+SELECT HEX(c1) FROM t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' $clauses FROM t1
+TRUNCATE t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT HEX(LOAD_FILE('file'))
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t1 $clauses
+SELECT HEX(c1) FROM t1;
+
+--remove_file $file
+DROP TABLE t1;
+
--echo # End of 5.0 tests.
diff -Nrup a/sql/sql_class.cc b/sql/sql_class.cc
--- a/sql/sql_class.cc 2007-11-10 23:44:15 +04:00
+++ b/sql/sql_class.cc 2007-11-16 23:44:41 +04:00
@@ -1219,16 +1219,18 @@ select_export::prepare(List<Item> &list,
}
}
field_term_length=exchange->field_term->length();
- field_term_char= field_term_length ? (*exchange->field_term)[0] : INT_MAX;
+ field_term_char= field_term_length ?
+ (int) (uchar) (*exchange->field_term)[0] : INT_MAX;
if (!exchange->line_term->length())
exchange->line_term=exchange->field_term; // Use this if it exists
- field_sep_char= (exchange->enclosed->length() ? (*exchange->enclosed)[0] :
- field_term_char);
- escape_char= (exchange->escaped->length() ? (*exchange->escaped)[0] : -1);
+ field_sep_char= (exchange->enclosed->length() ?
+ (int) (uchar) (*exchange->enclosed)[0] : field_term_char);
+ escape_char= (exchange->escaped->length() ?
+ (int) (uchar) (*exchange->escaped)[0] : -1);
is_ambiguous_field_sep= test(strchr(ESCAPE_CHARS, field_sep_char));
is_unsafe_field_sep= test(strchr(NUMERIC_CHARS, field_sep_char));
line_sep_char= (exchange->line_term->length() ?
- (*exchange->line_term)[0] : INT_MAX);
+ (int) (uchar) (*exchange->line_term)[0] : INT_MAX);
if (!field_term_length)
exchange->opt_enclosed=0;
if (!exchange->enclosed->length())
@@ -1332,18 +1334,6 @@ bool select_export::send_data(List<Item>
pos != end ;
pos++)
{
-#ifdef USE_MB
- if (use_mb(res_charset))
- {
- int l;
- if ((l=my_ismbchar(res_charset, pos, end)))
- {
- pos += l-1;
- continue;
- }
- }
-#endif
-
/*
Special case when dumping BINARY/VARBINARY/BLOB values
for the clients with character sets big5, cp932, gbk and sjis,
@@ -1385,10 +1375,11 @@ bool select_export::send_data(List<Item>
Don't escape field_term_char by doubling - doubling is only
valid for ENCLOSED BY characters:
*/
- (enclosed || !is_ambiguous_field_term || *pos != field_term_char))
+ (enclosed || !is_ambiguous_field_term ||
+ (int) (uchar) *pos != field_term_char))
{
char tmp_buff[2];
- tmp_buff[0]= ((int) *pos == field_sep_char &&
+ tmp_buff[0]= ((int) (uchar) *pos == field_sep_char &&
is_ambiguous_field_sep) ?
field_sep_char : escape_char;
tmp_buff[1]= *pos ? *pos : '0';
@@ -1397,6 +1388,17 @@ bool select_export::send_data(List<Item>
goto err;
start=pos+1;
}
+#ifdef USE_MB
+ else if (use_mb(res_charset))
+ {
+ int l;
+ if ((l=my_ismbchar(res_charset, pos, end)))
+ {
+ pos += l-1;
+ continue;
+ }
+ }
+#endif
}
if (my_b_write(&cache,(byte*) start,(uint) (pos-start)))
goto err;
diff -Nrup a/sql/sql_load.cc b/sql/sql_load.cc
--- a/sql/sql_load.cc 2007-07-30 20:27:30 +05:00
+++ b/sql/sql_load.cc 2007-11-16 23:44:47 +04:00
@@ -1016,31 +1016,7 @@ int READ_INFO::read_field()
{
while ( to < end_of_buff)
{
- chr = GET;
-#ifdef USE_MB
- if ((my_mbcharlen(read_charset, chr) > 1) &&
- to+my_mbcharlen(read_charset, chr) <= end_of_buff)
- {
- uchar* p = (uchar*)to;
- *to++ = chr;
- int ml = my_mbcharlen(read_charset, chr);
- int i;
- for (i=1; i<ml; i++) {
- chr = GET;
- if (chr == my_b_EOF)
- goto found_eof;
- *to++ = chr;
- }
- if (my_ismbchar(read_charset,
- (const char *)p,
- (const char *)to))
- continue;
- for (i=0; i<ml; i++)
- PUSH((uchar) *--to);
- chr = GET;
- }
-#endif
- if (chr == my_b_EOF)
+ if ((chr= GET) == my_b_EOF)
goto found_eof;
if (chr == escape_char)
{
@@ -1123,7 +1099,32 @@ int READ_INFO::read_field()
return 0;
}
}
- *to++ = (byte) chr;
+#ifdef USE_MB
+ if ((my_mbcharlen(read_charset, chr) > 1) &&
+ to+my_mbcharlen(read_charset, chr) <= end_of_buff)
+ {
+ uchar* p = (uchar*)to;
+ *to++ = chr;
+ int ml = my_mbcharlen(read_charset, chr);
+ int i;
+ for (i=1; i<ml; i++) {
+ chr = GET;
+ if (chr == my_b_EOF)
+ goto found_eof;
+ *to++ = chr;
+ }
+ if (my_ismbchar(read_charset,
+ (const char *)p,
+ (const char *)to))
+ continue;
+ for (i=0; i<ml; i++)
+ PUSH((uchar) *--to);
+ }
+ else
+#endif
+ {
+ *to++ = (byte) chr;
+ }
}
/*
** We come here if buffer is too small. Enlarge it and continue