Below is the list of changes that have just been committed into a local
5.0 repository of gshchepa. When gshchepa does a push these changes
will be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-02-05 06:37:04+04:00, gshchepa@stripped +10 -0
Fixed bugs #30946 and #31677.
Bug#30946: mysqldump silently ignores --default-character-set
when used with --tab.
Added optional CHARACTER SET clause to the SELECT ... INTO OUTFILE
(to complement the same clause in LOAD DATA INFILE).
mysqldump is updated to use it in --tab mode.
Complete support for multibyte delimiter characters,
support for UCS2 charsets and hexadecimal representation of
malformed characters have been added.
Fixed bug with 8-bit separators of LOAD DATA INFILE statement.
New error message has been provided: "Malformed ... delimiter value",
where "..." is one of "FIELDS TERMINATED BY", "FIELDS ENCLOSED BY",
"FIELDS ESCAPED BY", "LINES TERMINATED BY" or "LINES STARTING BY",
to report errors on bad SELECT ... INTO OUTFILE separator values.
client/mysqldump.c@stripped, 2008-02-05 06:12:01+04:00, gshchepa@stripped +6 -0
Fixed bug #30946.
The dump_table function has been modified to take the
--default-character-set switch into account when dumping a table
into an external file.
For backward compatibility the binary character set is used
by default.
mysql-test/r/ctype_big5.result@stripped, 2008-02-05 06:12:15+04:00, gshchepa@stripped +6 -3
Updated test case for bug#30946.
mysql-test/t/ctype_big5.test@stripped, 2008-02-05 06:12:19+04:00, gshchepa@stripped +3 -2
Updated test case for bug#30946.
mysql-test/t/mysqldump.test@stripped, 2008-02-05 06:12:22+04:00, gshchepa@stripped +102 -0
Added test case for bug#30946.
sql/share/errmsg.txt@stripped, 2008-02-05 06:12:31+04:00, gshchepa@stripped +2 -0
Fixed bug #30946.
New error message has been provided: "Malformed ... delimiter value",
where "..." is one of "FIELDS TERMINATED BY", "FIELDS ENCLOSED BY",
"FIELDS ESCAPED BY", "LINES TERMINATED BY" or "LINES STARTING BY",
to report errors on bad SELECT ... INTO OUTFILE separator values.
sql/sql_class.cc@stripped, 2008-02-05 06:12:24+04:00, gshchepa@stripped +394 -115
Fixed bugs #30946 and #31677.
The select_export::send_data function has been modified to support
new 'SELECT ... INTO OUTFILE ... CHARACTER SET' syntax.
Complete support for multibyte delimiter characters, support for
UCS2 charsets and hexadecimal encoding of malformed characters have
been added.
sql/sql_class.h@stripped, 2008-02-05 06:12:26+04:00, gshchepa@stripped +12 -3
Fixed bug #30946.
For compliance with multibyte character processing, single-byte
(int type) delimiter fields of select_export class have been
replaced with String type fields.
sql/sql_load.cc@stripped, 2008-02-05 06:12:27+04:00, gshchepa@stripped +321 -161
Fixed bug #30946.
Fixed bug with 8-bit separators.
Complete support for multibyte delimiter characters, support for
UCS2 charsets and decoding of hexadecimal sequences have been added.
sql/sql_string.h@stripped, 2008-02-05 06:12:28+04:00, gshchepa@stripped +6 -1
Fixed bug #30946.
String::is_empty() method has been marked as 'const',
stringcmp() function has been made globally visible.
sql/sql_yacc.yy@stripped, 2008-02-05 06:12:29+04:00, gshchepa@stripped +2 -0
Fixed bug #30946.
New syntax has been provided:
Added optional CHARACTER SET clause to the SELECT ... INTO OUTFILE
(to complement the same clause in LOAD DATA INFILE).
By default this value is set to the binary character set.
diff -Nrup a/client/mysqldump.c b/client/mysqldump.c
--- a/client/mysqldump.c 2007-12-05 07:07:00 +04:00
+++ b/client/mysqldump.c 2008-02-05 06:12:01 +04:00
@@ -2399,6 +2399,12 @@ static void dump_table(char *table, char
dynstr_append_checked(&query_string, filename);
dynstr_append_checked(&query_string, "'");
+ dynstr_append_checked(&query_string, " /*!50054 CHARACTER SET ");
+ dynstr_append_checked(&query_string, default_charset ==
mysql_universal_client_charset ?
+ my_charset_bin.name : /* backward compatibility
*/
+ default_charset);
+ dynstr_append_checked(&query_string, " */");
+
if (fields_terminated || enclosed || opt_enclosed || escaped)
dynstr_append_checked(&query_string, " FIELDS");
diff -Nrup a/mysql-test/r/ctype_big5.result b/mysql-test/r/ctype_big5.result
--- a/mysql-test/r/ctype_big5.result 2007-11-28 10:55:11 +04:00
+++ b/mysql-test/r/ctype_big5.result 2008-02-05 06:12:15 +04:00
@@ -210,14 +210,17 @@ select hex(convert(_big5 0xC84041 using
hex(convert(_big5 0xC84041 using ucs2))
003F0041
End of 4.1 tests
-set names big5;
create table t1 (a blob);
insert into t1 values (0xEE00);
-select * into outfile 'test/t1.txt' from t1;
+select * into outfile 'test/t1.txt' character set big5 from t1;
delete from t1;
select hex(load_file('MYSQLTEST_VARDIR/master-data/test/t1.txt'));;
hex(load_file('MYSQLTEST_VARDIR/master-data/test/t1.txt'))
-5CEE5C300A
+5C7845455C300A
+select
load_file('/home/uchum/work/5.0-opt-select/mysql-test/var/master-data/test/t1.txt');
+load_file('/home/uchum/work/5.0-opt-select/mysql-test/var/master-data/test/t1.txt')
+\xEE\0
+
load data infile 't1.txt' into table t1;
select hex(a) from t1;
hex(a)
diff -Nrup a/mysql-test/t/ctype_big5.test b/mysql-test/t/ctype_big5.test
--- a/mysql-test/t/ctype_big5.test 2007-07-04 14:21:28 +05:00
+++ b/mysql-test/t/ctype_big5.test 2008-02-05 06:12:19 +04:00
@@ -68,13 +68,14 @@ select hex(convert(_big5 0xC84041 using
#
# Bug#26711 "binary content 0x00 sometimes becomes 0x5C 0x00 after dump/load"
#
-set names big5;
+#set names big5;
create table t1 (a blob);
insert into t1 values (0xEE00);
-select * into outfile 'test/t1.txt' from t1;
+select * into outfile 'test/t1.txt' character set big5 from t1;
delete from t1;
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
--eval select hex(load_file('$MYSQLTEST_VARDIR/master-data/test/t1.txt'));
+--eval select load_file('$MYSQLTEST_VARDIR/master-data/test/t1.txt')
load data infile 't1.txt' into table t1;
select hex(a) from t1;
--exec rm $MYSQLTEST_VARDIR/master-data/test/t1.txt
diff -Nrup a/mysql-test/t/mysqldump.test b/mysql-test/t/mysqldump.test
--- a/mysql-test/t/mysqldump.test 2007-10-03 11:36:18 +05:00
+++ b/mysql-test/t/mysqldump.test 2008-02-05 06:12:22 +04:00
@@ -1447,8 +1447,11 @@ INSERT INTO t1 VALUES (1), (2);
--exec $MYSQL_DUMP
--tab=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa test 2>&1
--exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/
--fields-terminated-by=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
test
+--error 2
--exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/
--fields-enclosed-by=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
test
+--error 2
--exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/
--fields-optionally-enclosed-by=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
test
+--error 2
--exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/
--fields-escaped-by=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
test
--exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/
--lines-terminated-by=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
test
@@ -1593,6 +1596,105 @@ DROP TABLE t1,t2;
--replace_regex /-- [^D][^u][^m][^p].*// /\/\*!.*// / on [0-9 :-]+/ on DATE/
--exec $MYSQL_DUMP test
+--echo #
+--echo # Bug #30946: mysqldump silently ignores --default-character-set
+--echo # when used with --tab
+--echo #
+
+SET NAMES utf8;
+CREATE TABLE t1 (a CHAR(10) CHARSET koi8r, b CHAR(10) CHARSET latin1);
+CREATE TABLE t2 LIKE t1;
+INSERT INTO t1 VALUES ('ABC-АБВ', 'DEF-ÂÃÄ'), (NULL, NULL);
+
+--let $file=$MYSQLTEST_VARDIR/tmp/t1.txt
+
+--echo # Default (binary) charset:
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' FROM t1
+--echo ##################################################
+--cat_file $file
+--echo ##################################################
+TRUNCATE t2;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t2 CHARACTER SET binary
+--remove_file $file
+SELECT * FROM t2;
+
+--echo # UTF-8 charset:
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' CHARACTER SET utf8 FROM t1
+--echo ##################################################
+--cat_file $file
+--echo ##################################################
+TRUNCATE t2;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t2 CHARACTER SET utf8
+--remove_file $file
+SELECT * FROM t2;
+
+--echo #
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+eval
+SELECT * INTO OUTFILE '$file'
+ CHARACTER SET utf8
+ FIELDS TERMINATED BY 'межа' ENCLOSED BY 'ъ' LINES TERMINATED BY 'строка'
+ FROM t1;
+--echo ##################################################
+--cat_file $file
+--echo
+--echo ##################################################
+TRUNCATE t2;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+eval
+LOAD DATA INFILE '$file' INTO TABLE t2
+ CHARACTER SET utf8
+ FIELDS TERMINATED BY 'межа' ENCLOSED BY 'ъ' LINES TERMINATED BY 'строка';
+--remove_file $file
+SELECT * FROM t2;
+
+--echo # UCS2 charset:
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' CHARACTER SET ucs2 FROM t1
+--echo ##################################################
+--cat_file $file
+--echo ##################################################
+TRUNCATE t2;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t2 CHARACTER SET ucs2
+--remove_file $file
+SELECT * FROM t2;
+
+--echo # mysqldump, default '--default-charset' (binary):
+
+--exec $MYSQL_DUMP --character-sets-dir=$CHARSETSDIR --tab=$MYSQLTEST_VARDIR/tmp/ test t1
+--echo ##################################################
+--cat_file $file
+--echo ##################################################
+TRUNCATE t2;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t2 CHARACTER SET binary
+--remove_file $file
+SELECT * FROM t2;
+
+--echo # mysqldump --default-charset=utf8
+
+--exec $MYSQL_DUMP --character-sets-dir=$CHARSETSDIR --default-character-set=utf8
--tab=$MYSQLTEST_VARDIR/tmp/ test t1
+--echo ##################################################
+--cat_file $file
+--echo ##################################################
+TRUNCATE t2;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t2 CHARACTER SET utf8
+--remove_file $file
+SELECT * FROM t2;
+
+SET NAMES default;
+
+DROP TABLE t1, t2;
--echo #
--echo # End of 5.0 tests
--echo #
diff -Nrup a/sql/share/errmsg.txt b/sql/share/errmsg.txt
--- a/sql/share/errmsg.txt 2007-10-23 16:15:27 +05:00
+++ b/sql/share/errmsg.txt 2008-02-05 06:12:31 +04:00
@@ -5641,3 +5641,5 @@ ER_NAME_BECOMES_EMPTY
eng "Name '%-.64s' has become ''"
ER_AMBIGUOUS_FIELD_TERM
eng "First character of the FIELDS TERMINATED string is ambiguous; please use
non-optional and non-empty FIELDS ENCLOSED BY"
+ER_MALFORMED_DELIMITER
+ eng "Malformed %-.64s delimiter value"
diff -Nrup a/sql/sql_class.cc b/sql/sql_class.cc
--- a/sql/sql_class.cc 2007-12-15 15:04:58 +04:00
+++ b/sql/sql_class.cc 2008-02-05 06:12:24 +04:00
@@ -1123,6 +1123,8 @@ select_to_file::~select_to_file()
select_export::~select_export()
{
thd->sent_row_count=row_count;
+ if (cvt_buff)
+ my_free(cvt_buff, MYF(0));
}
@@ -1191,6 +1193,227 @@ static File create_file(THD *thd, char *
}
+/*
+ Copy first multibyte character
+
+ SYNOPSIS
+ set_mb_char()
+ to Destination string.
+ from Source string.
+ single TRUE if source string should not contain
+ multiple characters.
+
+ RETURN VALUE
+ 0 Success
+ 1 Source string is malformed or contains more than one
+ multibyte character (see 'single' parameter).
+*/
+
+static bool
+set_mb_char(String *to, const String &from, bool single= FALSE)
+{
+ int len= to->charset()->mbmaxlen;
+ const char *dummy_error1, *dummy_error2, *from_end_pos;
+ if (from.is_empty())
+ {
+ to->length(0);
+ return FALSE;
+ }
+ if (to->alloc(len))
+ return TRUE;
+ len= well_formed_copy_nchars(to->charset(), (char *) to->ptr(), len,
+ from.charset(), from.ptr(), from.length(), 1,
+ &dummy_error1, &dummy_error2, &from_end_pos);
+ if (len == 0)
+ return TRUE;
+ if (single && from_end_pos != from.ptr() + from.length())
+ return TRUE;
+ to->length(len);
+ return FALSE;
+}
+
+
+/*
+ Look ahead for a given prefix string
+
+ SYNOPSIS
+ prefix()
+ ptr Buffer to compare with prefix string.
+ end Next byte after the buffer string.
+ pfx Prefix string.
+
+ RETURN
+ TRUE on success.
+*/
+
+inline bool
+prefix(const char *ptr, const char *end, const String &pfx)
+{
+ if (pfx.is_empty() || (int) pfx.length() > end - ptr)
+ return FALSE;
+ return !memcmp(ptr, pfx.ptr(), pfx.length());
+}
+
+
+enum cvt_result
+{
+ CVT_OK = 0, // successfull conversion
+ CVT_MB_WC = 1, // malformed multibyte char in the input stream (for sure)
+ CVT_WC_MB = 2 // can't convert current character to destination charset
+};
+
+
+/*
+ Precise conversion from one charset to another
+
+ SYNOPSIS
+ cvt()
+ to Start of the output buffer.
+ to_end [IN/OUT] End of output buffer, may be adjusted by this function.
+ to_cs Output charset.
+ from Start of the input buffer.
+ from_end [IN/OUT] End of input buffer, may be adjusted by this function.
+ from_cs Input charset.
+
+ RETURN VALUE
+ CVT_OK Success.
+ CVT_MB_WC Malformed multibyte char in the input stream (for sure).
+ CVT_WC_MB Can't convert current character to destination charset.
+*/
+
+static cvt_result
+cvt(char *to, char **to_end, CHARSET_INFO *to_cs,
+ const char *from, const char **from_end, CHARSET_INFO *from_cs)
+{
+ cvt_result ret= CVT_OK;
+ int cnvres;
+ my_wc_t wc;
+
+ if (my_charset_same(to_cs, &my_charset_bin))
+ {
+ int len= min(*to_end - to, *from_end - from);
+ memcpy(to, from, len);
+ *to_end= to + len;
+ *from_end= from + len;
+ return CVT_OK;
+ }
+ if (my_charset_same(from_cs, &my_charset_bin))
+ from_cs= to_cs;
+ while (from < *from_end)
+ {
+ if ((cnvres= (*from_cs->cset->mb_wc)(from_cs, &wc, (uchar *) from,
+ (uchar *) *from_end)) > 0)
+ from+= cnvres;
+ else
+ {
+ ret= CVT_MB_WC; // MY_CS_ILSEQ, MY_CS_TOOSMALL
+ break;
+ }
+ if ((cnvres= (*to_cs->cset->wc_mb)(to_cs, wc, (uchar *) to,
+ (uchar *) *to_end)) > 0)
+ to+= cnvres;
+ else if (cnvres == MY_CS_TOOSMALL)
+ {
+ ret= CVT_OK;
+ break;
+ }
+ else
+ {
+ ret= CVT_WC_MB; // MY_CS_ILUNI
+ break;
+ }
+ }
+ *from_end= from;
+ *to_end= to;
+ return ret;
+}
+
+
+/*
+ Write C-style hexadecimal representation of single byte value
+
+ SYNOPSIS
+ write_hex()
+ cache Output stream.
+ cs Output charset (for UCS2).
+ c Source value.
+
+ RETURN VALUE
+ TRUE I/O error.
+ FALSE Success.
+*/
+
+static bool
+write_hex(IO_CACHE *cache, CHARSET_INFO *cs, char c)
+{
+ uint hex_len= sizeof("\\xXX") - 1;
+ char buff[(hex_len + 1) * 2]; // *2 for UCS2
+ snprintf(buff, sizeof(buff), "\\x%02X", (uchar) c);
+ if (cs->mbminlen == 2) // UCS2
+ {
+ for (uint i= hex_len; i--; )
+ {
+ buff[2 * i + 1]= buff[i];
+ buff[2 * i]= 0;
+ }
+ }
+ return my_b_write(cache, buff, hex_len * cs->mbminlen);
+}
+
+
+/*
+ TODO: Encode byte as \uUUUU or \u+UUUUUU and write to output stream
+
+ SYNOPSIS
+ write_unicode()
+ cache Output stream.
+ cs Output charset (for UCS2).
+ c Source value.
+
+ RETURN VALUE
+ TRUE I/O error.
+ FALSE Success.
+*/
+
+static bool
+write_unicode(IO_CACHE *cache, CHARSET_INFO *cs, char c)
+{
+ return cs->mbminlen == 2 ? my_b_write(cache, "?", 1)
+ : my_b_write(cache, "\0?", 2);
+}
+
+
+static bool
+write_str(IO_CACHE *cache, const String &s)
+{
+ return my_b_write(cache, (byte*) s.ptr(), s.length());
+}
+
+
+/*
+ Check string for ambiguous characters
+
+ SYNOPSIS
+ is_ambiguous()
+ ascii Set of probably ambiguous characters.
+ delimiter Delimiter string.
+
+ RETURN VALUE
+ TRUE if the delimiter is a single character found in the 'ascii' set.
+*/
+
+static bool
+is_ambiguous(const char *ascii, const String &delimiter)
+{
+ if (delimiter.is_empty())
+ return FALSE;
+ if (delimiter.length() == 1)
+ return test(strchr(ascii, delimiter[0]));
+ DBUG_ASSERT(delimiter.length() == 2); // UCS2
+ return delimiter[0] ? FALSE : test(strchr(ascii, delimiter[1]));
+}
+
+
int
select_export::prepare(List<Item> &list, SELECT_LEX_UNIT *u)
{
@@ -1200,38 +1423,90 @@ select_export::prepare(List<Item> &list,
if ((uint) strlen(exchange->file_name) + NAME_LEN >= FN_REFLEN)
strmake(path,exchange->file_name,FN_REFLEN-1);
+ write_cs= exchange->cs ? exchange->cs : write_cs;
+
if ((file= create_file(thd, path, exchange, &cache)) < 0)
return 1;
- /* Check if there is any blobs in data */
{
+ uint max_chars= 0;
List_iterator_fast<Item> li(list);
Item *item;
while ((item=li++))
{
if (item->max_length >= MAX_BLOB_WIDTH)
- {
blob_flag=1;
- break;
- }
if (item->result_type() == STRING_RESULT)
string_results= TRUE;
else
non_string_results= TRUE;
+ set_if_bigger(max_chars,
+ item->max_length / item->collation.collation->mbmaxlen);
}
+ if (!(cvt_buff= my_malloc(IO_SIZE, MYF(MY_WME))))
+ return 1;
+ cvt_buff_end= cvt_buff + IO_SIZE;
+ }
+
+ enclosed_chr.set_charset(write_cs);
+ field_sep_chr.set_charset(write_cs);
+ escape_chr.set_charset(write_cs);
+ line_sep_chr.set_charset(write_cs);
+ line_start_str.set_charset(write_cs);
+ line_term_str.set_charset(write_cs);
+ field_term_chr.set_charset(write_cs);
+ field_term_str.set_charset(write_cs);
+
+ null_str.set_charset(write_cs);
+ zero_str.set_charset(write_cs);
+
+ const char *what= NULL;
+
+ if (set_mb_char(&field_term_chr, *exchange->field_term) ||
+ field_term_str.append_with_convert(*exchange->field_term))
+ what= "FIELDS TERMINATED BY";
+
+ if (set_mb_char(&enclosed_chr, *exchange->enclosed, TRUE))
+ what= "FIELDS ENCLOSED BY";
+
+ if (set_mb_char(&escape_chr, *exchange->escaped, TRUE))
+ what= "FIELDS ESCAPED BY";
+
+ if (set_mb_char(&line_sep_chr, *exchange->line_term) ||
+ line_term_str.append_with_convert(*exchange->line_term))
+ what= "LINES TERMINATED BY";
+
+ if (line_start_str.append_with_convert(*exchange->line_start))
+ what= "LINES STARTING BY";
+
+ if (what)
+ {
+ my_error(ER_MALFORMED_DELIMITER, MYF(0), what);
+ return 1;
+ }
+
+ max_prefix_len= max(max(field_term_chr.length(), field_sep_chr.length()),
+ max(escape_chr.length(), line_sep_chr.length()));
+
+ if (escape_chr.length()) // Use \N syntax
+ {
+ if (null_str.append(escape_chr) ||
+ null_str.append(STRING_WITH_LEN("N"), &my_charset_latin1))
+ return 1;
+ }
+ else
+ {
+ if (null_str.append(STRING_WITH_LEN("NULL"), &my_charset_latin1))
+ return 1;
}
+ if (zero_str.append(STRING_WITH_LEN("0"), &my_charset_latin1))
+ return 1;
+
field_term_length=exchange->field_term->length();
- field_term_char= field_term_length ?
- (int) (uchar) (*exchange->field_term)[0] : INT_MAX;
- if (!exchange->line_term->length())
- exchange->line_term=exchange->field_term; // Use this if it exists
- field_sep_char= (exchange->enclosed->length() ?
- (int) (uchar) (*exchange->enclosed)[0] : field_term_char);
- escape_char= (exchange->escaped->length() ?
- (int) (uchar) (*exchange->escaped)[0] : -1);
- is_ambiguous_field_sep= test(strchr(ESCAPE_CHARS, field_sep_char));
- is_unsafe_field_sep= test(strchr(NUMERIC_CHARS, field_sep_char));
- line_sep_char= (exchange->line_term->length() ?
- (int) (uchar) (*exchange->line_term)[0] : INT_MAX);
+ if (line_term_str.is_empty())
+ line_term_str= field_term_str; // Use this if it exists
+ field_sep_chr= enclosed_chr.length() ? enclosed_chr : field_term_chr;
+ is_ambiguous_field_sep= is_ambiguous(ESCAPE_CHARS, field_sep_chr);
+ is_unsafe_field_sep= is_ambiguous(NUMERIC_CHARS, field_sep_chr);
if (!field_term_length)
exchange->opt_enclosed=0;
if (!exchange->enclosed->length())
@@ -1241,7 +1516,7 @@ select_export::prepare(List<Item> &list,
if ((is_ambiguous_field_sep && exchange->enclosed->is_empty() &&
(string_results || is_unsafe_field_sep)) ||
(exchange->opt_enclosed && non_string_results &&
- field_term_length && strchr(NUMERIC_CHARS, field_term_char)))
+ field_term_length && is_ambiguous(NUMERIC_CHARS, field_term_chr)))
{
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_AMBIGUOUS_FIELD_TERM, ER(ER_AMBIGUOUS_FIELD_TERM));
@@ -1254,17 +1529,23 @@ select_export::prepare(List<Item> &list,
}
-#define NEED_ESCAPING(x) ((int) (uchar) (x) == escape_char || \
- (enclosed ? (int) (uchar) (x) == field_sep_char \
- : (int) (uchar) (x) == field_term_char) || \
- (int) (uchar) (x) == line_sep_char || \
- !(x))
+/* TRUE if the first character in [x..e) is equal to \0 (or 0x0000 for UCS2) */
+#define is_zero(x, e) ((*(x) == 0 && write_cs->mbminlen == 1) || \
+ ((e) - (x) > 1 && (x)[1] == 0))
+
+
+#define NEED_ESCAPING(x, e) (prefix(x, e, escape_chr) || \
+ (enclosed ? prefix(x, e, field_sep_chr) \
+ : prefix(x, e, field_term_chr)) || \
+ prefix(x, e, line_sep_chr) || \
+ is_zero(x, e))
+
bool select_export::send_data(List<Item> &items)
{
DBUG_ENTER("select_export::send_data");
- char buff[MAX_FIELD_WIDTH],null_buff[2],space[MAX_FIELD_WIDTH];
+ char buff[MAX_FIELD_WIDTH],space[MAX_FIELD_WIDTH];
bool space_inited=0;
String tmp(buff,sizeof(buff),&my_charset_bin),*res;
tmp.length(0);
@@ -1279,8 +1560,7 @@ bool select_export::send_data(List<Item>
uint used_length=0,items_left=items.elements;
List_iterator_fast<Item> li(items);
- if (my_b_write(&cache,(byte*) exchange->line_start->ptr(),
- exchange->line_start->length()))
+ if (write_str(&cache, line_start_str))
goto err;
while ((item=li++))
{
@@ -1290,23 +1570,15 @@ bool select_export::send_data(List<Item>
res=item->str_result(&tmp);
if (res && enclosed)
{
- if (my_b_write(&cache,(byte*) exchange->enclosed->ptr(),
- exchange->enclosed->length()))
+ if (write_str(&cache, enclosed_chr))
goto err;
}
if (!res)
{ // NULL
if (!fixed_row_size)
{
- if (escape_char != -1) // Use \N syntax
- {
- null_buff[0]=escape_char;
- null_buff[1]='N';
- if (my_b_write(&cache,(byte*) null_buff,2))
- goto err;
- }
- else if (my_b_write(&cache,(byte*) "NULL",4))
- goto err;
+ if (write_str(&cache, null_str))
+ goto err;
}
else
{
@@ -1320,90 +1592,100 @@ bool select_export::send_data(List<Item>
else
used_length=res->length();
if ((result_type == STRING_RESULT || is_unsafe_field_sep) &&
- escape_char != -1)
+ escape_chr.length())
{
- char *pos, *start, *end;
+ char *pos= cvt_buff, *start= pos, *end = cvt_buff_end;
CHARSET_INFO *res_charset= res->charset();
- CHARSET_INFO *character_set_client= thd->variables.
- character_set_client;
- bool check_second_byte= (res_charset == &my_charset_bin) &&
- character_set_client->
- escape_with_backslash_is_dangerous;
- DBUG_ASSERT(character_set_client->mbmaxlen == 2 ||
- !character_set_client->escape_with_backslash_is_dangerous);
- for (start=pos=(char*) res->ptr(),end=pos+used_length ;
- pos != end ;
- pos++)
- {
-#ifdef USE_MB
- if (use_mb(res_charset))
- {
- int l;
- if ((l=my_ismbchar(res_charset, pos, end)))
- {
- pos += l-1;
- continue;
- }
- }
-#endif
+ const char *from_pos= res->ptr();
+ const char *from_end= from_pos + used_length;
+ const char *tmp_from= from_end;
+ cvt_result cvt_res= cvt(pos, &end, write_cs,
+ from_pos, &tmp_from, res_charset);
+ from_pos= tmp_from;
+ for (;;)
+ {
+ if (pos == end)
+ { // fill cvt_buff with a new portion of converted characters
+ if (my_b_write(&cache, (byte*) start, pos - start))
+ goto err;
+ if (from_pos == from_end)
+ {
+ DBUG_ASSERT(cvt_res == CVT_OK);
+ break;
+ }
+ switch (cvt_res) {
+ /*
+ If the previous conversion from field charset to write_cs was
+ interrupted because of conversion error, write malformed
+ input with hexadecimal/unicode text representation to the
+ output stream.
+ */
+ case CVT_MB_WC:
+ if (write_hex(&cache, write_cs, *from_pos++))
+ goto err;
+ cvt_res= CVT_OK;
+ continue;
+ case CVT_WC_MB:
+ if (write_unicode(&cache, write_cs, *from_pos++))
+ goto err;
+ cvt_res= CVT_OK;
+ continue;
+ default:;
+ }
+ start= pos= cvt_buff;
+ end= cvt_buff_end;
+ tmp_from= from_end;
+ cvt_res= cvt(pos, &end, write_cs, from_pos, &tmp_from, res_charset);
+ from_pos= tmp_from;
+ }
+ else if (cvt_res == CVT_OK &&
+ end - pos < max_prefix_len && from_pos != from_end)
+ { // add some converted chars to cvt_buff for prefix look-ahead
+ if (my_b_write(&cache, (byte*) start, pos - start))
+ goto err;
+ uint tail= end - pos;
+ memcpy(cvt_buff, pos, tail);
+ start= pos= cvt_buff;
+ end= cvt_buff_end;
+ tmp_from= from_end;
+ cvt_res= cvt(cvt_buff + tail, &end, write_cs,
+ from_pos, &tmp_from, res_charset);
+ from_pos= tmp_from;
+ }
- /*
- Special case when dumping BINARY/VARBINARY/BLOB values
- for the clients with character sets big5, cp932, gbk and sjis,
- which can have the escape character (0x5C "\" by default)
- as the second byte of a multi-byte sequence.
-
- If
- - pos[0] is a valid multi-byte head (e.g 0xEE) and
- - pos[1] is 0x00, which will be escaped as "\0",
-
- then we'll get "0xEE + 0x5C + 0x30" in the output file.
-
- If this file is later loaded using this sequence of commands:
-
- mysql> create table t1 (a varchar(128)) character set big5;
- mysql> LOAD DATA INFILE 'dump.txt' INTO TABLE t1;
-
- then 0x5C will be misinterpreted as the second byte
- of a multi-byte character "0xEE + 0x5C", instead of
- escape character for 0x00.
-
- To avoid this confusion, we'll escape the multi-byte
- head character too, so the sequence "0xEE + 0x00" will be
- dumped as "0x5C + 0xEE + 0x5C + 0x30".
-
- Note, in the condition below we only check if
- mbcharlen is equal to 2, because there are no
- character sets with mbmaxlen longer than 2
- and with escape_with_backslash_is_dangerous set.
- DBUG_ASSERT before the loop makes that sure.
- */
-
- if ((NEED_ESCAPING(*pos) ||
- (check_second_byte &&
- my_mbcharlen(character_set_client, (uchar) *pos) == 2 &&
- pos + 1 < end &&
- NEED_ESCAPING(pos[1]))) &&
+ int ml= my_mbcharlen(write_cs, (uchar) *pos);
+ if (NEED_ESCAPING(pos, end) &&
/*
Don't escape field_term_char by doubling - doubling is only
valid for ENCLOSED BY characters:
*/
(enclosed || !is_ambiguous_field_term ||
- (int) (uchar) *pos != field_term_char))
+ !prefix(pos, end, field_term_chr)))
{
- char tmp_buff[2];
- tmp_buff[0]= ((int) (uchar) *pos == field_sep_char &&
- is_ambiguous_field_sep) ?
- field_sep_char : escape_char;
- tmp_buff[1]= *pos ? *pos : '0';
- if (my_b_write(&cache,(byte*) start,(uint) (pos-start)) ||
- my_b_write(&cache,(byte*) tmp_buff,2))
- goto err;
- start=pos+1;
+ if (my_b_write(&cache, (byte*) start, (uint) (pos-start)))
+ goto err;
+ if (prefix(pos, end, field_sep_chr) && is_ambiguous_field_sep)
+ {
+ if (write_str(&cache, field_sep_chr))
+ goto err;
+ } else {
+ if (write_str(&cache, escape_chr))
+ goto err;
+ }
+ if (is_zero(pos, end))
+ {
+ if (write_str(&cache, zero_str))
+ goto err;
+ }
+ else
+ {
+ if (my_b_write(&cache, (byte *) pos, ml))
+ goto err;
+ }
+ start= pos + ml;
}
+ pos+= ml;
}
- if (my_b_write(&cache,(byte*) start,(uint) (pos-start)))
- goto err;
}
else if (my_b_write(&cache,(byte*) res->ptr(),used_length))
goto err;
@@ -1430,19 +1712,16 @@ bool select_export::send_data(List<Item>
}
if (res && enclosed)
{
- if (my_b_write(&cache, (byte*) exchange->enclosed->ptr(),
- exchange->enclosed->length()))
+ if (write_str(&cache, enclosed_chr))
goto err;
}
if (--items_left)
{
- if (my_b_write(&cache, (byte*) exchange->field_term->ptr(),
- field_term_length))
+ if (write_str(&cache, field_term_str))
goto err;
}
}
- if (my_b_write(&cache,(byte*) exchange->line_term->ptr(),
- exchange->line_term->length()))
+ if (write_str(&cache, line_term_str))
goto err;
DBUG_RETURN(0);
err:
diff -Nrup a/sql/sql_class.h b/sql/sql_class.h
--- a/sql/sql_class.h 2007-12-15 15:04:58 +04:00
+++ b/sql/sql_class.h 2008-02-05 06:12:26 +04:00
@@ -2019,8 +2019,12 @@ public:
class select_export :public select_to_file {
uint field_term_length;
- int field_sep_char,escape_char,line_sep_char;
- int field_term_char; // first char of FIELDS TERMINATED BY or MAX_INT
+ String field_sep_chr,escape_chr,line_sep_chr;
+ String field_term_chr; // first char of FIELDS TERMINATED BY or MAX_INT
+ String enclosed_chr;
+ String field_term_str, line_start_str, line_term_str;
+ String null_str, zero_str;
+ int max_prefix_len;
/*
The is_ambiguous_field_sep field is true if a value of the field_sep_char
field is one of the 'n', 't', 'r' etc characters
@@ -2040,12 +2044,17 @@ class select_export :public select_to_fi
*/
bool is_unsafe_field_sep;
bool fixed_row_size;
+ CHARSET_INFO *write_cs;
+ char *cvt_buff, *cvt_buff_end;
public:
/**
Creates a select_export to represent INTO OUTFILE <filename> with a
defined level of subquery nesting.
*/
- select_export(sql_exchange *ex, uint nest_level_arg) :select_to_file(ex)
+ select_export(sql_exchange *ex, uint nest_level_arg)
+ : select_to_file(ex),
+ write_cs(&my_charset_bin), // backward compatibility
+ cvt_buff(NULL), cvt_buff_end(NULL)
{
nest_level= nest_level_arg;
}
diff -Nrup a/sql/sql_load.cc b/sql/sql_load.cc
--- a/sql/sql_load.cc 2007-10-29 17:20:55 +04:00
+++ b/sql/sql_load.cc 2008-02-05 06:12:27 +04:00
@@ -29,11 +29,11 @@ class READ_INFO {
*end_of_buff; /* Data in bufferts ends here */
uint buff_length, /* Length of buffert */
max_length; /* Max length of row */
- char *field_term_ptr,*line_term_ptr,*line_start_ptr,*line_start_end;
- uint field_term_length,line_term_length,enclosed_length;
- int field_term_char,line_term_char,enclosed_char,escape_char;
+ String field_term_str, line_term_str, line_start_str;
+ String enclosed_chr, escape_chr;
int *stack,*stack_pos;
bool found_end_of_line,start_of_line,eof;
+ bool enclosed_eq_escaped;
bool need_end_io_cache;
IO_CACHE cache;
NET *io_net;
@@ -45,14 +45,17 @@ public:
CHARSET_INFO *read_charset;
READ_INFO(File file,uint tot_length,CHARSET_INFO *cs,
- String &field_term,String &line_start,String &line_term,
- String &enclosed,int escape,bool get_it_from_net, bool is_fifo);
+ const String &field_term, const String &line_start,
+ const String &line_term,
+ const String &enclosed, const String &escape,
+ bool get_it_from_net, bool is_fifo);
~READ_INFO();
int read_field();
int read_fixed_length(void);
int next_line(void);
char unescape(char chr);
- int terminator(char *ptr,uint length);
+ bool unescape(char **to);
+ void unescape_hex(char **to);
bool find_start_of_fields();
/*
We need to force cache close before destructor is invoked to log
@@ -70,6 +73,12 @@ public:
either the table or THD value
*/
void set_io_cache_arg(void* arg) { cache.arg = arg; }
+
+ int get();
+ bool look(const char *ptr, uint length);
+ bool look(const String &s) { return look(s.ptr(), s.length()); }
+ void unget(const String &s) { unget(s.ptr(), s.length()); }
+ void unget(const char *ptr, uint length);
};
static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
@@ -89,6 +98,28 @@ static bool write_execute_load_query_log
THD::killed_state killed_status);
#endif /* EMBEDDED_LIBRARY */
+
+/*
+ Check string for valid enclosed/escape char
+
+ SYNOPSIS
+ is_separator()
+ s Separator string.
+
+ RETURN VALUE
+ TRUE String is empty or consists of exactly one character.
+ FALSE Source string is malformed or contains more than one
+ multibyte character.
+*/
+
+static bool
+is_separator(const String &s)
+{
+ return s.is_empty() ||
+ (uint) my_mbcharlen(s.charset(), (uchar) *s.ptr()) == s.length();
+}
+
+
/*
Execute LOAD DATA query
@@ -143,7 +174,7 @@ bool mysql_load(THD *thd,sql_exchange *e
read_file_from_client = 0; //server is always in the same process
#endif
- if (escaped->length() > 1 || enclosed->length() > 1)
+ if (!is_separator(*escaped) || !is_separator(*enclosed))
{
my_message(ER_WRONG_FIELD_TERMINATORS,ER(ER_WRONG_FIELD_TERMINATORS),
MYF(0));
@@ -328,7 +359,7 @@ bool mysql_load(THD *thd,sql_exchange *e
READ_INFO read_info(file,tot_length,
ex->cs ? ex->cs : thd->variables.collation_database,
*field_term,*ex->line_start, *ex->line_term, *enclosed,
- info.escape_char, read_file_from_client, is_fifo);
+ *escaped, read_file_from_client, is_fifo);
if (read_info.error)
{
if (file >= 0)
@@ -717,9 +748,12 @@ read_sep_field(THD *thd, COPY_INFO &info
length=(uint) (read_info.row_end-pos);
if (!read_info.enclosed &&
- (enclosed_length && length == 4 &&
- !memcmp(pos, STRING_WITH_LEN("NULL"))) ||
- (length == 1 && read_info.found_null))
+ (enclosed_length &&
+ ((length == 4 && !memcmp(pos, STRING_WITH_LEN("NULL"))) ||
+ (length == 8 && read_info.read_charset->mbminlen == 2 &&
+ !memcmp(pos, STRING_WITH_LEN("\0N\0U\0L\0L")))) // UCS2
+ ) ||
+ (length == read_info.read_charset->mbminlen && read_info.found_null))
{
if (item->type() == Item::FIELD_ITEM)
{
@@ -880,44 +914,36 @@ READ_INFO::unescape(char chr)
READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
- String &field_term, String &line_start, String &line_term,
- String &enclosed_par, int escape, bool get_it_from_net,
- bool is_fifo)
- :file(file_par),escape_char(escape)
+ const String &field_term, const String &line_start,
+ const String &line_term,
+ const String &enclosed_par, const String &escape,
+ bool get_it_from_net,
+ bool is_fifo)
+ :file(file_par),
+ field_term_str("", cs), line_term_str("", cs), line_start_str("", cs),
+ enclosed_chr("", cs), escape_chr("", cs),
+ error(0)
{
read_charset= cs;
- field_term_ptr=(char*) field_term.ptr();
- field_term_length= field_term.length();
- line_term_ptr=(char*) line_term.ptr();
- line_term_length= line_term.length();
- if (line_start.length() == 0)
- {
- line_start_ptr=0;
- start_of_line= 0;
- }
- else
- {
- line_start_ptr=(char*) line_start.ptr();
- line_start_end=line_start_ptr+line_start.length();
- start_of_line= 1;
- }
+ if (field_term_str.append_with_convert(field_term) ||
+ line_term_str.append_with_convert(line_term) ||
+ line_start_str.append_with_convert(line_start) ||
+ enclosed_chr.append_with_convert(enclosed_par) ||
+ escape_chr.append_with_convert(escape))
+ error= 1;
+ enclosed_eq_escaped= !stringcmp(&enclosed_chr, &escape_chr);
+ start_of_line= !line_start.is_empty();
/* If field_terminator == line_terminator, don't use line_terminator */
- if (field_term_length == line_term_length &&
- !memcmp(field_term_ptr,line_term_ptr,field_term_length))
- {
- line_term_length=0;
- line_term_ptr=(char*) "";
- }
- enclosed_char= (enclosed_length=enclosed_par.length()) ?
- (uchar) enclosed_par[0] : INT_MAX;
- field_term_char= field_term_length ? (uchar) field_term_ptr[0] : INT_MAX;
- line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX;
- error=eof=found_end_of_line=found_null=line_cuted=0;
+ if (field_term_str.length() == line_term_str.length() &&
+ !memcmp(field_term_str.ptr(),line_term_str.ptr(),field_term_str.length()))
+ line_term_str.length(0);
+ eof=found_end_of_line=found_null=line_cuted=0;
buff_length=tot_length;
/* Set of a stack for unget if long terminators */
- uint length=max(field_term_length,line_term_length)+1;
+ uint length= max(max(field_term_str.length(), line_term_str.length()) + 1,
+ sizeof("\\xXX") * cs->mbminlen);
set_if_bigger(length,line_start.length());
stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
@@ -968,33 +994,185 @@ READ_INFO::~READ_INFO()
}
-#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache))
+/*
+ Read next byte from input stream
+
+ SYNOPSIS
+ get()
+
+ RETURN
+ Byte value or my_b_EOF.
+
+ NOTE
+ Also updates 'found_end_of_line' and 'eof' field values.
+*/
+
+inline int READ_INFO::get()
+{
+ int ret= stack_pos != stack ? *--stack_pos : my_b_get(&cache);
+ if (ret == my_b_EOF)
+ found_end_of_line= eof= TRUE;
+ return ret;
+}
+
+
+#define GET get()
#define PUSH(A) *(stack_pos++)=(A)
+// Put String value into an output stream
+#define PUT(s) for (const char *p= (s).ptr(), *e= p + (s).length(); p < e; p++)\
+ *to++= (byte) *p
+
+
+/*
+ Look ahead for the given string
+
+ SYNOPSIS
+ look()
+ ptr String to match with the input stream head.
+ length Size of matching string.
+
+ RETURN
+ TRUE on success.
+
+ NOTE
+ "Ungets" read bytes back to input stream in case of failed matching.
+ Also updates 'found_end_of_line' and 'eof' field values.
+*/
-inline int READ_INFO::terminator(char *ptr,uint length)
+bool READ_INFO::look(const char *ptr, uint length)
{
+ if (length == 0)
+ return 0;
+ bool save_found_end_of_line= found_end_of_line;
+ bool save_eof= eof;
int chr=0; // Keep gcc happy
uint i;
- for (i=1 ; i < length ; i++)
+ for (i= 0 ; i < length ; i++)
{
- if ((chr=GET) != *++ptr)
+ if ((chr=GET) != (uchar) *ptr++)
{
break;
}
}
if (i == length)
return 1;
+ if (i == 0 && eof)
+ return 0;
PUSH(chr);
- while (i-- > 1)
+ ptr--;
+ while (i--)
PUSH((uchar) *--ptr);
+ eof= save_eof;
+ found_end_of_line= save_found_end_of_line;
return 0;
}
+/*
+ "Unget" given string back to input stream
+
+ SYNOPSIS
+ READ_INFO::unget()
+ ptr String to unget.
+ length String size.
+*/
+
+void
+READ_INFO::unget(const char *ptr, uint length)
+{
+ ptr+= length;
+ while (length--)
+ PUSH((uchar) *--ptr);
+}
+
+
+/*
+ Decode C-style escape-sequence and write to output stream
+
+ SYNOPSIS
+ READ_INFO::unescape()
+ to_ptr [IN/OUT] Pointer to output buffer.
+
+ RETURN VALUES
+ TRUE The character(s) following the escape have been consumed.
+ FALSE End of input reached right after the escape character.
+*/
+
+bool
+READ_INFO::unescape(char **to_ptr)
+{
+ char *&to= *to_ptr;
+ int chr;
+ if ((chr= GET) == my_b_EOF)
+ return FALSE;
+
+ if (read_charset->mbminlen == 2 && chr == 0) // UCS2-encoded ASCII
+ {
+ *to++= 0;
+ if ((chr= GET) != my_b_EOF) // check for truncated input
+ {
+ if (chr != 'x')
+ *to++= unescape(chr);
+ else
+ unescape_hex(to_ptr);
+ }
+ return TRUE;
+ }
+ if (chr != 'x')
+ *to++= unescape(chr);
+ else
+ unescape_hex(to_ptr);
+ return TRUE;
+}
+
+
+/*
+ Decode XX part of hexadecimal-encoded string \xXX
+
+ SYNOPSIS
+ READ_INFO::unescape_hex()
+ to_ptr [IN/OUT] Pointer to output buffer.
+*/
+
+void
+READ_INFO::unescape_hex(char **to_ptr)
+{
+ const char *hex= "0123456789ABCDEF", *pos;
+ char chr= 0;
+ const uint hex_len= 2; // XX
+ const uint ml= read_charset->mbminlen;
+ char buff[(hex_len + 1) * 2]; // *2 for UCS2
+ uint i;
+ for (i= 0; i < hex_len * ml; i++)
+ {
+ int c;
+ if ((c= GET) == my_b_EOF)
+ goto err;
+ buff[i]= c;
+ if (ml == 2 && i % 2)
+ {
+ if (c)
+ goto err;
+ }
+ else
+ {
+ if (!(pos= strchr(hex, c)))
+ goto err;
+ chr= chr * 0x10 + (pos - hex);
+ }
+ }
+ **to_ptr= chr;
+ (*to_ptr)++;
+ return;
+err:
+ unget(buff, i);
+}
+
+
int READ_INFO::read_field()
{
- int chr,found_enclosed_char;
+ bool found_enclosed_char= FALSE;
byte *to,*new_buffer;
found_null=0;
@@ -1009,60 +1187,26 @@ int READ_INFO::read_field()
if (find_start_of_fields())
return 1;
}
- if ((chr=GET) == my_b_EOF)
- {
- found_end_of_line=eof=1;
- return 1;
- }
to=buffer;
- if (chr == enclosed_char)
+ PUSH(GET); // refresh eof flag
+ if (look(enclosed_chr))
{
- found_enclosed_char=enclosed_char;
- *to++=(byte) chr; // If error
+ found_enclosed_char= TRUE;
+ PUT(enclosed_chr); // If error
}
+ else if (eof)
+ return 1;
else
- {
- found_enclosed_char= INT_MAX;
- PUSH(chr);
- }
+ found_enclosed_char= FALSE;
for (;;)
{
- while ( to < end_of_buff)
+ while (to + read_charset->mbmaxlen < end_of_buff)
{
- chr = GET;
-#ifdef USE_MB
- if ((my_mbcharlen(read_charset, chr) > 1) &&
- to+my_mbcharlen(read_charset, chr) <= end_of_buff)
- {
- uchar* p = (uchar*)to;
- *to++ = chr;
- int ml = my_mbcharlen(read_charset, chr);
- int i;
- for (i=1; i<ml; i++) {
- chr = GET;
- if (chr == my_b_EOF)
- goto found_eof;
- *to++ = chr;
- }
- if (my_ismbchar(read_charset,
- (const char *)p,
- (const char *)to))
- continue;
- for (i=0; i<ml; i++)
- PUSH((uchar) *--to);
- chr = GET;
- }
-#endif
- if (chr == my_b_EOF)
+ if (eof)
goto found_eof;
- if (chr == escape_char)
+ if (look(escape_chr))
{
- if ((chr=GET) == my_b_EOF)
- {
- *to++= (byte) escape_char;
- goto found_eof;
- }
/*
When escape_char == enclosed_char, we treat it like we do for
handling quotes in SQL parsing -- you can double-up the
@@ -1070,66 +1214,61 @@ int READ_INFO::read_field()
like \n. This allows: LOAD DATA ... ENCLOSED BY '"' ESCAPED BY '"'
with data like: "fie""ld1", "field2"
*/
- if (escape_char != enclosed_char || chr == escape_char)
+ if (!enclosed_eq_escaped)
+ {
+ if (unescape(&to))
+ continue;
+ PUT(escape_chr);
+ goto found_eof;
+ }
+ else if (enclosed_eq_escaped && look(escape_chr))
{
- *to++ = (byte) unescape((char) chr);
+ PUT(escape_chr);
continue;
}
- PUSH(chr);
- chr= escape_char;
+ // escape_char == enclosed_char && chr != escape_char
+ unget(escape_chr);
}
#ifdef ALLOW_LINESEPARATOR_IN_STRINGS
- if (chr == line_term_char)
+ if (look(line_term_str))
#else
- if (chr == line_term_char && found_enclosed_char == INT_MAX)
+ if (!found_enclosed_char && look(line_term_str))
#endif
- {
- if (terminator(line_term_ptr,line_term_length))
- { // Maybe unexpected linefeed
+ { // Maybe unexpected linefeed
enclosed=0;
found_end_of_line=1;
row_start=buffer;
row_end= to;
return 0;
- }
}
- if (chr == found_enclosed_char)
+ if (found_enclosed_char && look(enclosed_chr))
{
- if ((chr=GET) == found_enclosed_char)
+ if (look(enclosed_chr))
{ // Remove dupplicated
- *to++ = (byte) chr;
+ PUT(enclosed_chr);
continue;
}
// End of enclosed field if followed by field_term or line_term
- if (chr == my_b_EOF ||
- chr == line_term_char && terminator(line_term_ptr,
- line_term_length))
+ if (eof || look(line_term_str))
{ // Maybe unexpected linefeed
enclosed=1;
found_end_of_line=1;
- row_start=buffer+1;
+ row_start= buffer + enclosed_chr.length();
row_end= to;
return 0;
}
- if (chr == field_term_char &&
- terminator(field_term_ptr,field_term_length))
+ if (look(field_term_str))
{
enclosed=1;
- row_start=buffer+1;
+ row_start= buffer + enclosed_chr.length();
row_end= to;
return 0;
}
- /*
- The string didn't terminate yet.
- Store back next character for the loop
- */
- PUSH(chr);
/* copy the found term character to 'to' */
- chr= found_enclosed_char;
+ unget(enclosed_chr);
}
- else if (chr == field_term_char && found_enclosed_char == INT_MAX)
+ else if (!found_enclosed_char && look(field_term_str))
{
- if (terminator(field_term_ptr,field_term_length))
{
enclosed=0;
row_start=buffer;
@@ -1137,6 +1276,34 @@ int READ_INFO::read_field()
return 0;
}
}
+
+ int chr = GET;
+ if (chr == my_b_EOF)
+ goto found_eof;
+#ifdef USE_MB
+ int ml= my_mbcharlen(read_charset, chr);
+ if (ml > 1 && to + ml <= end_of_buff)
+ {
+ uchar* p = (uchar*)to;
+ *to++ = chr;
+ int i;
+ for (i=1; i<ml; i++) {
+ if ((chr= GET) == my_b_EOF)
+ goto found_eof;
+ *to++ = chr;
+ }
+ if (my_ismbchar(read_charset,
+ (const char *)p,
+ (const char *)to))
+ continue;
+ for (i=0; i<ml; i++)
+ PUSH((uchar) *--to);
+ chr = GET;
+ }
+#endif
+ if (eof)
+ goto found_eof;
+
*to++ = (byte) chr;
}
/*
@@ -1176,7 +1343,6 @@ found_eof:
int READ_INFO::read_fixed_length()
{
- int chr;
byte *to;
if (found_end_of_line)
return 1; // One have to call next_line
@@ -1191,34 +1357,32 @@ int READ_INFO::read_fixed_length()
to=row_start=buffer;
while (to < end_of_buff)
{
- if ((chr=GET) == my_b_EOF)
+ if (eof)
goto found_eof;
- if (chr == escape_char)
+ if (look(escape_chr))
{
- if ((chr=GET) == my_b_EOF)
- {
- *to++= (byte) escape_char;
- goto found_eof;
- }
- *to++ =(byte) unescape((char) chr);
- continue;
+ if (unescape(&to))
+ continue;
+ PUT(escape_chr);
+ goto found_eof;
}
- if (chr == line_term_char)
+ if (look(line_term_str))
{
- if (terminator(line_term_ptr,line_term_length))
{ // Maybe unexpected linefeed
found_end_of_line=1;
row_end= to;
return 0;
}
}
+ int chr;
+ if ((chr=GET) == my_b_EOF)
+ goto found_eof;
*to++ = (byte) chr;
}
row_end=to; // Found full line
return 0;
found_eof:
- found_end_of_line=eof=1;
row_start=buffer;
row_end=to;
return to == buffer ? 1 : 0;
@@ -1228,44 +1392,41 @@ found_eof:
int READ_INFO::next_line()
{
line_cuted=0;
- start_of_line= line_start_ptr != 0;
+ start_of_line= !line_start_str.is_empty();
if (found_end_of_line || eof)
{
found_end_of_line=0;
return eof;
}
found_end_of_line=0;
- if (!line_term_length)
+ if (line_term_str.is_empty())
return 0; // No lines
for (;;)
{
- int chr = GET;
-#ifdef USE_MB
- if (my_mbcharlen(read_charset, chr) > 1)
- {
- for (int i=1;
- chr != my_b_EOF && i<my_mbcharlen(read_charset, chr);
- i++)
- chr = GET;
- if (chr == escape_char)
- continue;
- }
-#endif
- if (chr == my_b_EOF)
- {
- eof=1;
- return 1;
- }
- if (chr == escape_char)
+ if (look(escape_chr))
{
line_cuted=1;
if (GET == my_b_EOF)
- return 1;
+ return 1;
continue;
}
- if (chr == line_term_char && terminator(line_term_ptr,line_term_length))
+ if (look(line_term_str))
return 0;
+ if (eof)
+ {
+ eof=1;
+ return 1;
+ }
line_cuted=1;
+ int chr = GET;
+#ifdef USE_MB
+ int ml= my_mbcharlen(read_charset, chr);
+ if (ml > 1)
+ {
+ for (int i= 1; chr != my_b_EOF && i < ml; i++)
+ chr= GET;
+ }
+#endif
}
}
@@ -1277,18 +1438,17 @@ bool READ_INFO::find_start_of_fields()
do
{
if ((chr=GET) == my_b_EOF)
- {
- found_end_of_line=eof=1;
return 1;
- }
- } while ((char) chr != line_start_ptr[0]);
- for (char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++)
+ } while ((char) chr != *line_start_str.ptr());
+ for (const char *ptr= line_start_str.ptr() + 1,
+ *end= line_start_str.ptr() + line_start_str.length();
+ ptr != end ; ptr++)
{
chr=GET; // Eof will be checked later
if ((char) chr != *ptr)
{ // Can't be line_start
PUSH(chr);
- while (--ptr != line_start_ptr)
+ while (--ptr != line_start_str.ptr())
{ // Restart with next char
PUSH((uchar) *ptr);
}
diff -Nrup a/sql/sql_string.h b/sql/sql_string.h
--- a/sql/sql_string.h 2007-01-22 16:10:41 +04:00
+++ b/sql/sql_string.h 2008-02-05 06:12:28 +04:00
@@ -39,6 +39,7 @@ uint32 well_formed_copy_nchars(CHARSET_I
const char **well_formed_error_pos,
const char **cannot_convert_error_pos,
const char **from_end_pos);
+int stringcmp(const String *s, const String *t);
class String
{
@@ -93,7 +94,7 @@ public:
inline uint32 alloced_length() const { return Alloced_length;}
inline char& operator [] (uint32 i) const { return Ptr[i]; }
inline void length(uint32 len) { str_length=len ; }
- inline bool is_empty() { return (str_length == 0); }
+ inline bool is_empty() const { return (str_length == 0); }
inline void mark_as_const() { Alloced_length= 0;}
inline const char *ptr() const { return Ptr; }
inline char *c_ptr()
@@ -251,6 +252,10 @@ public:
bool append(IO_CACHE* file, uint32 arg_length);
bool append_with_prefill(const char *s, uint32 arg_length,
uint32 full_length, char fill_char);
+ bool append_with_convert(const String &s)
+ {
+ return append(s.ptr(), s.length(), s.charset());
+ }
int strstr(const String &search,uint32 offset=0); // Returns offset to substring or
-1
int strrstr(const String &search,uint32 offset=0); // Returns offset to substring
or -1
bool replace(uint32 offset,uint32 arg_length,const char *to,uint32 length);
diff -Nrup a/sql/sql_yacc.yy b/sql/sql_yacc.yy
--- a/sql/sql_yacc.yy 2007-12-15 15:04:59 +04:00
+++ b/sql/sql_yacc.yy 2008-02-05 06:12:29 +04:00
@@ -6428,6 +6428,8 @@ into_destination:
!(lex->result= new select_export(lex->exchange, lex->nest_level)))
MYSQL_YYABORT;
}
+ opt_load_data_charset
+ { Lex->exchange->cs= $4; }
opt_field_term opt_line_term
| DUMPFILE TEXT_STRING_filesystem
{
| Thread |
|---|
| • bk commit into 5.0 tree (gshchepa:1.2594) BUG#30946 | gshchepa | 5 Feb 2008 |