#At file:///home/narayanan/Work/mysql/W-M/mysql-6.0-bugteam-40814/
2967 V Narayanan 2009-01-15
Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields
When a .CSV file for a table in the CSV engine contains
\X characters as part of unquoted fields, e.g.
2,naraya\nan
\n is not interpreted as a new line (it is however interpreted as a
newline in a quoted field).
The old algorithm copied the entire value for an unquoted field without
parsing the \X characters.
The new algorithm adds the capability to handle \X characters in the
unquoted fields of a .CSV file.
modified:
mysql-test/r/csv.result
mysql-test/t/csv.test
storage/csv/ha_tina.cc
per-file messages:
mysql-test/r/csv.result
Contains additional test output corresponding to the new
tests added.
mysql-test/t/csv.test
Contains additional tests for testing the behaviour of the CSV
storage engine when the fields are not enclosed in quotes and
contain \X characters.
storage/csv/ha_tina.cc
Changes the parsing logic of the rows in a CSV file, to parse
\X characters that might be present in the unquoted fields.
=== modified file 'mysql-test/r/csv.result'
--- a/mysql-test/r/csv.result 2008-12-29 12:50:51 +0000
+++ b/mysql-test/r/csv.result 2009-01-15 09:57:24 +0000
@@ -5407,4 +5407,60 @@ test.t1 repair status OK
select * from t1 limit 1;
a
drop table t1;
+#
+# Test for the following cases
+# 1) integers and strings enclosed in quotes
+# 2) integers and strings not enclosed in quotes
+# 3) \X characters with quotes
+# 4) \X characters outside quotes
+#
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,"integer sans quotes"
+1,string sans quotes
+1,quotes"in between" strings
+"1",Integer with quote and string with no quote
+1,"escape sequence \n \" \\ \r \a within quotes"
+1,escape sequence \n \" \\ \r \a without quotes
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+c1 c2
+1 integer sans quotes
+1 string sans quotes
+1 quotes"in between" strings
+1 Integer with quote and string with no quote
+1 escape sequence
+ " \
\a within quotes
+1 escape sequence
+ " \
\a without quotes
+DROP TABLE t1;
+# Test for the case when a field begins with a quote, but does not end in a
+# quote.
+# Note: This results in an error.
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,"string only at the beginning quotes
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+ERROR HY000: Table 't1' is marked as crashed and should be repaired
+DROP TABLE t1;
+# Test for the case when a field ends with a quote, but does not begin in a
+# quote.
+# Note: This results in an error.
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,string with only ending quotes"
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+ERROR HY000: Table 't1' is marked as crashed and should be repaired
+DROP TABLE t1;
End of 5.1 tests
=== modified file 'mysql-test/t/csv.test'
--- a/mysql-test/t/csv.test 2008-12-29 12:50:51 +0000
+++ b/mysql-test/t/csv.test 2009-01-15 09:57:24 +0000
@@ -1817,4 +1817,85 @@ repair table t1;
select * from t1 limit 1;
drop table t1;
+#
+# Bug #40814 CSV engine does not parse \X characters when they occur in unquoted fields
+#
+
+--echo #
+--echo # Test for the following cases
+--echo # 1) integers and strings enclosed in quotes
+--echo # 2) integers and strings not enclosed in quotes
+--echo # 3) \X characters with quotes
+--echo # 4) \X characters outside quotes
+--echo #
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+1,"integer sans quotes"
+1,string sans quotes
+1,quotes"in between" strings
+"1",Integer with quote and string with no quote
+1,"escape sequence \n \" \\ \r \a within quotes"
+1,escape sequence \n \" \\ \r \a without quotes
+EOF
+--cat_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+--echo # Test for the case when a field begins with a quote, but does not end in a
+--echo # quote.
+--echo # Note: This results in an error.
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+1,"string only at the beginning quotes
+EOF
+--cat_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+--error ER_CRASHED_ON_USAGE
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+--echo # Test for the case when a field ends with a quote, but does not begin in a
+--echo # quote.
+--echo # Note: This results in an error.
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+1,string with only ending quotes"
+EOF
+--cat_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+--error ER_CRASHED_ON_USAGE
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
--echo End of 5.1 tests
=== modified file 'storage/csv/ha_tina.cc'
--- a/storage/csv/ha_tina.cc 2009-01-06 10:38:47 +0000
+++ b/storage/csv/ha_tina.cc 2009-01-15 09:57:24 +0000
@@ -613,6 +613,33 @@ int ha_tina::find_current_row(uchar *buf
memset(buf, 0, table->s->null_bytes);
+ /*
+ Parse the line obtained using the following algorithm
+
+ BEGIN
+ 1) Store the EOL (end of line) for the current row
+ 2) Until all the fields in the current query have been
+ filled
+ 2.1) If the current character is a quote
+ 2.1.1) Until EOL has been reached
+ a) If end of current field is reached, move
+ to next field and jump to step 2.3
+ b) If current character is a \\ handle
+ \\n, \\r, \\, \\"
+ c) else append the current character into the buffer
+ before checking that EOL has not been reached.
+ 2.2) If the current character does not begin with a quote
+ 2.2.1) Until EOL has been reached
+ a) If the end of field has been reached move to the
+ next field and jump to step 2.3
+ b) If current character begins with \\ handle
+ \\n, \\r, \\, \\"
+ c) else append the current character into the buffer
+ before checking that EOL has not been reached.
+ 2.3) Store the current field value and jump to 2)
+ TERMINATE
+ */
+
for (Field **field=table->field ; *field ; field++)
{
char curr_char;
@@ -621,19 +648,23 @@ int ha_tina::find_current_row(uchar *buf
if (curr_offset >= end_offset)
goto err;
curr_char= file_buff->get_value(curr_offset);
+ /* Handle the case where the first character is a quote */
if (curr_char == '"')
{
- curr_offset++; // Incrementpast the first quote
+ /* Increment past the first quote */
+ curr_offset++;
- for(; curr_offset < end_offset; curr_offset++)
+ /* Loop through the row to extract the values for the current field */
+ for( ; curr_offset < end_offset; curr_offset++)
{
curr_char= file_buff->get_value(curr_offset);
- // Need to convert line feeds!
+ /* check for end of the current field */
if (curr_char == '"' &&
(curr_offset == end_offset - 1 ||
file_buff->get_value(curr_offset + 1) == ','))
{
- curr_offset+= 2; // Move past the , and the "
+ /* Move past the , and the " */
+ curr_offset+= 2;
break;
}
if (curr_char == '\\' && curr_offset != (end_offset - 1))
@@ -655,7 +686,7 @@ int ha_tina::find_current_row(uchar *buf
else // ordinary symbol
{
/*
- We are at final symbol and no last quote was found =>
+ If we are at final symbol and no last quote was found =>
we are working with a damaged file.
*/
if (curr_offset == end_offset - 1)
@@ -666,15 +697,41 @@ int ha_tina::find_current_row(uchar *buf
}
else
{
- for(; curr_offset < end_offset; curr_offset++)
+ for( ; curr_offset < end_offset; curr_offset++)
{
curr_char= file_buff->get_value(curr_offset);
+ /* Move past the ,*/
if (curr_char == ',')
{
- curr_offset++; // Skip the ,
+ curr_offset++;
break;
}
- buffer.append(curr_char);
+ if (curr_char == '\\' && curr_offset != (end_offset - 1))
+ {
+ curr_offset++;
+ curr_char= file_buff->get_value(curr_offset);
+ if (curr_char == 'r')
+ buffer.append('\r');
+ else if (curr_char == 'n' )
+ buffer.append('\n');
+ else if (curr_char == '\\' || curr_char == '"')
+ buffer.append(curr_char);
+ else /* This could only happen with an externally created file */
+ {
+ buffer.append('\\');
+ buffer.append(curr_char);
+ }
+ }
+ else
+ {
+ /*
+ We are at the final symbol and a quote was found for the
+ unquoted field => We are working with a damaged field.
+ */
+ if (curr_offset == end_offset - 1 && curr_char == '"')
+ goto err;
+ buffer.append(curr_char);
+ }
}
}
| Thread |
|---|
| • bzr commit into mysql-6.0-bugteam branch (v.narayanan:2967) Bug#40814 | V Narayanan | 15 Jan 2009 |