List:Commits« Previous MessageNext Message »
From:V Narayanan Date:January 15 2009 10:58am
Subject:bzr commit into mysql-6.0-bugteam branch (v.narayanan:2967) Bug#40814
View as plain text  
#At file:///home/narayanan/Work/mysql/W-M/mysql-6.0-bugteam-40814/

 2967 V Narayanan	2009-01-15
      Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields
            
      When a .CSV file for a table in the CSV engine contains
      \X characters as part of unquoted fields, e.g.
            
      2,naraya\nan
            
      \n is not interpreted as a new line (it is however interpreted as a
      newline in a quoted field).
            
      The old algorithm copied the entire value for an unquoted field without
      parsing the \X characters. 
            
      The new algorithm adds the capability to handle \X characters in the 
      unquoted fields of a .CSV file.
modified:
  mysql-test/r/csv.result
  mysql-test/t/csv.test
  storage/csv/ha_tina.cc

per-file messages:
  mysql-test/r/csv.result
    Contains additional test output corresponding to the new 
    tests added.
  mysql-test/t/csv.test
    Contains additional tests for testing the behaviour of the CSV 
    storage engine when the fields are not enclosed in quotes and
    contain \X characters.
  storage/csv/ha_tina.cc
    Changes the parsing logic of the rows in a CSV file, to parse
    \X characters that might be present in the unquoted fields.
=== modified file 'mysql-test/r/csv.result'
--- a/mysql-test/r/csv.result	2008-12-29 12:50:51 +0000
+++ b/mysql-test/r/csv.result	2009-01-15 09:57:24 +0000
@@ -5407,4 +5407,60 @@ test.t1	repair	status	OK
 select * from t1 limit 1;
 a
 drop table t1;
+#
+# Test for the following cases
+# 1) integers and strings enclosed in quotes
+# 2) integers and strings not enclosed in quotes
+# 3) \X  characters with quotes
+# 4) \X  characters outside quotes
+#
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,"integer sans quotes"
+1,string sans quotes
+1,quotes"in between" strings 
+"1",Integer with quote and string with no quote
+1,"escape sequence \n \" \\ \r \a within quotes"
+1,escape sequence \n \" \\ \r \a without quotes
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+c1	c2
+1	integer sans quotes
+1	string sans quotes
+1	quotes"in between" strings 
+1	Integer with quote and string with no quote
+1	escape sequence 
+ " \ 
 \a within quotes
+1	escape sequence 
+ " \ 
 \a without quotes
+DROP TABLE t1;
+# Test for the case when a field begins with a quote, but does not end in a
+# quote.
+# Note: This results in an error.
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,"string only at the beginning quotes
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+ERROR HY000: Table 't1' is marked as crashed and should be repaired
+DROP TABLE t1;
+# Test for the case when a field ends with a quote, but does not begin in a
+# quote.
+# Note: This results in an error.
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,string with only ending quotes"
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+ERROR HY000: Table 't1' is marked as crashed and should be repaired
+DROP TABLE t1;
 End of 5.1 tests

=== modified file 'mysql-test/t/csv.test'
--- a/mysql-test/t/csv.test	2008-12-29 12:50:51 +0000
+++ b/mysql-test/t/csv.test	2009-01-15 09:57:24 +0000
@@ -1817,4 +1817,85 @@ repair table t1;
 select * from t1 limit 1;
 drop table t1;
 
+#
+# Bug #40814 CSV engine does not parse \X characters when they occur in unquoted fields
+#
+
+--echo #
+--echo # Test for the following cases
+--echo # 1) integers and strings enclosed in quotes
+--echo # 2) integers and strings not enclosed in quotes
+--echo # 3) \X  characters with quotes
+--echo # 4) \X  characters outside quotes
+--echo #
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+1,"integer sans quotes"
+1,string sans quotes
+1,quotes"in between" strings 
+"1",Integer with quote and string with no quote
+1,"escape sequence \n \" \\ \r \a within quotes"
+1,escape sequence \n \" \\ \r \a without quotes
+EOF
+--cat_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+SELECT * FROM t1;
+
+DROP TABLE t1; 
+
+--echo # Test for the case when a field begins with a quote, but does not end in a
+--echo # quote.
+--echo # Note: This results in an error.
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+1,"string only at the beginning quotes
+EOF
+--cat_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+--error ER_CRASHED_ON_USAGE
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+--echo # Test for the case when a field ends with a quote, but does not begin in a
+--echo # quote.
+--echo # Note: This results in an error.
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+1,string with only ending quotes"
+EOF
+--cat_file $MYSQLTEST_VARDIR/master-data/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+--error ER_CRASHED_ON_USAGE
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
 --echo End of 5.1 tests

=== modified file 'storage/csv/ha_tina.cc'
--- a/storage/csv/ha_tina.cc	2009-01-06 10:38:47 +0000
+++ b/storage/csv/ha_tina.cc	2009-01-15 09:57:24 +0000
@@ -613,6 +613,33 @@ int ha_tina::find_current_row(uchar *buf
 
   memset(buf, 0, table->s->null_bytes);
 
+  /*
+    Parse the line obtained using the following algorithm
+   
+    BEGIN
+      1) Store the EOL (end of line) for the current row
+      2) Until all the fields in the current query have been
+         filled
+         2.1) If the current character is a quote
+              2.1.1) Until EOL has been reached
+                     a) If end of current field is reached, move
+                        to next field and jump to step 2.3
+                     b) If current character is a \\ handle
+                        \\n, \\r, \\, \\"
+                     c) else append the current character into the buffer
+                        before checking that EOL has not been reached.
+          2.2) If the current character is not a quote
+               2.2.1) Until EOL has been reached
+                      a) If the end of field has been reached move to the
+                         next field and jump to step 2.3
+                      b) If current character begins with \\ handle
+                        \\n, \\r, \\, \\"
+                      c) else append the current character into the buffer
+                         before checking that EOL has not been reached.
+          2.3) Store the current field value and jump to 2)
+    TERMINATE
+  */
+  
   for (Field **field=table->field ; *field ; field++)
   {
     char curr_char;
@@ -621,19 +648,23 @@ int ha_tina::find_current_row(uchar *buf
     if (curr_offset >= end_offset)
       goto err;
     curr_char= file_buff->get_value(curr_offset);
+    /* Handle the case where the first character is a quote */
     if (curr_char == '"')
     {
-      curr_offset++; // Incrementpast the first quote
+      /* Increment past the first quote */
+      curr_offset++;
 
-      for(; curr_offset < end_offset; curr_offset++)
+      /* Loop through the row to extract the values for the current field */
+      for( ; curr_offset < end_offset; curr_offset++)
       {
         curr_char= file_buff->get_value(curr_offset);
-        // Need to convert line feeds!
+        /* check for end of the current field */
         if (curr_char == '"' &&
             (curr_offset == end_offset - 1 ||
              file_buff->get_value(curr_offset + 1) == ','))
         {
-          curr_offset+= 2; // Move past the , and the "
+          /* Move past the , and the " */
+          curr_offset+= 2;
           break;
         }
         if (curr_char == '\\' && curr_offset != (end_offset - 1))
@@ -655,7 +686,7 @@ int ha_tina::find_current_row(uchar *buf
         else // ordinary symbol
         {
           /*
-            We are at final symbol and no last quote was found =>
+            If we are at final symbol and no last quote was found =>
             we are working with a damaged file.
           */
           if (curr_offset == end_offset - 1)
@@ -666,15 +697,41 @@ int ha_tina::find_current_row(uchar *buf
     }
     else 
     {
-      for(; curr_offset < end_offset; curr_offset++)
+      for( ; curr_offset < end_offset; curr_offset++)
       {
         curr_char= file_buff->get_value(curr_offset);
+        /* Move past the ,*/
         if (curr_char == ',')
         {
-          curr_offset++;       // Skip the ,
+          curr_offset++;
           break;
         }
-        buffer.append(curr_char);
+        if (curr_char == '\\' && curr_offset != (end_offset - 1))
+        {
+          curr_offset++;
+          curr_char= file_buff->get_value(curr_offset);
+          if (curr_char == 'r')
+            buffer.append('\r');
+          else if (curr_char == 'n' )
+            buffer.append('\n');
+          else if (curr_char == '\\' || curr_char == '"')
+            buffer.append(curr_char);
+          else  /* This could only happen with an externally created file */
+          {
+            buffer.append('\\');
+            buffer.append(curr_char);
+          }
+        }
+        else
+        {
+          /*
+             We are at the final symbol and a quote was found for the
+             unquoted field => We are working with a damaged field.
+          */
+          if (curr_offset == end_offset - 1 && curr_char == '"')
+            goto err;
+          buffer.append(curr_char);
+        }
       }
     }
 

Thread
bzr commit into mysql-6.0-bugteam branch (v.narayanan:2967) Bug#40814 - V Narayanan - 15 Jan 2009