MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:V Narayanan Date:December 3 2009 11:48am
Subject:bzr commit into mysql-5.6-next-mr branch (v.narayanan:2926) Bug#40814
View as plain text  
#At file:///home/narayanan/Work/mysql_checkouts/shared_repository_directory/mysql-next-mr-svoj-40814-01/ based on revid:svoj@stripped

 2926 V Narayanan	2009-12-03
      Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields
          
      When a .CSV file for a table in the CSV engine contains
      \X characters as part of unquoted fields, e.g.
          
      2,naraya\nan
          
      \n is not interpreted as a new line (it is however interpreted as a
      newline in a quoted field).
          
      The old algorithm copied the entire value for an unquoted field without
      parsing the \X characters. 
          
      The new algorithm adds the capability to handle \X characters in the 
      unquoted fields of a .CSV file.
     @ mysql-test/r/csv.result
        Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields
        
        Contains additional test output corresponding to the new 
        tests added.
     @ mysql-test/t/csv.test
        Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields
        
        Contains additional tests for testing the behaviour of the CSV 
        storage engine when the fields are not enclosed in quotes and
        contain \X characters.
     @ storage/csv/ha_tina.cc
        Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields
        
        Changes the parsing logic of the rows in a CSV file, to parse
        \X characters that might be present in the unquoted fields.

    modified:
      mysql-test/r/csv.result
      mysql-test/t/csv.test
      storage/csv/ha_tina.cc
=== modified file 'mysql-test/r/csv.result'
--- a/mysql-test/r/csv.result	2009-01-23 12:22:05 +0000
+++ b/mysql-test/r/csv.result	2009-12-03 11:48:43 +0000
@@ -5407,4 +5407,60 @@ test.t1	repair	status	OK
 select * from t1 limit 1;
 a
 drop table t1;
+#
+# Test for the following cases
+# 1) integers and strings enclosed in quotes
+# 2) integers and strings not enclosed in quotes
+# 3) \X  characters with quotes
+# 4) \X  characters outside quotes
+#
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,"integer sans quotes"
+1,string sans quotes
+1,quotes"in between" strings 
+"1",Integer with quote and string with no quote
+1,"escape sequence \n \" \\ \r \a within quotes"
+1,escape sequence \n \" \\ \r \a without quotes
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+c1	c2
+1	integer sans quotes
+1	string sans quotes
+1	quotes"in between" strings 
+1	Integer with quote and string with no quote
+1	escape sequence 
+ " \ 
 \a within quotes
+1	escape sequence 
+ " \ 
 \a without quotes
+DROP TABLE t1;
+# Test for the case when a field begins with a quote, but does not end in a
+# quote.
+# Note: This results in an error.
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,"string only at the beginning quotes
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+ERROR HY000: Table 't1' is marked as crashed and should be repaired
+DROP TABLE t1;
+# Test for the case when a field ends with a quote, but does not begin in a
+# quote.
+# Note: This results in an error.
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+# remove the already existing .CSV file if any
+# create the .CSV file that contains the hard-coded data used in
+# testing
+1,string with only ending quotes"
+# select from the table in which the data has been filled in using
+# the hard-coded .CSV file
+SELECT * FROM t1;
+ERROR HY000: Table 't1' is marked as crashed and should be repaired
+DROP TABLE t1;
 End of 5.1 tests

=== modified file 'mysql-test/t/csv.test'
--- a/mysql-test/t/csv.test	2009-05-15 10:15:56 +0000
+++ b/mysql-test/t/csv.test	2009-12-03 11:48:43 +0000
@@ -1819,4 +1819,84 @@ repair table t1;
 select * from t1 limit 1;
 drop table t1;
 
+#
+# Bug #40814 CSV engine does not parse \X characters when they occur in unquoted fields
+#
+
+--echo #
+--echo # Test for the following cases
+--echo # 1) integers and strings enclosed in quotes
+--echo # 2) integers and strings not enclosed in quotes
+--echo # 3) \X  characters with quotes
+--echo # 4) \X  characters outside quotes
+--echo #
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLD_DATADIR/test/t1.CSV
+1,"integer sans quotes"
+1,string sans quotes
+1,quotes"in between" strings 
+"1",Integer with quote and string with no quote
+1,"escape sequence \n \" \\ \r \a within quotes"
+1,escape sequence \n \" \\ \r \a without quotes
+EOF
+--cat_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+SELECT * FROM t1;
+
+DROP TABLE t1; 
+
+--echo # Test for the case when a field begins with a quote, but does not end in a
+--echo # quote.
+--echo # Note: This results in an error.
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLD_DATADIR/test/t1.CSV
+1,"string only at the beginning quotes
+EOF
+--cat_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+--error ER_CRASHED_ON_USAGE
+SELECT * FROM t1;
+
+DROP TABLE t1;
+
+--echo # Test for the case when a field ends with a quote, but does not begin in a
+--echo # quote.
+--echo # Note: This results in an error.
+
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
+
+--echo # remove the already existing .CSV file if any
+--remove_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # create the .CSV file that contains the hard-coded data used in
+--echo # testing
+--write_file $MYSQLD_DATADIR/test/t1.CSV
+1,string with only ending quotes"
+EOF
+--cat_file $MYSQLD_DATADIR/test/t1.CSV
+
+--echo # select from the table in which the data has been filled in using
+--echo # the hard-coded .CSV file
+--error ER_CRASHED_ON_USAGE
+SELECT * FROM t1;
+
+DROP TABLE t1;
 --echo End of 5.1 tests

=== modified file 'storage/csv/ha_tina.cc'
--- a/storage/csv/ha_tina.cc	2009-10-23 11:22:21 +0000
+++ b/storage/csv/ha_tina.cc	2009-12-03 11:48:43 +0000
@@ -614,6 +614,33 @@ int ha_tina::find_current_row(uchar *buf
 
   memset(buf, 0, table->s->null_bytes);
 
+  /*
+    Parse the line obtained using the following algorithm
+   
+    BEGIN
+      1) Store the EOL (end of line) for the current row
+      2) Until all the fields in the current query have been
+         filled
+         2.1) If the current character is a quote
+              2.1.1) Until EOL has not been reached
+                     a) If end of current field is reached, move
+                        to next field and jump to step 2.3
+                     b) If current character is a \\ handle
+                        \\n, \\r, \\, \\"
+                     c) else append the current character into the buffer
+                        before checking that EOL has not been reached.
+          2.2) If the current character does not begin with a quote
+               2.2.1) Until EOL has not been reached
+                      a) If the end of field has been reached move to the
+                         next field and jump to step 2.3
+                      b) If current character begins with \\ handle
+                        \\n, \\r, \\, \\"
+                      c) else append the current character into the buffer
+                         before checking that EOL has not been reached.
+          2.3) Store the current field value and jump to 2)
+    TERMINATE
+  */  
+
   for (Field **field=table->field ; *field ; field++)
   {
     char curr_char;
@@ -622,19 +649,23 @@ int ha_tina::find_current_row(uchar *buf
     if (curr_offset >= end_offset)
       goto err;
     curr_char= file_buff->get_value(curr_offset);
+    /* Handle the case where the first character is a quote */
     if (curr_char == '"')
     {
-      curr_offset++; // Incrementpast the first quote
+      /* Increment past the first quote */
+      curr_offset++;
 
-      for(; curr_offset < end_offset; curr_offset++)
+      /* Loop through the row to extract the values for the current field */
+      for ( ; curr_offset < end_offset; curr_offset++)
       {
         curr_char= file_buff->get_value(curr_offset);
-        // Need to convert line feeds!
+        /* check for end of the current field */
         if (curr_char == '"' &&
             (curr_offset == end_offset - 1 ||
              file_buff->get_value(curr_offset + 1) == ','))
         {
-          curr_offset+= 2; // Move past the , and the "
+          /* Move past the , and the " */
+          curr_offset+= 2;
           break;
         }
         if (curr_char == '\\' && curr_offset != (end_offset - 1))
@@ -656,7 +687,7 @@ int ha_tina::find_current_row(uchar *buf
         else // ordinary symbol
         {
           /*
-            We are at final symbol and no last quote was found =>
+            If we are at final symbol and no last quote was found =>
             we are working with a damaged file.
           */
           if (curr_offset == end_offset - 1)
@@ -667,15 +698,41 @@ int ha_tina::find_current_row(uchar *buf
     }
     else 
     {
-      for(; curr_offset < end_offset; curr_offset++)
+      for ( ; curr_offset < end_offset; curr_offset++)
       {
         curr_char= file_buff->get_value(curr_offset);
+        /* Move past the ,*/
         if (curr_char == ',')
         {
-          curr_offset++;       // Skip the ,
+          curr_offset++;
           break;
         }
-        buffer.append(curr_char);
+        if (curr_char == '\\' && curr_offset != (end_offset - 1))
+        {
+          curr_offset++;
+          curr_char= file_buff->get_value(curr_offset);
+          if (curr_char == 'r')
+            buffer.append('\r');
+          else if (curr_char == 'n' )
+            buffer.append('\n');
+          else if (curr_char == '\\' || curr_char == '"')
+            buffer.append(curr_char);
+          else  /* This could only happen with an externally created file */
+          {
+            buffer.append('\\');
+            buffer.append(curr_char);
+          }
+        }
+        else
+        {
+          /*
+             We are at the final symbol and a quote was found for the
+             unquoted field => We are working with a damaged field.
+          */
+          if (curr_offset == end_offset - 1 && curr_char == '"')
+            goto err;
+          buffer.append(curr_char);
+        }
       }
     }
 


Attachment: [text/bzr-bundle] bzr/v.narayanan@sun.com-20091203114843-bama7cf5g3q27cd2.bundle
Thread
bzr commit into mysql-5.6-next-mr branch (v.narayanan:2926) Bug#40814V Narayanan3 Dec
  • Re: bzr commit into mysql-5.6-next-mr branch (v.narayanan:2926)Bug#40814Sergey Vojtovich8 Dec