MySQL Lists are EOL. Please join:

List: Commits    « Previous Message | Next Message »
From: V Narayanan    Date: November 18 2008 6:01am
Subject: bzr commit into mysql-5.0-bugteam branch (v.narayanan:2718) Bug#39616
View as plain text  
#At file:///home/narayanan/Work/mysql/W-M/mysql-5.0-bugteam-39616/

 2718 V Narayanan	2008-11-18
      Bug#39616: Missing quotes from .CSV crashes server
            
      When a CSV file contained comma-separated elements
      that were not enclosed in quotes, the MySQL server
      crashed.
            
      The old algorithm that parsed the content of a row in
      MySQL 5.0 assumed that every field value in a .CSV
      file is enclosed in quotes and that the fields are
      separated by commas.
      
      This caused the old algorithm to fail when the
      content of the file resembled the following:
      3,"sans quotes"
      The CSV engine that is part of MySQL 5.0 expected
      the above to be:
      "3","sans quotes"
            
      The above is just one example of where the engine
      failed on content that would otherwise be recognized
      as a valid .CSV file.
            
      The proposed fix changes the algorithm used to parse
      rows from the .CSV file so that it handles two separate
      cases:
      
      1) The current field of the row is enclosed in quotes
      2) The current field of the row is not enclosed in
         quotes
modified:
  mysql-test/r/csv.result
  mysql-test/t/csv.test
  sql/examples/ha_tina.cc

per-file messages:
  mysql-test/r/csv.result
    Contains additional test output corresponding to the new 
    tests added.
  mysql-test/t/csv.test
    Contains additional tests for testing the behaviour of the CSV 
    storage engine when the fields are not enclosed in quotes.
  sql/examples/ha_tina.cc
    Changes the parsing logic of the rows in a CSV file to account for
    fields that are not enclosed in quotes.
=== modified file 'mysql-test/r/csv.result'
--- a/mysql-test/r/csv.result	2007-10-26 00:23:12 +0000
+++ b/mysql-test/r/csv.result	2008-11-18 06:01:03 +0000
@@ -5071,4 +5071,19 @@ select * from t1;
 a
 foo
 drop table t1;
+create table bug39616_1(id int NOT NULL, d varchar(50) NOT NULL) ENGINE=csv;
+select * from bug39616_1;
+id	d
+1	integer sans quotes
+1	string sans quotes
+1	string end quotes"
+1	quotes"in between" strings 
+1	Integer with quote and string with no quote
+1	escape sequence 
+ " \ 
 \a within quotes
+drop table bug39616_1;
+create table bug39616_1(id int NOT NULL, d varchar(50) NOT NULL) ENGINE=csv;
+select * from bug39616_1;
+id	d
+drop table bug39616_1;
 End of 5.0 tests

=== modified file 'mysql-test/t/csv.test'
--- a/mysql-test/t/csv.test	2007-10-26 00:23:12 +0000
+++ b/mysql-test/t/csv.test	2008-11-18 06:01:03 +0000
@@ -1460,4 +1460,47 @@ insert into t1 values();
 select * from t1;
 drop table t1;
 
+#
+# Bug #39616 Missing quotes from .CSV crashes server
+#
+# Editing the .CSV file and leaving out quotes from around an integer field 
+# crashes the server.
+#
+
+#
+# Test for the integers and strings enclosed in quotes, not enclosed in quotes,
+# \X  characters.
+#
+create table bug39616_1(id int NOT NULL, d varchar(50) NOT NULL) ENGINE=csv;
+
+--remove_file $MYSQLTEST_VARDIR/master-data/test/bug39616_1.CSV
+--write_file $MYSQLTEST_VARDIR/master-data/test/bug39616_1.CSV
+1,"integer sans quotes"
+1,string sans quotes
+1,string end quotes"
+1,quotes"in between" strings 
+"1",Integer with quote and string with no quote
+1,"escape sequence \n \" \\ \r \a within quotes"
+EOF
+
+select * from bug39616_1;
+
+drop table bug39616_1; 
+
+#
+# Test for he case when a field begins with a quote, but does not end in a
+# quote.
+# Note: This results in an empty set.
+#
+create table bug39616_1(id int NOT NULL, d varchar(50) NOT NULL) ENGINE=csv;
+
+--remove_file $MYSQLTEST_VARDIR/master-data/test/bug39616_1.CSV
+--write_file $MYSQLTEST_VARDIR/master-data/test/bug39616_1.CSV
+1,"string only at the beginning quotes
+EOF
+
+select * from bug39616_1;
+
+drop table bug39616_1; 
+
 --echo End of 5.0 tests

=== modified file 'sql/examples/ha_tina.cc'
--- a/sql/examples/ha_tina.cc	2008-03-29 15:50:46 +0000
+++ b/sql/examples/ha_tina.cc	2008-11-18 06:01:03 +0000
@@ -416,37 +416,96 @@ int ha_tina::find_current_row(byte *buf)
   if ((end_ptr=  find_eoln(share->mapped_file, current_position, share->file_stat.st_size)) == 0)
     DBUG_RETURN(HA_ERR_END_OF_FILE);
 
+  /*
+    Parse the line obtained using the following algorithm
+   
+    BEGIN
+      1) Store the EOL (end of line) for the current row
+      2) Until all the fields in the current query have not been 
+         filled
+         2.1) If the current character begins with a quote
+              2.1.1) Until EOL has not been reached
+                     a) If end of current field is reached, move
+                        to next field and jump to step 2.3
+                     b) If current character begins with \\ handle
+                        \\n, \\r, \\, \\"
+                     c) else append the current character into the buffer
+                        before checking that EOL has not been reached.
+          2.2) If the current character does not begin with a quote
+               2.2.1) Until EOL has not been reached
+                      a) If the end of field has been reached move to the
+                         next field and jump to step 2.3
+                      b) append the current character into the buffer
+          2.3) Store the current field value and jump to 2)
+    TERMINATE
+   */
+
   for (Field **field=table->field ; *field ; field++)
   {
     buffer.length(0);
-    mapped_ptr++; // Increment past the first quote
-    for(;mapped_ptr != end_ptr; mapped_ptr++)
+    /* Handle the case where the first character begins with a quote */
+    if (*mapped_ptr == '"')
     {
-      //Need to convert line feeds!
-      if (*mapped_ptr == '"' && 
-          (((mapped_ptr[1] == ',') && (mapped_ptr[2] == '"')) || (mapped_ptr == end_ptr -1 )))
-      {
-        mapped_ptr += 2; // Move past the , and the "
-        break;
-      } 
-      if (*mapped_ptr == '\\' && mapped_ptr != (end_ptr - 1)) 
+      /* Increment past the first quote */
+      mapped_ptr++;
+      /* Loop through the row to extract the values for the current field */
+      for(; mapped_ptr != end_ptr; mapped_ptr++)
       {
-        mapped_ptr++;
-        if (*mapped_ptr == 'r')
-          buffer.append('\r');
-        else if (*mapped_ptr == 'n' )
-          buffer.append('\n');
-        else if ((*mapped_ptr == '\\') || (*mapped_ptr == '"'))
-          buffer.append(*mapped_ptr);
-        else  /* This could only happed with an externally created file */
+        /* check for end of the current field */
+        if (*mapped_ptr == '"' && 
+            (mapped_ptr[1] == ',' || mapped_ptr == end_ptr -1 ))
+        {
+          /* Move past the , and the " */
+          mapped_ptr += 2;
+          break;
+        } 
+        if (*mapped_ptr == '\\' && mapped_ptr != (end_ptr - 1)) 
+        {
+          mapped_ptr++;
+          if (*mapped_ptr == 'r')
+            buffer.append('\r');
+          else if (*mapped_ptr == 'n' )
+            buffer.append('\n');
+          else if ((*mapped_ptr == '\\') || (*mapped_ptr == '"'))
+            buffer.append(*mapped_ptr);
+          else  /* This could only happed with an externally created file */
+          {
+            buffer.append('\\');
+            buffer.append(*mapped_ptr);
+          }
+        } 
+        else
         {
-          buffer.append('\\');
+          /*
+           If no last quote was found, but the end of row has been reached
+           it implies that there has been error.
+          */
+          if (mapped_ptr == end_ptr -1)
+            DBUG_RETURN(HA_ERR_END_OF_FILE);
+          /* Store current character in the buffer for the field */
           buffer.append(*mapped_ptr);
         }
-      } 
-      else
+      }
+    }
+    else
+    {
+      /* Handle the case where the current row does not start with quotes */
+        
+      /* Loop through the row to extract the values for the current field */
+      for (; mapped_ptr != end_ptr; mapped_ptr++)
+      {
+        /* check for end of current field */
+        if (*mapped_ptr == ',')
+        {
+          /* Increment past the current comma */
+          mapped_ptr++;
+          break;
+        }
+        /* store the current character in the buffer for the field */
         buffer.append(*mapped_ptr);
+      }
     }
+    /* Store the field value from the buffer */
     (*field)->store(buffer.ptr(), buffer.length(), buffer.charset());
   }
   next_position= (end_ptr - share->mapped_file)+1;

Thread
bzr commit into mysql-5.0-bugteam branch (v.narayanan:2718) Bug#39616 - V Narayanan, 18 Nov
  • Re: bzr commit into mysql-5.0-bugteam branch (v.narayanan:2718) Bug#39616 - Ingo Strüwing, 18 Nov