List:Commits« Previous MessageNext Message »
From:bar Date:July 19 2007 12:27pm
Subject:bk commit into 5.2 tree (bar:1.2537) BUG#29752
View as plain text  
Below is the list of changes that have just been committed into a local
5.2 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-07-19 15:27:49+05:00, bar@stripped +3 -0
  Bug#29752 Linefeeds break LOAD XML INFILE
  Problem: Carriage return, linefeed and tab characters were
  not considered as separators. Only the space character
  worked as a separator.
  Fix: convert carriage return, linefeed and tab characters to space
  when loading the next character from the input stream.
  This is safe, because these characters are all equivalent
  white space from the point of view of XML.

  mysql-test/r/loadxml.result@stripped, 2007-07-19 15:27:47+05:00, bar@stripped +16 -0
    Adding tests

  mysql-test/std_data/loadxml.dat@stripped, 2007-07-19 15:27:47+05:00, bar@stripped +20 -1
    Adding tests

  sql/sql_load.cc@stripped, 2007-07-19 15:27:47+05:00, bar@stripped +39 -20
    When loading from the input stream, convert all carriage return,
    linefeed and tab characters to spaces.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	bar
# Host:	bar.myoffice.izhnet.ru
# Root:	/home/bar/mysql-work/mysql-5.2.b28125

--- 1.130/sql/sql_load.cc	2007-06-27 12:04:15 +05:00
+++ 1.131/sql/sql_load.cc	2007-07-19 15:27:47 +05:00
@@ -1549,6 +1549,23 @@ my_xml_entity_to_char(const char *name, 
 }
 
 
+/**
+  @brief Convert carriage return, line feed, tab to space
+  
+  @param chr    character
+  
+  @details According to the "XML 1.0" standard,
+           only space (#x20) characters, carriage returns,
+           line feeds or tabs are considered as spaces.
+           Convert all of them to space (#x20) for parsing simplicity.
+*/
+static int
+my_tospace(int chr)
+{
+  return (chr == '\t' || chr == '\r' || chr == '\n') ? ' ' : chr;
+}
+
+
 /*
   Read an xml value: handle multibyte and xml escape
 */
@@ -1557,7 +1574,7 @@ int READ_INFO::read_value(int delim, Str
   int chr;
   String tmp;
 
-  for (chr= GET; chr != delim && chr != my_b_EOF; )
+  for (chr= my_tospace(GET); chr != delim && chr != my_b_EOF; )
   {
 #ifdef USE_MB
     if (my_mbcharlen(read_charset, chr) > 1)
@@ -1567,18 +1584,20 @@ int READ_INFO::read_value(int delim, Str
       for (i= 1; i < ml; i++) 
       {
         val->append(chr);
-        chr= GET;
+        /*
+          Don't use my_tospace() in the middle of a multi-byte character
+          TODO: check that the multi-byte sequence is valid.
+        */
+        chr= GET; 
         if (chr == my_b_EOF)
           return chr;
       }
     }
 #endif
-    if(my_isspace(read_charset, chr)) /* convert newline, tab etc to space */
-      val->append(' ');
-    else if(chr == '&')
+    if(chr == '&')
     {
       tmp.length(0);
-      for (chr= GET ; chr != ';' ; chr= GET)
+      for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET))
       {
         if (chr == my_b_EOF)
           return chr;
@@ -1595,7 +1614,7 @@ int READ_INFO::read_value(int delim, Str
     }
     else
       val->append(chr);
-    chr= GET; 
+    chr= my_tospace(GET);
   }            
   return chr;
 }
@@ -1618,12 +1637,12 @@ int READ_INFO::read_xml()
   attribute.length(0);
   value.length(0);
   
-  for (chr= GET; chr != my_b_EOF ; )
+  for (chr= my_tospace(GET); chr != my_b_EOF ; )
   {
     switch(chr){
     case '<':  /* read tag */
         /* TODO: check if this is a comment <!-- comment -->  */
-      chr= GET;
+      chr= my_tospace(GET);
       if(chr == '!')
       {
         chr2= GET;
@@ -1633,7 +1652,7 @@ int READ_INFO::read_xml()
         {
           chr2= 0;
           chr3= 0;
-          chr= GET;
+          chr= my_tospace(GET);
           
           while(chr != '>' || chr2 != '-' || chr3 != '-')
           {
@@ -1647,7 +1666,7 @@ int READ_INFO::read_xml()
               chr2= 0;
               chr3= 0;
             }
-            chr= GET;
+            chr= my_tospace(GET);
             if (chr == my_b_EOF)
               goto found_eof;
           }
@@ -1660,7 +1679,7 @@ int READ_INFO::read_xml()
       {
         if(chr != delim) /* fix for the '<field name =' format */
           tag.append(chr);
-        chr= GET;
+        chr= my_tospace(GET);
       }
       
       // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term 
@@ -1685,7 +1704,7 @@ int READ_INFO::read_xml()
       
     case ' ': /* read attribute */
       while(chr == ' ')  /* skip blanks */
-        chr= GET;
+        chr= my_tospace(GET);
       
       if(!in_tag)
         break;
@@ -1693,7 +1712,7 @@ int READ_INFO::read_xml()
       while(chr != '=' && chr != '/' && chr != '>' && chr !=
my_b_EOF)
       {
         attribute.append(chr);
-        chr= GET;
+        chr= my_tospace(GET);
       }
       break;
       
@@ -1717,13 +1736,13 @@ int READ_INFO::read_xml()
       
     case '/': /* close tag */
       level--;
-      chr= GET;
+      chr= my_tospace(GET);
       if(chr != '>')   /* if this is an empty tag <tag   /> */
         tag.length(0); /* we should keep tag value          */
       while(chr != '>' && chr != my_b_EOF)
       {
         tag.append(chr);
-        chr= GET;
+        chr= my_tospace(GET);
       }
       
       if((tag.length() == line_term_length -2) &&
@@ -1733,7 +1752,7 @@ int READ_INFO::read_xml()
                                  level, tag.c_ptr_safe()));
          DBUG_RETURN(0); //normal return
       }
-      chr= GET;
+      chr= my_tospace(GET);
       break;   
       
     case '=': /* attribute name end - read the value */
@@ -1745,7 +1764,7 @@ int READ_INFO::read_xml()
           this is format <field name="xx">xx</field>
           where actual fieldname is in attribute
         */
-        delim= GET;
+        delim= my_tospace(GET);
         tag.length(0);
         attribute.length(0);
         chr= '<'; /* we pretend that it is a tag */
@@ -1779,11 +1798,11 @@ int READ_INFO::read_xml()
       attribute.length(0);
       value.length(0);
       if (chr != ' ')
-        chr= GET;
+        chr= my_tospace(GET);
       break;
     
     default:
-      chr= GET;  
+      chr= my_tospace(GET);
     } /* end switch */
   } /* end while */
   

--- 1.3/mysql-test/r/loadxml.result	2007-06-27 15:20:21 +05:00
+++ 1.4/mysql-test/r/loadxml.result	2007-07-19 15:27:47 +05:00
@@ -12,6 +12,10 @@ a	b
 111	b111
 112	b112 & < > " ' &unknown; -- check entities
 212	b212
+213	b213
+214	b214
+215	b215
+216	&bb b;
 delete from t1;
 -- Load a static XML file with 'IGNORE num ROWS'
 load xml infile '../std_data_ln/loadxml.dat' into table t1
@@ -21,6 +25,10 @@ a	b
 111	b111
 112	b112 & < > " ' &unknown; -- check entities
 212	b212
+213	b213
+214	b214
+215	b215
+216	&bb b;
 -- Check 'mysqldump --xml' + 'LOAD XML' round trip
 delete from t1;
 load xml infile 'MYSQLTEST_VARDIR/loadxml-dump.xml' into table t1 rows identified by
'<row>';;
@@ -29,6 +37,10 @@ a	b
 111	b111
 112	b112 & < > " ' &unknown; -- check entities
 212	b212
+213	b213
+214	b214
+215	b215
+216	&bb b;
 --Check that default row tag is '<row>
 delete from t1;
 load xml infile 'MYSQLTEST_VARDIR/loadxml-dump.xml' into table t1;;
@@ -37,6 +49,10 @@ a	b
 111	b111
 112	b112 & < > " ' &unknown; -- check entities
 212	b212
+213	b213
+214	b214
+215	b215
+216	&bb b;
 -- Check that 'xml' is not a keyword
 select 1 as xml;
 xml

--- 1.1/mysql-test/std_data/loadxml.dat	2007-06-27 12:04:16 +05:00
+++ 1.2/mysql-test/std_data/loadxml.dat	2007-07-19 15:27:47 +05:00
@@ -37,7 +37,26 @@
 
 
 	<!-- Check field values in attributes -->
-	<row a=212 b="b212"</row>
+	<row a=212 b="b212"></row>
+
+	<!-- Bug#29752 Linefeeds break LOAD XML INFILE -->
+	<!-- Check various combinations of TAB and NL  -->
+
+	<row
+	a=213 b="b213">
+	</row>
+
+	<row
+	a=214
+	b="b214">
+	</row>
+
+	<row a=215	b="b215"></row>
+
+	<row a=216 b="&bb
+b;"></row>
+
+	<!-- End of bug#29752 -->
 
 	</table_data>
 </database>
Thread
bk commit into 5.2 tree (bar:1.2537) BUG#29752bar19 Jul