MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:<gshchepa Date:November 16 2007 7:44pm
Subject:bk commit into 5.0 tree (gshchepa:1.2566) BUG#31677
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of uchum. When uchum does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-11-16 23:44:55+04:00, gshchepa@stripped +4 -0
  Fixed bug #31677.
  1. Multi-byte characters were never escaped, because they
     were always skipped before the escaping process.
  
  2. 8-bit escape, terminating and enclosing characters were
     silently ignored by SELECT INTO OUTFILE, but the LOAD DATA INFILE
     algorithm is 8-bit clean, so data was corrupted during
     encoding.

  mysql-test/r/outfile_loaddata.result@stripped, 2007-11-16 23:44:50+04:00, gshchepa@stripped +59 -0
    Added test case for bug #31677.

  mysql-test/t/outfile_loaddata.test@stripped, 2007-11-16 23:44:49+04:00, gshchepa@stripped +89 -0
    Added test case for bug #31677.

  sql/sql_class.cc@stripped, 2007-11-16 23:44:41+04:00, gshchepa@stripped +21 -19
    Fixed bug #31677.
    1. Multi-byte skipping has been moved to the end of data
       escaping cycle to take into account first bytes of
       multi-byte characters.
    2. SELECT INTO OUTFILE encoding was not 8-bit clean; it
       has been fixed for symmetry with the LOAD DATA INFILE
       decoding algorithm.

  sql/sql_load.cc@stripped, 2007-11-16 23:44:47+04:00, gshchepa@stripped +27 -26
    Fixed bug #31677.
    Multi-byte skipping has been moved to the end of decoding
    cycle to un-escape first bytes of multi-byte characters.

diff -Nrup a/mysql-test/r/outfile_loaddata.result b/mysql-test/r/outfile_loaddata.result
--- a/mysql-test/r/outfile_loaddata.result	2007-10-23 16:15:18 +05:00
+++ b/mysql-test/r/outfile_loaddata.result	2007-11-16 23:44:50 +04:00
@@ -82,4 +82,63 @@ c1	c2
 -r-	=raker=
 DROP TABLE t2;
 DROP TABLE t1;
+#
+# Bug#31677: SELECT INTO OUTFILE never escapes multibyte character
+#
+SET NAMES utf8;
+SET character_set_database= utf8;
+CREATE TABLE t1 (c1 VARCHAR(256), c2 VARCHAR(256)) CHARACTER SET utf8;
+INSERT INTO t1 VALUES ('Á', 'Á');
+SELECT * FROM t1;
+c1	c2
+Á	Á
+# No FIELDS ENCLOSED/TERMINATED:
+SELECT * INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt'  FROM t1;
+TRUNCATE t1;
+SELECT HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'));
+HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'))
+C38109C3810A
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' INTO TABLE t1 ;
+SELECT * FROM t1;
+c1	c2
+Á	Á
+# FIELDS ENCLOSED BY 0xC3
+SELECT * INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' FIELDS ENCLOSED BY 0xC3 FROM t1;
+TRUNCATE t1;
+SELECT HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'));
+HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'))
+C35CC381C309C35CC381C30A
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' INTO TABLE t1 FIELDS ENCLOSED BY 0xC3;
+SELECT * FROM t1;
+c1	c2
+Á	Á
+# FIELDS ENCLOSED BY 0xC3 TERMINATED BY 0x81:
+SELECT * INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' FIELDS ENCLOSED BY 0xC3 TERMINATED BY 0x81 FROM t1;
+TRUNCATE t1;
+SELECT HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'));
+HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'))
+C35CC381C381C35CC381C30A
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' INTO TABLE t1 FIELDS ENCLOSED BY 0xC3 TERMINATED BY 0x81;
+SELECT * FROM t1;
+c1	c2
+Á	Á
+DROP TABLE t1;
+SET character_set_database= default;
+SET NAMES default;
+# Single-byte 8bit chars enclosed by 8bit char (FIELDS ENCLOSED BY 0xC3):
+CREATE TABLE t1 (c1 VARCHAR(256));
+INSERT INTO t1 VALUES (0xC3);
+SELECT HEX(c1) FROM t1;
+HEX(c1)
+C3
+SELECT * INTO OUTFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' FIELDS ENCLOSED BY 0xC3 FROM t1;
+TRUNCATE t1;
+SELECT HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'));
+HEX(LOAD_FILE('MYSQLTEST_VARDIR/tmp/bug31677.txt'))
+C35CC3C30A
+LOAD DATA INFILE 'MYSQLTEST_VARDIR/tmp/bug31677.txt' INTO TABLE t1 FIELDS ENCLOSED BY 0xC3;
+SELECT HEX(c1) FROM t1;
+HEX(c1)
+C3
+DROP TABLE t1;
 # End of 5.0 tests.
diff -Nrup a/mysql-test/t/outfile_loaddata.test b/mysql-test/t/outfile_loaddata.test
--- a/mysql-test/t/outfile_loaddata.test	2007-10-23 16:15:18 +05:00
+++ b/mysql-test/t/outfile_loaddata.test	2007-11-16 23:44:49 +04:00
@@ -86,4 +86,93 @@ DROP TABLE t2;
 
 DROP TABLE t1;
 
+--echo #
+--echo # Bug#31677: SELECT INTO OUTFILE never escapes multibyte character
+--echo #
+
+SET NAMES utf8;
+SET character_set_database= utf8;
+
+CREATE TABLE t1 (c1 VARCHAR(256), c2 VARCHAR(256)) CHARACTER SET utf8;
+INSERT INTO t1 VALUES ('Á', 'Á');
+SELECT * FROM t1;
+
+--let $file=$MYSQLTEST_VARDIR/tmp/bug31677.txt
+
+--let $clauses=
+--echo # No FIELDS ENCLOSED/TERMINATED:
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' $clauses FROM t1
+TRUNCATE t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT HEX(LOAD_FILE('$file'))
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t1 $clauses
+SELECT * FROM t1;
+
+--remove_file $file
+
+
+--let $clauses=FIELDS ENCLOSED BY 0xC3
+--echo # $clauses
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' $clauses FROM t1
+TRUNCATE t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT HEX(LOAD_FILE('$file'))
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t1 $clauses
+SELECT * FROM t1;
+
+--remove_file $file
+
+
+--let $clauses=FIELDS ENCLOSED BY 0xC3 TERMINATED BY 0x81
+--echo # $clauses:
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' $clauses FROM t1
+TRUNCATE t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT HEX(LOAD_FILE('$file'))
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t1 $clauses
+SELECT * FROM t1;
+
+--remove_file $file
+
+DROP TABLE t1;
+SET character_set_database= default;
+SET NAMES default;
+
+
+--let $clauses=FIELDS ENCLOSED BY 0xC3
+--echo # Single-byte 8bit chars enclosed by 8bit char ($clauses):
+
+CREATE TABLE t1 (c1 VARCHAR(256));
+INSERT INTO t1 VALUES (0xC3);
+SELECT HEX(c1) FROM t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT * INTO OUTFILE '$file' $clauses FROM t1
+TRUNCATE t1;
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval SELECT HEX(LOAD_FILE('$file'))
+
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval LOAD DATA INFILE '$file' INTO TABLE t1 $clauses
+SELECT HEX(c1) FROM t1;
+
+--remove_file $file
+DROP TABLE t1;
+
 --echo # End of 5.0 tests.
diff -Nrup a/sql/sql_class.cc b/sql/sql_class.cc
--- a/sql/sql_class.cc	2007-11-10 23:44:15 +04:00
+++ b/sql/sql_class.cc	2007-11-16 23:44:41 +04:00
@@ -1219,16 +1219,18 @@ select_export::prepare(List<Item> &list,
     }
   }
   field_term_length=exchange->field_term->length();
-  field_term_char= field_term_length ? (*exchange->field_term)[0] : INT_MAX;
+  field_term_char= field_term_length ?
+                   (int) (uchar) (*exchange->field_term)[0] : INT_MAX;
   if (!exchange->line_term->length())
     exchange->line_term=exchange->field_term;	// Use this if it exists
-  field_sep_char= (exchange->enclosed->length() ? (*exchange->enclosed)[0] :
-                   field_term_char);
-  escape_char=	(exchange->escaped->length() ? (*exchange->escaped)[0] : -1);
+  field_sep_char= (exchange->enclosed->length() ?
+                  (int) (uchar) (*exchange->enclosed)[0] : field_term_char);
+  escape_char=	(exchange->escaped->length() ?
+                (int) (uchar) (*exchange->escaped)[0] : -1);
   is_ambiguous_field_sep= test(strchr(ESCAPE_CHARS, field_sep_char));
   is_unsafe_field_sep= test(strchr(NUMERIC_CHARS, field_sep_char));
   line_sep_char= (exchange->line_term->length() ?
-		  (*exchange->line_term)[0] : INT_MAX);
+                 (int) (uchar) (*exchange->line_term)[0] : INT_MAX);
   if (!field_term_length)
     exchange->opt_enclosed=0;
   if (!exchange->enclosed->length())
@@ -1332,18 +1334,6 @@ bool select_export::send_data(List<Item>
 	     pos != end ;
 	     pos++)
 	{
-#ifdef USE_MB
-	  if (use_mb(res_charset))
-	  {
-	    int l;
-	    if ((l=my_ismbchar(res_charset, pos, end)))
-	    {
-	      pos += l-1;
-	      continue;
-	    }
-	  }
-#endif
-
           /*
             Special case when dumping BINARY/VARBINARY/BLOB values
             for the clients with character sets big5, cp932, gbk and sjis,
@@ -1385,10 +1375,11 @@ bool select_export::send_data(List<Item>
                Don't escape field_term_char by doubling - doubling is only
                valid for ENCLOSED BY characters:
               */
-              (enclosed || !is_ambiguous_field_term || *pos != field_term_char))
+              (enclosed || !is_ambiguous_field_term ||
+               (int) (uchar) *pos != field_term_char))
           {
 	    char tmp_buff[2];
-            tmp_buff[0]= ((int) *pos == field_sep_char &&
+            tmp_buff[0]= ((int) (uchar) *pos == field_sep_char &&
                           is_ambiguous_field_sep) ?
                           field_sep_char : escape_char;
 	    tmp_buff[1]= *pos ? *pos : '0';
@@ -1397,6 +1388,17 @@ bool select_export::send_data(List<Item>
 	      goto err;
 	    start=pos+1;
 	  }
+#ifdef USE_MB
+          else if (use_mb(res_charset))
+	  {
+	    int l;
+	    if ((l=my_ismbchar(res_charset, pos, end)))
+	    {
+	      pos += l-1;
+	      continue;
+	    }
+	  }
+#endif
 	}
 	if (my_b_write(&cache,(byte*) start,(uint) (pos-start)))
 	  goto err;
diff -Nrup a/sql/sql_load.cc b/sql/sql_load.cc
--- a/sql/sql_load.cc	2007-07-30 20:27:30 +05:00
+++ b/sql/sql_load.cc	2007-11-16 23:44:47 +04:00
@@ -1016,31 +1016,7 @@ int READ_INFO::read_field()
   {
     while ( to < end_of_buff)
     {
-      chr = GET;
-#ifdef USE_MB
-      if ((my_mbcharlen(read_charset, chr) > 1) &&
-          to+my_mbcharlen(read_charset, chr) <= end_of_buff)
-      {
-	  uchar* p = (uchar*)to;
-	  *to++ = chr;
-	  int ml = my_mbcharlen(read_charset, chr);
-	  int i;
-	  for (i=1; i<ml; i++) {
-	      chr = GET;
-	      if (chr == my_b_EOF)
-		  goto found_eof;
-	      *to++ = chr;
-	  }
-	  if (my_ismbchar(read_charset,
-                          (const char *)p,
-                          (const char *)to))
-	    continue;
-	  for (i=0; i<ml; i++)
-	    PUSH((uchar) *--to);
-	  chr = GET;
-      }
-#endif
-      if (chr == my_b_EOF)
+      if ((chr= GET) == my_b_EOF)
 	goto found_eof;
       if (chr == escape_char)
       {
@@ -1123,7 +1099,32 @@ int READ_INFO::read_field()
 	  return 0;
 	}
       }
-      *to++ = (byte) chr;
+#ifdef USE_MB
+      if ((my_mbcharlen(read_charset, chr) > 1) &&
+          to+my_mbcharlen(read_charset, chr) <= end_of_buff)
+      {
+	  uchar* p = (uchar*)to;
+	  *to++ = chr;
+	  int ml = my_mbcharlen(read_charset, chr);
+	  int i;
+	  for (i=1; i<ml; i++) {
+	      chr = GET;
+	      if (chr == my_b_EOF)
+		  goto found_eof;
+	      *to++ = chr;
+	  }
+	  if (my_ismbchar(read_charset,
+                          (const char *)p,
+                          (const char *)to))
+	    continue;
+	  /* Ill-formed sequence: push it back, then re-read its first byte */
+	  /* and store it as a single byte so that the loop makes progress. */
+	  for (i=0; i<ml; i++)
+	    PUSH((uchar) *--to);
+	  chr = GET;
+      }
+#endif
+      *to++ = (byte) chr;
     }
     /*
     ** We come here if buffer is too small. Enlarge it and continue
Thread
bk commit into 5.0 tree (gshchepa:1.2566) BUG#31677gshchepa16 Nov
  • Re: bk commit into 5.0 tree (gshchepa:1.2566) BUG#31677Sergei Golubchik19 Nov
    • Re: bk commit into 5.0 tree (gshchepa:1.2566) BUG#31677Konstantin Osipov23 Nov
      • Re: bk commit into 5.0 tree (gshchepa:1.2566) BUG#31677Gleb Shchepa23 Nov
      • Re: bk commit into 5.0 tree (gshchepa:1.2566) BUG#31677Sergei Golubchik23 Nov