List: Commits    « Previous Message | Next Message »
From: bar    Date: June 4 2007 1:06pm
Subject: bk commit into 5.0 tree (bar:1.2513) BUG#26711
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of bar. When bar does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-06-04 18:06:39+05:00, bar@stripped +6 -0
  Bug#26711 "Binary content 0x00 sometimes becomes 0x5C 0x00 after dump/load"
  Problem: "SELECT INTO OUTFILE" created incorrect dumps for BLOBs: when a
  multi-byte head byte was followed by an escaped byte, "LOAD DATA" later
  interpreted the escaping 0x5C as the second byte of a multi-byte
  sequence instead of as the escape character.
  Fix: also escape multi-byte head bytes in that situation.

  mysql-test/r/ctype_big5.result@stripped, 2007-06-04 18:06:37+05:00, bar@stripped +11 -0
    Adding test case

  mysql-test/t/ctype_big5.test@stripped, 2007-06-04 18:06:37+05:00, bar@stripped +16 -0
    Adding test case

  sql/sql_class.cc@stripped, 2007-06-04 18:06:37+05:00, bar@stripped +41 -3
    Add escape characters before multi-byte heads.

  strings/ctype-big5.c@stripped, 2007-06-04 18:06:37+05:00, bar@stripped +2 -2
    Flagging character set as dangerous for escaping.

  strings/ctype-gbk.c@stripped, 2007-06-04 18:06:37+05:00, bar@stripped +2 -2
    Flagging character set as dangerous for escaping.

  strings/ctype-sjis.c@stripped, 2007-06-04 18:06:37+05:00, bar@stripped +2 -2
    Flagging character set as dangerous for escaping.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	bar
# Host:	bar.myoffice.izhnet.ru
# Root:	/home/bar/mysql-work/mysql-5.0.b26711

--- 1.270/sql/sql_class.cc	2007-04-24 20:25:50 +05:00
+++ 1.271/sql/sql_class.cc	2007-06-04 18:06:37 +05:00
@@ -1221,6 +1221,10 @@
 }
 
 
+#define NEED_ESCAPING(x) ((int) (x) == escape_char || \
+                          (int) (x) == field_sep_char || \
+                          (int) (x) == line_sep_char  || !(x))
+
 bool select_export::send_data(List<Item> &items)
 {
 
@@ -1297,10 +1301,44 @@
 	      continue;
 	    }
 	  }
-#endif
-	  if ((int) *pos == escape_char || (int) *pos == field_sep_char ||
-	      (int) *pos == line_sep_char || !*pos)
+	  else if (res_charset == &my_charset_bin)
 	  {
+	    /*
+              Special case when dumping BINARY/VARBINARY/BLOB values
+              for the clients with character sets big5, cp932, gbk and sjis,
+              which can have the escape character (0x5C "\" by default)
+              as the second byte of a multi-byte sequence.
+              
+              If
+              - pos[0] is a valid multi-byte head (e.g. 0xEE) and
+              - pos[1] is 0x00, which will be escaped as "\0",
+              
+              then we'll get "0xEE + 0x5C + 0x30" in the output file.
+              
+              If this file is later loaded using this sequence of commands:
+              
+              mysql> create table t1 (a varchar(128)) character set big5;
+              mysql> LOAD DATA INFILE 'dump.txt' INTO TABLE t1;
+              
+              then 0x5C will be misinterpreted as the second byte
+              of a multi-byte character "0xEE + 0x5C", instead of
+              as the escape character for 0x00.
+              
+              To avoid this confusion, we'll escape the multi-byte
+              head character too, so the sequence "0xEE + 0x00" will be
+              dumped as "0x5C + 0xEE + 0x5C + 0x30".
+            */
+            CHARSET_INFO *character_set_client= thd->variables.
+                                                character_set_client;
+            if (character_set_client->escape_with_backslash_is_dangerous &&
+                my_mbcharlen(character_set_client, (uchar) *pos) == 2 &&
+                pos + 1 < end && NEED_ESCAPING(pos[1]))
+              goto escape_me;
+          }
+#endif
+          if (NEED_ESCAPING(*pos))
+          {
+escape_me:
 	    char tmp_buff[2];
 	    tmp_buff[0]= escape_char;
 	    tmp_buff[1]= *pos ? *pos : '0';

--- 1.91/strings/ctype-big5.c	2006-12-23 23:04:29 +04:00
+++ 1.92/strings/ctype-big5.c	2007-06-04 18:06:37 +05:00
@@ -6400,7 +6400,7 @@
     0,			/* min_sort_char */
     255,		/* max_sort_char */
     ' ',                /* pad char      */
-    0,                  /* escape_with_backslash_is_dangerous */
+    1,                  /* escape_with_backslash_is_dangerous */
     &my_charset_big5_handler,
     &my_collation_big5_chinese_ci_handler
 };
@@ -6433,7 +6433,7 @@
     0,			/* min_sort_char */
     255,		/* max_sort_char */
     ' ',                /* pad char      */
-    0,                  /* escape_with_backslash_is_dangerous */
+    1,                  /* escape_with_backslash_is_dangerous */
     &my_charset_big5_handler,
     &my_collation_mb_bin_handler
 };

--- 1.81/strings/ctype-gbk.c	2006-12-23 23:04:29 +04:00
+++ 1.82/strings/ctype-gbk.c	2007-06-04 18:06:37 +05:00
@@ -10046,7 +10046,7 @@
     0,			/* min_sort_char */
     255,		/* max_sort_char */
     ' ',                /* pad char      */
-    0,                  /* escape_with_backslash_is_dangerous */
+    1,                  /* escape_with_backslash_is_dangerous */
     &my_charset_handler,
     &my_collation_ci_handler
 };
@@ -10078,7 +10078,7 @@
     0,			/* min_sort_char */
     255,		/* max_sort_char */
     ' ',                /* pad char      */
-    0,                  /* escape_with_backslash_is_dangerous */
+    1,                  /* escape_with_backslash_is_dangerous */
     &my_charset_handler,
     &my_collation_mb_bin_handler
 };

--- 1.93/strings/ctype-sjis.c	2007-01-22 16:10:42 +04:00
+++ 1.94/strings/ctype-sjis.c	2007-06-04 18:06:37 +05:00
@@ -4694,7 +4694,7 @@
     0,			/* min_sort_char */
     255,		/* max_sort_char */
     ' ',                /* pad char      */
-    0,                  /* escape_with_backslash_is_dangerous */
+    1,                  /* escape_with_backslash_is_dangerous */
     &my_charset_handler,
     &my_collation_ci_handler
 };
@@ -4726,7 +4726,7 @@
     0,			/* min_sort_char */
     255,		/* max_sort_char */
     ' ',                /* pad char      */
-    0,                  /* escape_with_backslash_is_dangerous */
+    1,                  /* escape_with_backslash_is_dangerous */
     &my_charset_handler,
     &my_collation_mb_bin_handler
 };

--- 1.11/mysql-test/r/ctype_big5.result	2005-12-12 21:36:21 +04:00
+++ 1.12/mysql-test/r/ctype_big5.result	2007-06-04 18:06:37 +05:00
@@ -192,3 +192,14 @@
 select hex(convert(_big5 0xC84041 using ucs2));
 hex(convert(_big5 0xC84041 using ucs2))
 003F0041
+create table t1 (a blob);
+insert into t1 values (0xEE00);
+delete from t1;
+select hex(load_file('test/t1.txt'));
+hex(load_file('test/t1.txt'))
+5CEE5C300A
+load data infile 't1.txt' into table t1;
+select hex(a) from t1;
+hex(a)
+EE00
+drop table t1;

--- 1.11/mysql-test/t/ctype_big5.test	2005-12-12 21:36:47 +04:00
+++ 1.12/mysql-test/t/ctype_big5.test	2007-06-04 18:06:37 +05:00
@@ -64,3 +64,19 @@
 select hex(convert(_big5 0xC84041 using ucs2));
 
 # End of 4.1 tests
+
+#
+# Bug#26711 "binary content 0x00 sometimes becomes 0x5C 0x00 after dump/load"
+#
+create table t1 (a blob);
+insert into t1 values (0xEE00);
+--exec $MYSQL_DUMP --default-character-set=big5 -T $MYSQLTEST_VARDIR/master-data/test test t1
+delete from t1;
+select hex(load_file('test/t1.txt'));
+load data infile 't1.txt' into table t1;
+select hex(a) from t1;
+--exec rm $MYSQLTEST_VARDIR/master-data/test/t1.txt
+--exec rm $MYSQLTEST_VARDIR/master-data/test/t1.sql
+drop table t1;
+
+# End of 5.0 tests
Thread
bk commit into 5.0 tree (bar:1.2513) BUG#26711 | bar | 4 Jun