List: Commits « Previous Message | Next Message »
From: Sergey Vojtovich  Date: June 27 2007 1:10pm
Subject: bk commit into 5.0 tree (svoj:1.2493) BUG#29299
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of svoj. When svoj does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-06-27 18:10:19+05:00, svoj@stripped +3 -0
  BUG#29299 - repeatable myisam fulltext index corruption
  
  A fulltext index may become corrupted by certain gbk characters.
  
  The problem was that when skipping leading non-true-word characters,
  we assumed that these characters are always 1 byte long. This is not
  the case with the gbk character set, where non-true-word characters
  may be 2 bytes long.
  
  Affects 5.0 only.

  myisam/ft_parser.c@stripped, 2007-06-27 18:10:18+05:00, svoj@stripped +4 -2
    Leading non-true-word-characters may also be multi-byte (e.g. in
    gbk character set).

  mysql-test/r/fulltext2.result@stripped, 2007-06-27 18:10:18+05:00, svoj@stripped +12 -0
    A test case for BUG#29299.

  mysql-test/t/fulltext2.test@stripped, 2007-06-27 18:10:18+05:00, svoj@stripped +12 -0
    A test case for BUG#29299.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	svoj
# Host:	june.mysql.com
# Root:	/home/svoj/devel/mysql/BUG29299/mysql-5.0-engines

--- 1.50/myisam/ft_parser.c	2006-12-31 00:02:04 +04:00
+++ 1.51/myisam/ft_parser.c	2007-06-27 18:10:18 +05:00
@@ -111,7 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte 
 
   while (doc<end)
   {
-    for (;doc<end;doc++)
+    for (; doc < end; doc+= mbl)
     {
       if (true_word_char(cs,*doc)) break;
       if (*doc == FTB_RQUOT && param->quot)
@@ -120,6 +120,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte 
         *start=doc+1;
         return 3; /* FTB_RBR */
       }
+      mbl= my_mbcharlen(cs, *(uchar *)doc);
       if (!param->quot)
       {
         if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
@@ -187,10 +188,11 @@ byte ft_simple_get_word(CHARSET_INFO *cs
 
   do
   {
-    for (;; doc++)
+    for (;; doc+= mbl)
     {
       if (doc >= end) DBUG_RETURN(0);
       if (true_word_char(cs, *doc)) break;
+      mbl= my_mbcharlen(cs, *(uchar *)doc);
     }
 
     mwc= length= 0;

--- 1.13/mysql-test/r/fulltext2.result	2006-05-12 21:40:15 +05:00
+++ 1.14/mysql-test/r/fulltext2.result	2007-06-27 18:10:18 +05:00
@@ -241,3 +241,15 @@ select * from t1 where match a against('
 a
 drop table t1;
 set names latin1;
+CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
+SET NAMES utf8;
+INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
+SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
+HEX(a)
+BEF361616197C22061616161
+DELETE FROM t1 LIMIT 1;
+CHECK TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	check	status	OK
+SET NAMES latin1;
+DROP TABLE t1;

--- 1.14/mysql-test/t/fulltext2.test	2006-05-12 21:26:46 +05:00
+++ 1.15/mysql-test/t/fulltext2.test	2007-06-27 18:10:18 +05:00
@@ -220,4 +220,16 @@ select * from t1 where match a against('
 drop table t1;
 set names latin1;
 
+#
+# BUG#29299 - repeatable myisam fulltext index corruption
+#
+CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
+SET NAMES utf8;
+INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
+SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
+DELETE FROM t1 LIMIT 1;
+CHECK TABLE t1;
+SET NAMES latin1;
+DROP TABLE t1;
+
 # End of 4.1 tests
Thread
bk commit into 5.0 tree (svoj:1.2493) BUG#29299 - Sergey Vojtovich, 27 Jun
  • Re: bk commit into 5.0 tree (svoj:1.2493) BUG#29299 - Sergei Golubchik, 29 Jun