From: Date: June 27 2007 3:10pm Subject: bk commit into 5.0 tree (svoj:1.2493) BUG#29299 List-Archive: http://lists.mysql.com/commits/29723 X-Bug: 29299 Message-Id: <20070627131022.BEBF541CEC6@june.myoffice.izhnet.ru> Below is the list of changes that have just been committed into a local 5.0 repository of svoj. When svoj does a push, these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet@stripped, 2007-06-27 18:10:19+05:00, svoj@stripped +3 -0 BUG#29299 - repeatable myisam fulltext index corruption A fulltext index may be corrupted by certain gbk characters. The problem was that when skipping leading non-true-word-characters, we assumed that these characters are always 1 byte long. This is not the case with the gbk character set, where non-true-word-characters may be 2 bytes long. Affects 5.0 only. myisam/ft_parser.c@stripped, 2007-06-27 18:10:18+05:00, svoj@stripped +4 -2 Leading non-true-word-characters may also be multi-byte (e.g. in the gbk character set). mysql-test/r/fulltext2.result@stripped, 2007-06-27 18:10:18+05:00, svoj@stripped +12 -0 A test case for BUG#29299. mysql-test/t/fulltext2.test@stripped, 2007-06-27 18:10:18+05:00, svoj@stripped +12 -0 A test case for BUG#29299. # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. 
# User: svoj # Host: june.mysql.com # Root: /home/svoj/devel/mysql/BUG29299/mysql-5.0-engines --- 1.50/myisam/ft_parser.c 2006-12-31 00:02:04 +04:00 +++ 1.51/myisam/ft_parser.c 2007-06-27 18:10:18 +05:00 @@ -111,7 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte while (docquot) @@ -120,6 +120,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte *start=doc+1; return 3; /* FTB_RBR */ } + mbl= my_mbcharlen(cs, *(uchar *)doc); if (!param->quot) { if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) @@ -187,10 +188,11 @@ byte ft_simple_get_word(CHARSET_INFO *cs do { - for (;; doc++) + for (;; doc+= mbl) { if (doc >= end) DBUG_RETURN(0); if (true_word_char(cs, *doc)) break; + mbl= my_mbcharlen(cs, *(uchar *)doc); } mwc= length= 0; --- 1.13/mysql-test/r/fulltext2.result 2006-05-12 21:40:15 +05:00 +++ 1.14/mysql-test/r/fulltext2.result 2007-06-27 18:10:18 +05:00 @@ -241,3 +241,15 @@ select * from t1 where match a against(' a drop table t1; set names latin1; +CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a)); +SET NAMES utf8; +INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161); +SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE); +HEX(a) +BEF361616197C22061616161 +DELETE FROM t1 LIMIT 1; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SET NAMES latin1; +DROP TABLE t1; --- 1.14/mysql-test/t/fulltext2.test 2006-05-12 21:26:46 +05:00 +++ 1.15/mysql-test/t/fulltext2.test 2007-06-27 18:10:18 +05:00 @@ -220,4 +220,16 @@ select * from t1 where match a against(' drop table t1; set names latin1; +# +# BUG#29299 - repeatable myisam fulltext index corruption +# +CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a)); +SET NAMES utf8; +INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161); +SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE); +DELETE FROM t1 LIMIT 1; +CHECK TABLE t1; +SET NAMES latin1; +DROP TABLE t1; + # End of 4.1 tests