Below is the list of changes that have just been committed into a local
5.0 repository of svoj. When svoj does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository,
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-06-27 18:10:19+05:00, svoj@stripped +3 -0
BUG#29299 - repeatable myisam fulltext index corruption
A fulltext index may be corrupted by certain gbk characters.
The problem was that when skipping leading non-true-word-characters,
we assumed that these characters were always 1 byte long. This is not
the case with the gbk character set, where non-true-word-characters
may be 2 bytes long.
Affects 5.0 only.
myisam/ft_parser.c@stripped, 2007-06-27 18:10:18+05:00, svoj@stripped +4 -2
Leading non-true-word-characters may also be multi-byte (e.g. in
the gbk character set).
mysql-test/r/fulltext2.result@stripped, 2007-06-27 18:10:18+05:00, svoj@stripped +12 -0
A test case for BUG#29299.
mysql-test/t/fulltext2.test@stripped, 2007-06-27 18:10:18+05:00, svoj@stripped +12 -0
A test case for BUG#29299.
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: svoj
# Host: june.mysql.com
# Root: /home/svoj/devel/mysql/BUG29299/mysql-5.0-engines
--- 1.50/myisam/ft_parser.c 2006-12-31 00:02:04 +04:00
+++ 1.51/myisam/ft_parser.c 2007-06-27 18:10:18 +05:00
@@ -111,7 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte
while (doc<end)
{
- for (;doc<end;doc++)
+ for (; doc < end; doc+= mbl)
{
if (true_word_char(cs,*doc)) break;
if (*doc == FTB_RQUOT && param->quot)
@@ -120,6 +120,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte
*start=doc+1;
return 3; /* FTB_RBR */
}
+ mbl= my_mbcharlen(cs, *(uchar *)doc);
if (!param->quot)
{
if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
@@ -187,10 +188,11 @@ byte ft_simple_get_word(CHARSET_INFO *cs
do
{
- for (;; doc++)
+ for (;; doc+= mbl)
{
if (doc >= end) DBUG_RETURN(0);
if (true_word_char(cs, *doc)) break;
+ mbl= my_mbcharlen(cs, *(uchar *)doc);
}
mwc= length= 0;
--- 1.13/mysql-test/r/fulltext2.result 2006-05-12 21:40:15 +05:00
+++ 1.14/mysql-test/r/fulltext2.result 2007-06-27 18:10:18 +05:00
@@ -241,3 +241,15 @@ select * from t1 where match a against('
a
drop table t1;
set names latin1;
+CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
+SET NAMES utf8;
+INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
+SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
+HEX(a)
+BEF361616197C22061616161
+DELETE FROM t1 LIMIT 1;
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SET NAMES latin1;
+DROP TABLE t1;
--- 1.14/mysql-test/t/fulltext2.test 2006-05-12 21:26:46 +05:00
+++ 1.15/mysql-test/t/fulltext2.test 2007-06-27 18:10:18 +05:00
@@ -220,4 +220,16 @@ select * from t1 where match a against('
drop table t1;
set names latin1;
+#
+# BUG#29299 - repeatable myisam fulltext index corruption
+#
+CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
+SET NAMES utf8;
+INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
+SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
+DELETE FROM t1 LIMIT 1;
+CHECK TABLE t1;
+SET NAMES latin1;
+DROP TABLE t1;
+
# End of 4.1 tests