#At file:///home/svoj/devel/bzr-mysql/mysql-5.1-bugteam-bug37245/
2711 Sergey Vojtovich 2008-11-13
BUG#37245 - Full text search problem
Certain boolean mode queries with truncation operator may
not return matching records and may calculate relevancy
incorrectly.
The problem was that we used binary search to determine
if the word with truncation operator is in the query.
Fixed by implementing a separate function, which is intended
to handle queries with truncation operator.
modified:
mysql-test/r/fulltext.result
mysql-test/t/fulltext.test
storage/myisam/ft_boolean_search.c
per-file messages:
mysql-test/r/fulltext.result
A test case for BUG#37245.
mysql-test/t/fulltext.test
A test case for BUG#37245.
storage/myisam/ft_boolean_search.c
Added ftb_find_relevance_add_word_with_trunc() function
as a replacement for ftb_find_relevance_add_word() for
queries with truncation operator.
=== modified file 'mysql-test/r/fulltext.result'
--- a/mysql-test/r/fulltext.result 2008-02-12 19:09:16 +0000
+++ b/mysql-test/r/fulltext.result 2008-11-13 14:55:50 +0000
@@ -494,3 +494,12 @@ SELECT a FROM t1 WHERE MATCH a AGAINST (
a
City Of God
DROP TABLE t1;
+CREATE TABLE t1(a CHAR(10));
+INSERT INTO t1 VALUES('aaa15');
+SELECT MATCH(a) AGAINST('aaa1* aaa14 aaa16' IN BOOLEAN MODE) FROM t1;
+MATCH(a) AGAINST('aaa1* aaa14 aaa16' IN BOOLEAN MODE)
+1
+SELECT MATCH(a) AGAINST('aaa1* aaa14 aaa15 aaa16' IN BOOLEAN MODE) FROM t1;
+MATCH(a) AGAINST('aaa1* aaa14 aaa15 aaa16' IN BOOLEAN MODE)
+2
+DROP TABLE t1;
=== modified file 'mysql-test/t/fulltext.test'
--- a/mysql-test/t/fulltext.test 2007-11-02 09:20:38 +0000
+++ b/mysql-test/t/fulltext.test 2008-11-13 14:55:50 +0000
@@ -418,3 +418,12 @@ SELECT a FROM t1 WHERE MATCH a AGAINST (
DROP TABLE t1;
# End of 4.1 tests
+
+#
+# BUG#37245 - Full text search problem
+#
+CREATE TABLE t1(a CHAR(10));
+INSERT INTO t1 VALUES('aaa15');
+SELECT MATCH(a) AGAINST('aaa1* aaa14 aaa16' IN BOOLEAN MODE) FROM t1;
+SELECT MATCH(a) AGAINST('aaa1* aaa14 aaa15 aaa16' IN BOOLEAN MODE) FROM t1;
+DROP TABLE t1;
=== modified file 'storage/myisam/ft_boolean_search.c'
--- a/storage/myisam/ft_boolean_search.c 2008-02-12 09:45:08 +0000
+++ b/storage/myisam/ft_boolean_search.c 2008-11-13 14:55:50 +0000
@@ -891,6 +891,63 @@ static int ftb_find_relevance_add_word(M
return(0);
}
+/*
+ @brief Find relevance for queries with truncation operator
+
+ @param[in,out] param Parser param
+ @param[in] word the word from the document
+ @param[in] len the length of the word
+ @param[in] boolean_info unused
+
+ @note This function is a replacement for ftb_find_relevance_add_word(),
+ for queries with truncation operator. It iterates through all
+ words of the query attempting to find a match.
+
+ As ftb_find_relevance_add_word() uses binary search, it won't
+ detect a match of e.g. 'aaa15' against 'aaa1* aaa14 aaa16'.
+
+ Also ftb_find_relevance_add_word() would not calculate relevance
+ correctly for queries like 'aaa1* aaa14 aaa15 aaa16'.
+
+ This happens because aaa1* may compare as equal to, smaller than
+ or bigger than e.g. aaa14, which breaks the main idea of binary
+ search, where a value must be consistently smaller than, bigger
+ than or equal to each of the other array elements.
+
+ There is still some room for improvements here. We could sort
+ words with truncation operator, so they're at the beginning
+ of the queue, store number of words with truncation operator
+ somewhere. Later we could iterate through all words with
+ truncation operator and use binary search for other words.
+
+ @see ftb_find_relevance_add_word()
+
+ @return Execution status.
+ @retval 0 Success
+ @retval 1 Error
+*/
+static int ftb_find_relevance_add_word_with_trunc(MYSQL_FTPARSER_PARAM *param,
+ char *word, int len,
+ MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
+{
+ MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam; /* per-call state passed through the parser param */
+ FT_INFO *ftb= ftb_param->ftb;
+ uint i;
+ for (i= 0; i < ftb->queue.elements; i++) /* visit every query word in turn; no binary search here */
+ {
+ FTB_WORD *ftbw= ftb->list[i];
+ if (ha_compare_text(ftb->charset, (uchar*) word, len, /* compare the document word ... */
+ (uchar*) ftbw->word + 1, ftbw->len - 1, /* NOTE(review): +1 / -1 presumably skip a one-byte prefix in ftbw->word; confirm */
+ (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0) || /* TRUNC flag is forwarded; presumably enables prefix matching, confirm in ha_compare_text() */
+ ftbw->docid[1] == ftb->info->lastpos) /* ... or this query word is already stamped with the current lastpos */
+ continue; /* non-zero compare result or already stamped: skip this query word */
+ ftbw->docid[1]= ftb->info->lastpos; /* stamp the word with the current lastpos before climbing */
+ if (unlikely(_ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi)))
+ return 1; /* propagate a non-zero result from _ftb_climb_the_tree() as error */
+ }
+ return 0; /* all query words were visited without error */
+}
+
static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param,
char *doc, int len)
@@ -947,7 +1004,8 @@ float ft_boolean_find_relevance(FT_INFO
ftb_param.ftb= ftb;
ftb_param.ftsi= &ftsi2;
param->mysql_parse= ftb_find_relevance_parse;
- param->mysql_add_word= ftb_find_relevance_add_word;
+ param->mysql_add_word= ftb->with_scan & FTB_FLAG_TRUNC ?
+ ftb_find_relevance_add_word_with_trunc : ftb_find_relevance_add_word;
param->mysql_ftparam= (void *)&ftb_param;
param->flags= 0;
param->cs= ftb->charset;