List: Internals  « Previous Message | Next Message »
From: svoj  Date: March 4 2005 1:10pm
Subject: bk commit into 5.0 tree (svoj:1.1775) BUG#8351
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of svoj. When svoj does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.1775 05/03/04 16:10:41 svoj@stripped +4 -0
  Fix for BUG#8351 (5.0) - Double quote in Boolean Fulltext query causes crash

  myisam/ftdefs.h
    1.30 05/03/04 16:10:37 svoj@stripped +2 -1
    One more argument for ft_simple_get_word added.
    Third argument of ft_simple_get_word changed to const.

  myisam/ft_stopwords.c
    1.22 05/03/04 16:10:37 svoj@stripped +1 -1
    One more argument for ft_simple_get_word.

  myisam/ft_parser.c
    1.44 05/03/04 16:10:37 svoj@stripped +17 -8
    ft_get_word extended, so it can return short words and stopwords. (return value 4)
    ft_simple_get_word extended, so it can return short words and stopwords
    when skip_stopwords is false.

  myisam/ft_boolean_search.c
    1.89 05/03/04 16:10:37 svoj@stripped +43 -23
    Put phrase words, including stopwords, into list `phrase'.
    Use `phrase' instead of `quot', `qend' in _ftb_strstr.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	svoj
# Host:	svoj.pils.ru
# Root:	/home/svoj/devel/mysql/ft-mysql-5.0

--- 1.43/myisam/ft_parser.c	2004-10-25 13:23:32 +05:00
+++ 1.44/myisam/ft_parser.c	2005-03-04 16:10:37 +04:00
@@ -98,6 +98,7 @@
  * 1 - word found
  * 2 - left bracket
  * 3 - right bracket
+ * 4 - stopword found
  */
 byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
                  FT_WORD *word, FTB_PARAM *param)
@@ -161,6 +162,11 @@
       *start=doc;
       return 1;
     }
+    else if (length)
+    {
+      *start= doc;
+      return 4;
+    }
   }
   if (param->quot)
   {
@@ -170,18 +176,19 @@
   return 0;
 }
 
-byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, byte *end,
-                        FT_WORD *word)
+byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
+                        FT_WORD *word, my_bool skip_stopwords)
 {
   byte *doc= *start;
   uint mwc, length, mbl;
   DBUG_ENTER("ft_simple_get_word");
 
-  while (doc<end)
+  do
   {
-    for (;doc<end;doc++)
+    for (;; doc++)
     {
-      if (true_word_char(cs,*doc)) break;
+      if (doc >= end) DBUG_RETURN(0);
+      if (true_word_char(cs, *doc)) break;
     }
 
     mwc= length= 0;
@@ -193,13 +200,15 @@
 
     word->len= (uint)(doc-word->pos) - mwc;
 
-    if (length >= ft_min_word_len && length < ft_max_word_len &&
-        !is_stopword(word->pos, word->len))
+    if (skip_stopwords == FALSE ||
+        (length >= ft_min_word_len && length < ft_max_word_len &&
+         !is_stopword(word->pos, word->len)))
     {
       *start= doc;
       DBUG_RETURN(1);
     }
   }
+  while (doc < end);
   DBUG_RETURN(0);
 }
 
@@ -217,7 +226,7 @@
   FT_WORD w;
   DBUG_ENTER("ft_parse");
 
-  while (ft_simple_get_word(wtree->custom_arg, &doc,end,&w))
+  while (ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE))
   {
     if (with_alloc)
     {

--- 1.21/myisam/ft_stopwords.c	2005-02-18 15:51:52 +04:00
+++ 1.22/myisam/ft_stopwords.c	2005-03-04 16:10:37 +04:00
@@ -81,7 +81,7 @@
       goto err0;
     len=my_read(fd, buffer, len, MYF(MY_WME));
     end=start+len;
-    while (ft_simple_get_word(default_charset_info, &start, end, &w))
+    while (ft_simple_get_word(default_charset_info, &start, end, &w, TRUE))
     {
       if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0))))
         goto err1;

--- 1.29/myisam/ftdefs.h	2005-02-04 18:23:58 +04:00
+++ 1.30/myisam/ftdefs.h	2005-03-04 16:10:37 +04:00
@@ -112,7 +112,8 @@
 uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t);
 
 byte ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *, FTB_PARAM *);
-byte ft_simple_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *);
+byte ft_simple_get_word(CHARSET_INFO *, byte **, const byte *,
+                        FT_WORD *, my_bool);
 
 typedef struct _st_ft_seg_iterator {
   uint        num, len;

--- 1.88/myisam/ft_boolean_search.c	2005-02-15 18:46:39 +04:00
+++ 1.89/myisam/ft_boolean_search.c	2005-03-04 16:10:37 +04:00
@@ -69,6 +69,7 @@
   float     weight;
   float     cur_weight;
   byte     *quot, *qend;
+  LIST     *phrase;               /* phrase words */
   uint      yesses;               /* number of "yes" words matched */
   uint      nos;                  /* number of "no"  words matched */
   uint      ythresh;              /* number of "yes" words in expr */
@@ -139,6 +140,8 @@
   FT_WORD     w;
   FTB_WORD   *ftbw;
   FTB_EXPR   *ftbe;
+  FT_WORD    *phrase_word;
+  LIST       *phrase_list;
   uint  extra=HA_FT_WLEN+ftb->info->s->rec_reflength; /* just a shortcut */
 
   if (ftb->state != UNINITIALIZED)
@@ -146,6 +149,7 @@
 
   param.prev=' ';
   param.quot=up->quot;
+  up->phrase= NULL;
   while ((res=ft_get_word(ftb->charset,start,end,&w,&param)))
   {
     int   r=param.plusminus;
@@ -172,6 +176,14 @@
         if (param.yesno > 0) up->ythresh++;
         queue_insert(& ftb->queue, (byte *)ftbw);
         ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC);
+      case 4:
+        if (! up->quot) break;
+        phrase_word= (FT_WORD *)alloc_root(&ftb->mem_root, sizeof(FT_WORD));
+        phrase_list= (LIST *)alloc_root(&ftb->mem_root, sizeof(LIST));
+        phrase_word->pos= w.pos;
+        phrase_word->len= w.len;
+        phrase_list->data= (void *)phrase_word;
+        up->phrase= list_add(up->phrase, phrase_list);
         break;
       case 2: /* left bracket */
         ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR));
@@ -188,7 +200,11 @@
         param.quot=0;
         break;
       case 3: /* right bracket */
-        if (up->quot) up->qend=param.quot;
+        if (up->quot)
+        {
+          up->qend= param.quot;
+          up->phrase= list_reverse(up->phrase);
+        }
         return;
     }
   }
@@ -433,27 +449,31 @@
 
 /* returns 1 if str0 ~= /\bstr1\b/ */
 static int _ftb_strstr(const byte *s0, const byte *e0,
-                const byte *s1, const byte *e1,
-                CHARSET_INFO *cs)
+                LIST *phrase, CHARSET_INFO *cs)
 {
-  const byte *p0= s0;
-  my_bool s_after= true_word_char(cs, s1[0]);
-  my_bool e_before= true_word_char(cs, e1[-1]);
-  uint p0_len;
-  my_match_t m[2];
-
-  while (p0 < e0)
-  {
-    if (cs->coll->instr(cs, p0, e0 - p0, s1, e1 - s1, m, 2) != 2)
-      return(0);
-    if ((!s_after || p0 + m[1].beg == s0 || !true_word_char(cs, p0[m[1].beg-1])) &&
-        (!e_before || p0 + m[1].end == e0 || !true_word_char(cs, p0[m[1].end])))
-      return(1);
-    p0+= m[1].beg;
-    p0+= (p0_len= my_mbcharlen(cs, *(uchar *)p0)) ? p0_len : 1;
-  }
+  FT_WORD h_word;
+  const byte *h_start= s0;
+  DBUG_ENTER("_ftb_strstr");
 
-  return(0);
+  if (! phrase) DBUG_RETURN(0);
+
+  while (ft_simple_get_word(cs, (byte **)&h_start, e0, &h_word, FALSE))
+  {
+    FT_WORD *n_word;
+    LIST *phrase_element= phrase;
+    const byte *h_start1= h_start;
+    for (;;)
+    {
+      n_word= (FT_WORD *)phrase_element->data;
+      if (my_strnncoll(cs, h_word.pos, h_word.len, n_word->pos, n_word->len))
+        break;
+      if (! (phrase_element= phrase_element->next))
+        DBUG_RETURN(1);
+      if (! ft_simple_get_word(cs, (byte **)&h_start1, e0, &h_word, FALSE))
+        DBUG_RETURN(0);
+    }
+  }
+  DBUG_RETURN(0);
 }
 
 
@@ -494,7 +514,7 @@
             if (!ftsi.pos)
               continue;
             not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len,
-                                      ftbe->quot, ftbe->qend, ftb->charset);
+                                      ftbe->phrase, ftb->charset);
           }
           if (not_found) break;
         } /* ftbe->quot */
@@ -642,8 +662,8 @@
       continue;
 
     end=ftsi.pos+ftsi.len;
-    while (ft_simple_get_word(ftb->charset,
-                              (byte **) &ftsi.pos, (byte *) end, &word))
+    while (ft_simple_get_word(ftb->charset, (byte **) &ftsi.pos,
+                              (byte *) end, &word, TRUE))
     {
       int a, b, c;
       for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2)
Thread
bk commit into 5.0 tree (svoj:1.1775) BUG#8351 — svoj, 4 Mar