MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:August 15 2006 5:08pm
Subject:bk commit into 5.0 tree (sergefp:1.2257) BUG#15872
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-08-15 21:08:22+04:00, sergefp@stripped +3 -0
  BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>) 
  In fix for BUG#15872, a condition of type "t.key NOT IN (c1, .... cN)"
  where N>1000, was incorrectly converted to
    (-inf < X < c_min) OR (c_max < X)
  Now this conversion is removed, we dont produce any range lists for such
  conditions.

  mysql-test/r/range.result@stripped, 2006-08-15 21:08:20+04:00, sergefp@stripped +22 -0
    BUG#21282: Testcase

  mysql-test/t/range.test@stripped, 2006-08-15 21:08:20+04:00, sergefp@stripped +25 -0
    BUG#21282: Testcase

  sql/opt_range.cc@stripped, 2006-08-15 21:08:20+04:00, sergefp@stripped +46 -61
    BUG#21282: Incorrect query results for "t.key NOT IN (<big const list>) 
    In fix for BUG#15872, a condition of type "t.key NOT IN (c1, .... cN)"
    where N>1000, was incorrectly converted to 
      (-inf < X < c_min) OR (c_max < X)
    Now this conversion is removed, we dont produce any range lists for such
    conditions.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sergefp
# Host:	pylon.mylan
# Root:	/home/psergey/mysql-5.0-opt-bug21282-r2

--- 1.217/sql/opt_range.cc	2006-08-15 21:08:26 +04:00
+++ 1.218/sql/opt_range.cc	2006-08-15 21:08:26 +04:00
@@ -3608,41 +3608,33 @@
       if (func->array && func->cmp_type != ROW_RESULT)
       {
         /*
-          We get here for conditions in form "t.key NOT IN (c1, c2, ...)" 
-          (where c{i} are constants).
-          Our goal is to produce a SEL_ARG graph that represents intervals:
+          We get here for conditions in form "t.key NOT IN (c1, c2, ...)",
+          where c{i} are constants. Our goal is to produce a SEL_TREE that 
+          represents intervals:
           
           ($MIN<t.key<c1) OR (c1<t.key<c2) OR (c2<t.key<c3) OR ...    (*)
           
           where $MIN is either "-inf" or NULL.
           
-          The most straightforward way to handle NOT IN would be to convert
-          it to "(t.key != c1) AND (t.key != c2) AND ..." and let the range
-          optimizer to build SEL_ARG graph from that. However that will cause
-          the range optimizer to use O(N^2) memory (it's a bug, not filed),
-          and people do use big NOT IN lists (see BUG#15872). Also, for big          
-          NOT IN lists constructing/using graph (*) does not make the query
-          faster.
-          
-          So, we will handle NOT IN manually in the following way:
-          * if the number of entries in the NOT IN list is less then 
-            NOT_IN_IGNORE_THRESHOLD, we will construct SEL_ARG graph (*)
-            manually.
-          * Otherwise, we will construct a smaller graph: for 
-            "t.key NOT IN (c1,...cN)" we construct a graph representing 
-            ($MIN < t.key) OR (cN < t.key)  // here sequence of c_i is
-                                            // ordered.
-
-          A note about partially-covering indexes: for those (e.g. for 
-          "a CHAR(10), KEY(a(5))") the handling is correct (albeit not very
-          efficient):
-          Instead of "t.key < c1" we get "t.key <= prefix-val(c1)".
-          Combining the intervals in (*) together, we get:
-          (-inf<=t.key<=c1) OR (c1<=t.key<=c2) OR (c2<=t.key<=c3) OR ...
-          i.e. actually we get intervals combined into one interval:
-          (-inf<=t.key<=+inf). This doesn't make much sense but it doesn't
-          cause any problems.
+          The most straightforward way to produce it is to convert NOT IN
+          into "(t.key != c1) AND (t.key != c2) AND ... " and let the range
+          analyzer to build SEL_TREE from that. The problem is that the
+          range analyzer will use O(N^2) memory (which is probably a bug),
+          and people do use big NOT IN lists (e.g. see BUG#15872, BUG#21282),
+          will run out of memory.
+
+          Another problem with big lists like (*) is that a big list is
+          unlikely to produce a good "range" access, while considering that
+          range access will require expensive CPU calculations (and for 
+          MyISAM even index accesses). In short, big NOT IN lists are rarely
+          worth analyzing.
+
+          Considering the above, we'll handle NOT IN as follows:
+          * if the number of entries in the NOT IN list is less than
+            NOT_IN_IGNORE_THRESHOLD, construct the SEL_TREE (*) manually.
+          * Otherwise, don't produce a SEL_TREE.
         */
+#define NOT_IN_IGNORE_THRESHOLD 1000
         MEM_ROOT *tmp_root= param->mem_root;
         param->thd->mem_root= param->old_root;
         /* 
@@ -3656,9 +3648,9 @@
         Item *value_item= func->array->create_item();
         param->thd->mem_root= tmp_root;
 
-        if (!value_item)
+        if (func->array->count > NOT_IN_IGNORE_THRESHOLD || !value_item)
           break;
-        
+
         /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
         uint i=0;
         do 
@@ -3677,45 +3669,39 @@
           tree= NULL;
           break;
         }
-#define NOT_IN_IGNORE_THRESHOLD 1000        
         SEL_TREE *tree2;
-        if (func->array->count < NOT_IN_IGNORE_THRESHOLD)
+        for (; i < func->array->count; i++)
         {
-          for (; i < func->array->count; i++)
+          if (func->array->compare_elems(i, i-1))
           {
-            if (func->array->compare_elems(i, i-1))
+            /* Get a SEL_TREE for "-inf < X < c_i" interval */
+            func->array->value_to_item(i, value_item);
+            tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
+                                value_item, cmp_type);
+            if (!tree2)
             {
-              /* Get a SEL_TREE for "-inf < X < c_i" interval */
-              func->array->value_to_item(i, value_item);
-              tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
-                                  value_item, cmp_type);
-              if (!tree2)
-              {
-                tree= NULL;
-                break;
-              }
+              tree= NULL;
+              break;
+            }
 
-              /* Change all intervals to be "c_{i-1} < X < c_i" */
-              for (uint idx= 0; idx < param->keys; idx++)
+            /* Change all intervals to be "c_{i-1} < X < c_i" */
+            for (uint idx= 0; idx < param->keys; idx++)
+            {
+              SEL_ARG *new_interval, *last_val;
+              if (((new_interval= tree2->keys[idx])) && 
+                  ((last_val= tree->keys[idx]->last())))
               {
-                SEL_ARG *new_interval, *last_val;
-                if (((new_interval= tree2->keys[idx])) && 
-                    ((last_val= tree->keys[idx]->last())))
-                {
-                  new_interval->min_value= last_val->max_value;
-                  new_interval->min_flag= NEAR_MIN;
-                }
+                new_interval->min_value= last_val->max_value;
+                new_interval->min_flag= NEAR_MIN;
               }
-              /* 
-                The following doesn't try to allocate memory so no need to
-                check for NULL.
-              */
-              tree= tree_or(param, tree, tree2);
             }
+            /* 
+              The following doesn't try to allocate memory so no need to
+              check for NULL.
+            */
+            tree= tree_or(param, tree, tree2);
           }
         }
-        else
-          func->array->value_to_item(func->array->count - 1, value_item);
         
         if (tree && tree->type != SEL_TREE::IMPOSSIBLE)
         {
@@ -3780,7 +3766,6 @@
   }
 
   DBUG_RETURN(tree);
-
 }
 
 	/* make a select tree of all keys in condition */

--- 1.46/mysql-test/r/range.result	2006-08-15 21:08:26 +04:00
+++ 1.47/mysql-test/r/range.result	2006-08-15 21:08:26 +04:00
@@ -838,3 +838,25 @@
 a	hex(filler)
 a	0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
 drop table t1,t2,t3;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, key(a));
+insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
+set @a="select * from t2 force index (a) where a NOT IN(0";
+select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
+count(*)
+1000
+set @a=concat(@a, ')');
+insert into t2 values (11),(13),(15);
+set @b= concat("explain ", @a);
+prepare stmt1 from @b;
+execute stmt1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	index	a	a	5	NULL	1003	Using where; Using index
+prepare stmt1 from @a;
+execute stmt1;
+a
+11
+13
+15
+drop table t1, t2;

--- 1.39/mysql-test/t/range.test	2006-08-15 21:08:26 +04:00
+++ 1.40/mysql-test/t/range.test	2006-08-15 21:08:26 +04:00
@@ -656,3 +656,28 @@
 select a, hex(filler) from t1 where a not between 'b' and 'b'; 
 
 drop table t1,t2,t3;
+
+#
+# BUG#21282
+#
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, key(a));
+insert into t2 select 2*(A.a + 10*(B.a + 10*C.a)) from t1 A, t1 B, t1 C;
+
+set @a="select * from t2 force index (a) where a NOT IN(0";
+select count(*) from (select @a:=concat(@a, ',', a) from t2 ) Z;
+set @a=concat(@a, ')');
+
+insert into t2 values (11),(13),(15);
+
+set @b= concat("explain ", @a);
+
+prepare stmt1 from @b;
+execute stmt1;
+
+prepare stmt1 from @a;
+execute stmt1;
+
+drop table t1, t2;
+# End of 5.0 tests
Thread
bk commit into 5.0 tree (sergefp:1.2257) BUG#15872Sergey Petrunia15 Aug