MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:June 30 2006 5:05am
Subject:bk commit into 4.1 tree (sergefp:1.2517) BUG#16168
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2517 06/06/30 09:05:12 sergefp@stripped +3 -0
  BUG#16168: Wrong results from range optimizer, "Use_count: Wrong count for key ..." warnings:
   - Added comments.
   - Make SEL_ARG::clone() set SEL_ARG::elements in the created copy.

  sql/opt_range.cc
    1.146 06/06/30 09:05:09 sergefp@stripped +179 -4
    BUG#16168: Wrong results from range optimizer, "Use_count: Wrong count for key ..." warnings:
     - Added comments.
     - Make SEL_ARG::clone() set SEL_ARG::elements in the created copy.

  mysql-test/t/range.test
    1.31 06/06/30 09:05:08 sergefp@stripped +26 -0
    BUG#16168: Testcase

  mysql-test/r/range.result
    1.35 06/06/30 09:05:08 sergefp@stripped +22 -0
    BUG#16168: Testcase

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sergefp
# Host:	newbox.mylan
# Root:	/home/psergey/mysql-4.1-bug16168-r2

--- 1.145/sql/opt_range.cc	2006-06-02 13:03:59 +04:00
+++ 1.146/sql/opt_range.cc	2006-06-30 09:05:09 +04:00
@@ -42,18 +42,119 @@ static int sel_cmp(Field *f,char *a,char
 
 static char is_null_string[2]= {1,0};
 
+
+/*
+  A construction block of the SEL_ARG-graph.
+  
+  The following description only covers graphs of SEL_ARG objects with 
+  sel_arg->type==KEY_RANGE:
+
+  One SEL_ARG object represents an "elementary interval" in form
+  
+      min_value <=?  table.keypartX  <=? max_value
+  
+  The interval is a non-empty interval of any kind: with[out] minimum/maximum
+  bound, [half]open/closed, single-point interval, etc.
+
+  1. SEL_ARG GRAPH STRUCTURE
+  
+  SEL_ARG objects are linked together in a graph. The meaning of the graph
+  is better demostrated by an example:
+  
+     tree->keys[i]
+      | 
+      |             $              $
+      |    part=1   $     part=2   $    part=3
+      |             $              $
+      |  +-------+  $   +-------+  $   +--------+
+      |  | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
+      |  +-------+  $   +-------+  $   +--------+
+      |      |      $              $       |
+      |      |      $              $   +--------+
+      |      |      $              $   | kp3=12 | 
+      |      |      $              $   +--------+ 
+      |  +-------+  $              $   
+      \->| kp1=2 |--$--------------$-+ 
+         +-------+  $              $ |   +--------+
+             |      $              $  ==>| kp3=11 |
+         +-------+  $              $ |   +--------+
+         | kp1=3 |--$--------------$-+       |
+         +-------+  $              $     +--------+
+             |      $              $     | kp3=14 |
+            ...     $              $     +--------+
+ 
+  The entire graph is partitioned into "interval lists".
+
+  An interval list is a sequence of ordered disjoint intervals over the same
+  key part. SEL_ARG are linked via "next" and "prev" pointers. Additionally,
+  all intervals in the list form an RB-tree, linked via left/right/parent 
+  pointers. The RB-tree root SEL_ARG object will be further called "root of the
+  interval list".
+  
+    In the example pic, there are 4 interval lists: 
+    "kp<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=13".
+    The vertical lines represent SEL_ARG::next/prev pointers.
+    
+  In an interval list, each member X may have SEL_ARG::next_key_part pointer
+  pointing to the root of another interval list Y. The pointed interval list
+  must cover a key part with greater number (i.e. Y->part > X->part).
+    
+    In the example pic, the next_key_part pointers are represented by
+    horisontal lines.
+
+  2. SEL_ARG GRAPH SEMANTICS
+
+  It represents a condition in a special form (we don't have a name for it ATM)
+  The SEL_ARG::next/prev is "OR", and next_key_part is "AND".
+  
+  For example, the picture represents the condition in form:
+   (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR 
+   (kp1=2 AND (kp3=11 OR kp3=14)) OR 
+   (kp1=3 AND (kp3=11 OR kp3=14))
+
+
+  3. SEL_ARG GRAPH USE
+
+  Use get_mm_tree() to construct SEL_ARG graph from WHERE condition.
+  Then walk the SEL_ARG graph and get a list of dijsoint ordered key
+  intervals (i.e. intervals in form
+  
+   (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K)
+
+  Those intervals can be used to access the index. The uses are in:
+   - check_quick_select() - Walk the SEL_ARG graph and find an estimate of
+                            how many table records are contained within all
+                            intervals.
+   - get_quick_select()   - Walk the SEL_ARG, materialize the key intervals,
+                            and create QUICK_RANGE_SELECT object that will
+                            read records within these intervals.
+*/
+
 class SEL_ARG :public Sql_alloc
 {
 public:
   uint8 min_flag,max_flag,maybe_flag;
   uint8 part;					// Which key part
   uint8 maybe_null;
-  uint16 elements;				// Elements in tree
-  ulong use_count;				// use of this sub_tree
+  /* 
+    Number of children of this element in the RB-tree, plus 1 for this
+    element itself.
+  */
+  uint16 elements;
+  /*
+    Valid only for elements which are RB-tree roots: Number of times this
+    RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by
+    SEL_TREE::keys[i] or by a temporary SEL_ARG* variable)
+  */
+  ulong use_count;
+
   Field *field;
   char *min_value,*max_value;			// Pointer to range
 
-  SEL_ARG *left,*right,*next,*prev,*parent,*next_key_part;
+  SEL_ARG *left,*right;   /* R-B tree children */
+  SEL_ARG *next,*prev;    /* Links for bi-directional interval list */
+  SEL_ARG *parent;        /* R-B tree parent */
+  SEL_ARG *next_key_part; 
   enum leaf_color { BLACK,RED } color;
   enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;
 
@@ -498,6 +599,7 @@ SEL_ARG *SEL_ARG::clone(SEL_ARG *new_par
   }
   increment_use_count(1);
   tmp->color= color;
+  tmp->elements= this->elements;
   return tmp;
 }
 
@@ -1525,8 +1627,21 @@ and_all_keys(SEL_ARG *key1,SEL_ARG *key2
 
 
 
+/*
+  Produce a SEL_ARG graph that represents "key1 AND key2"
+
+  SYNOPSIS
+    key_and()
+      key1   First argument, root of its RB-tree
+      key2   Second argument, root of its RB-tree
+
+  RETURN
+    RB-tree root of the resulting SEL_ARG graph.
+    NULL if the result of AND operation is an empty interval {0}.
+*/
+
 static SEL_ARG *
-key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
+key_and(SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
 {
   if (!key1)
     return key2;
@@ -1589,6 +1704,7 @@ key_and(SEL_ARG *key1,SEL_ARG *key2,uint
 
   if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
   {
+    /* TODO: why not leave one of the trees? */
     key1->free_tree();
     key2->free_tree();
     return 0;					// Can't optimize this
@@ -2303,6 +2419,51 @@ int test_rb_tree(SEL_ARG *element,SEL_AR
   return -1;					// Error, no more warnings
 }
 
+
+/*
+  Count how many times SEL_ARG graph "root" refers to its part "key"
+  
+  SYNOPSIS
+    count_key_part_usage()
+      root  An RB-Root node in a SEL_ARG graph.
+      key   Another RB-Root node in that SEL_ARG graph.
+
+  DESCRIPTION
+    The passed "root" node may refer to "key" node via root->next_key_part,
+    root->next->n
+
+    This function counts how many times the node "key" is referred (via
+    SEL_ARG::next_key_part) by 
+     - intervals of RB-tree pointed by "root", 
+     - intervals of RB-trees that are pointed by SEL_ARG::next_key_part from 
+       intervals of RB-tree pointed by "root",
+     - and so on.
+    
+    Here is an example (horizontal links represent next_key_part pointers, 
+    vertical links - next/prev prev pointers):  
+    
+         +----+               $
+         |root|-----------------+
+         +----+               $ |
+           |                  $ |
+           |                  $ |
+         +----+       +---+   $ |     +---+    Here the return value
+         |    |- ... -|   |---$-+--+->|key|    will be 4.
+         +----+       +---+   $ |  |  +---+
+           |                  $ |  |
+          ...                 $ |  |
+           |                  $ |  |
+         +----+   +---+       $ |  |
+         |    |---|   |---------+  |
+         +----+   +---+       $    |
+           |        |         $    |
+          ...     +---+       $    |
+                  |   |------------+
+                  +---+       $
+  RETURN 
+    Number of links to "key" from nodes reachable from "root".
+*/
+
 static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
 {
   ulong count= 0;
@@ -2319,6 +2480,20 @@ static ulong count_key_part_usage(SEL_AR
   return count;
 }
 
+
+/*
+  Check if SEL_ARG::use_count value is correct
+
+  SYNOPSIS
+    SEL_ARG::test_use_count()
+      root  The root node of the SEL_ARG graph (an RB-tree root node that
+            has the least value of sel_arg->part in the entire graph, and
+            thus is the "origin" of the graph)
+
+  DESCRIPTION
+    Check if SEL_ARG::use_count value is correct. See the definition of
+    use_count for what is "correct".
+*/
 
 void SEL_ARG::test_use_count(SEL_ARG *root)
 {

--- 1.34/mysql-test/r/range.result	2005-06-23 11:56:39 +04:00
+++ 1.35/mysql-test/r/range.result	2006-06-30 09:05:08 +04:00
@@ -627,3 +627,25 @@ SELECT count(*) FROM t1 WHERE CLIENT='00
 count(*)
 4
 drop table t1;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t2 (
+pk1 int(11) NOT NULL,
+pk2 int(11) NOT NULL,
+pk3 int(11) NOT NULL,
+pk4 int(11) NOT NULL,
+filler char(82),
+PRIMARY KEY (pk1,pk2,pk3,pk4)
+) DEFAULT CHARSET=latin1;
+insert into t2 select 1, A.a+10*B.a, 432, 44, 'fillerZ' from t1 A, t1 B;
+INSERT INTO t2 VALUES (2621, 2635, 0, 0,'filler'), (2621, 2635, 1, 0,'filler'),
+(2621, 2635, 10, 0,'filler'), (2621, 2635, 11, 0,'filler'),
+(2621, 2635, 14, 0,'filler'), (2621, 2635, 1000015, 0,'filler');
+SELECT * FROM t2
+WHERE ((((pk4 =0) AND (pk1 =2621) AND (pk2 =2635)))
+OR ((pk4 =1) AND (((pk1 IN ( 7, 2, 1 ))) OR (pk1 =522)) AND ((pk2 IN ( 0, 2635))))
+) AND (pk3 >=1000000);
+pk1	pk2	pk3	pk4	filler
+2621	2635	1000015	0	filler
+drop table t1, t2;

--- 1.30/mysql-test/t/range.test	2005-07-28 04:21:46 +04:00
+++ 1.31/mysql-test/t/range.test	2006-06-30 09:05:08 +04:00
@@ -484,4 +484,30 @@ SELECT count(*) FROM t1 WHERE CLIENT='00
 SELECT count(*) FROM t1 WHERE CLIENT='000' AND (ARG1 != ' 2' OR ARG1 != ' 1');
 drop table t1;
 
+# BUG#16168: Wrong range optimizer results, "Use_count: Wrong count ..."
+#            warnings in server stderr.
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+DROP TABLE IF EXISTS t2;
+CREATE TABLE t2 (
+  pk1 int(11) NOT NULL,
+  pk2 int(11) NOT NULL,
+  pk3 int(11) NOT NULL,
+  pk4 int(11) NOT NULL,
+  filler char(82),
+  PRIMARY KEY (pk1,pk2,pk3,pk4)
+) DEFAULT CHARSET=latin1;
+
+insert into t2 select 1, A.a+10*B.a, 432, 44, 'fillerZ' from t1 A, t1 B;
+INSERT INTO t2 VALUES (2621, 2635, 0, 0,'filler'), (2621, 2635, 1, 0,'filler'),
+  (2621, 2635, 10, 0,'filler'), (2621, 2635, 11, 0,'filler'),
+  (2621, 2635, 14, 0,'filler'), (2621, 2635, 1000015, 0,'filler');
+
+SELECT * FROM t2
+WHERE ((((pk4 =0) AND (pk1 =2621) AND (pk2 =2635)))
+OR ((pk4 =1) AND (((pk1 IN ( 7, 2, 1 ))) OR (pk1 =522)) AND ((pk2 IN ( 0, 2635))))
+) AND (pk3 >=1000000);
+drop table t1, t2;
+
 # End of 4.1 tests
Thread
bk commit into 4.1 tree (sergefp:1.2517) BUG#16168Sergey Petrunia30 Jun