List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:March 28 2007 4:16pm
Subject:bk commit into 4.1 tree (sergefp:1.2627) BUG#26624
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-03-28 20:16:01+04:00, sergefp@stripped +3 -0
  BUG#26624: high mem usage (crash) in range optimizer
  - Added PARAM::alloced_sel_args where we count the # of SEL_ARGs
    created by SEL_ARG tree cloning operations.
  - Made the range analyzer short-circuit and stop cloning once
    MAX_SEL_ARGS SEL_ARG objects have already been created by cloning.
  - Added comments about space complexity of SEL_ARG-graph 
    representation.

  mysql-test/r/range.result@stripped, 2007-03-28 20:15:59+04:00, sergefp@stripped +28 -0
    BUG#26624: Testcase

  mysql-test/t/range.test@stripped, 2007-03-28 20:15:59+04:00, sergefp@stripped +32 -0
    BUG#26624: Testcase

  sql/opt_range.cc@stripped, 2007-03-28 20:15:59+04:00, sergefp@stripped +131 -31
    BUG#26624: high mem usage (crash) in range optimizer
    - Added PARAM::alloced_sel_args where we count the # of SEL_ARGs
      created by SEL_ARG tree cloning operations.
    - Made the range analyzer short-circuit and stop cloning once
      MAX_SEL_ARGS SEL_ARG objects have already been created by cloning.
    - Added comments about space complexity of SEL_ARG-graph 
      representation.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sergefp
# Host:	pylon.mylan
# Root:	/home/psergey/mysql-4.1-bug26624-r2

--- 1.150/sql/opt_range.cc	2007-03-28 20:16:04 +04:00
+++ 1.151/sql/opt_range.cc	2007-03-28 20:16:04 +04:00
@@ -128,6 +128,89 @@
    - get_quick_select()   - Walk the SEL_ARG, materialize the key intervals,
                             and create QUICK_RANGE_SELECT object that will
                             read records within these intervals.
+
+  4. SPACE COMPLEXITY NOTES 
+
+    SEL_ARG graph is a representation of an ordered disjoint sequence of
+    intervals over the ordered set of index tuple values.
+
+    For multi-part keys, one can construct a WHERE expression such that its
+    list of intervals will be of combinatorial size. Here is an example:
+     
+      (keypart1 IN (1,2, ..., n1)) AND 
+      (keypart2 IN (1,2, ..., n2)) AND 
+      (keypart3 IN (1,2, ..., n3))
+    
+    For this WHERE clause the list of intervals will have n1*n2*n3 intervals
+    of form
+     
+      (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i}
+    
+    SEL_ARG graph structure aims to reduce the amount of required space by
+    "sharing" the elementary intervals when possible (the pic at the
+    beginning of this comment has examples of such sharing). The sharing may 
+    prevent combinatorial blowup:
+
+      There are WHERE clauses that have combinatorial-size interval lists but
+      will be represented by a compact SEL_ARG graph.
+      Example:
+        (keypartN IN (1,2, ..., n1)) AND 
+        ...
+        (keypart2 IN (1,2, ..., n2)) AND 
+        (keypart1 IN (1,2, ..., n3))
+
+    but not in all cases:
+
+    - There are WHERE clauses that do have a compact SEL_ARG-graph
+      representation but get_mm_tree() and its callees will construct a
+      graph of combinatorial size.
+      Example:
+        (keypart1 IN (1,2, ..., n1)) AND 
+        (keypart2 IN (1,2, ..., n2)) AND 
+        ...
+        (keypartN IN (1,2, ..., n3))
+
+    - There are WHERE clauses for which the minimal possible SEL_ARG graph
+      representation will have combinatorial size.
+      Example:
+        By induction: Let's take any interval on some keypart in the middle:
+
+           kp15=1 
+        
+        Then let's AND it with this interval 'structure' from preceding and
+        following keyparts:
+
+          ((kp14=c1 AND kp16=c3) OR kp14=c2) (*)
+        
+        We will obtain this SEL_ARG graph:
+ 
+             kp14     $      kp15     $      kp16
+                      $               $
+         +---------+  $   +--------+  $   +---------+
+         | kp14=c1 |--$-->| kp15=1 |--$-->| kp16=c3 |
+         +---------+  $   +--------+  $   +---------+
+              |       $               $              
+         +---------+  $   +--------+  $             
+         | kp14=c2 |--$-->| kp15=1 |  $             
+         +---------+  $   +--------+  $             
+                      $               $
+                      
+       Note that we had to duplicate "kp15=1" and there was no way to avoid
+       that. 
+       The induction step: AND the obtained expression with another "wrapping"
+       expression like (*).
+       When the process ends because of the limit on max. number of keyparts 
+       we'll have:
+
+         WHERE clause length  is O(3*#max_keyparts)
+         SEL_ARG graph size   is O(2^(#max_keyparts/2))
+
+       (it is also possible to construct a case where instead of 2 in 2^n we
+        have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31
+        nodes)
+
+    We avoid consuming too much memory by setting a limit on the number of
+    SEL_ARG objects we can construct during one range analysis invocation.
 */
 
 class SEL_ARG :public Sql_alloc
@@ -158,6 +241,8 @@
   enum leaf_color { BLACK,RED } color;
   enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;
 
+  enum { MAX_SEL_ARGS = 64000 };
+
   SEL_ARG() {}
   SEL_ARG(SEL_ARG &);
   SEL_ARG(Field *,const char *,const char *);
@@ -227,7 +312,8 @@
     return new SEL_ARG(field, part, min_value, arg->max_value,
 		       min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
   }
-  SEL_ARG *clone(SEL_ARG *new_parent,SEL_ARG **next);
+  SEL_ARG *clone(struct st_qsel_param *param, SEL_ARG *new_parent, 
+                  SEL_ARG **next);
 
   bool copy_min(SEL_ARG* arg)
   {						// Get overlapping range
@@ -365,7 +451,7 @@
   {
     return parent->left == this ? &parent->left : &parent->right;
   }
-  SEL_ARG *clone_tree();
+  SEL_ARG *clone_tree(struct st_qsel_param *param);
 };
 
 
@@ -391,6 +477,8 @@
     max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
   bool quick;				// Don't calulate possible keys
   COND *cond;
+  /* Number of SEL_ARG objects allocated by SEL_ARG::clone_tree operations */
+  uint alloced_sel_args; 
 } PARAM;
 
 static SEL_TREE * get_mm_parts(PARAM *param,COND *cond_func,Field *field,
@@ -413,8 +501,8 @@
 static SEL_TREE *tree_and(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
 static SEL_TREE *tree_or(PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
 static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
-static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2);
-static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag);
+static SEL_ARG *key_or(PARAM *param, SEL_ARG *key1,SEL_ARG *key2);
+static SEL_ARG *key_and(PARAM *param, SEL_ARG *key1,SEL_ARG *key2,uint clone_flag);
 static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
 static bool get_quick_keys(PARAM *param,QUICK_SELECT *quick,KEY_PART *key,
 			   SEL_ARG *key_tree,char *min_key,uint min_key_flag,
@@ -424,6 +512,7 @@
 static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
 static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length);
 
+
 /***************************************************************************
 ** Basic functions for SQL_SELECT and QUICK_SELECT
 ***************************************************************************/
@@ -568,12 +657,17 @@
   left=right= &null_element;
 }
 
-SEL_ARG *SEL_ARG::clone(SEL_ARG *new_parent,SEL_ARG **next_arg)
+SEL_ARG *SEL_ARG::clone(PARAM *param, SEL_ARG *new_parent, SEL_ARG **next_arg)
 {
   SEL_ARG *tmp;
+
+  /* Bail out if we have already generated too many SEL_ARGs */
+  if (++param->alloced_sel_args > MAX_SEL_ARGS)
+    return 0;
+
   if (type != KEY_RANGE)
   {
-    if (!(tmp= new SEL_ARG(type)))
+    if (!(tmp= new (param->mem_root) SEL_ARG(type)))
       return 0;					// out of memory
     tmp->prev= *next_arg;			// Link into next/prev chain
     (*next_arg)->next=tmp;
@@ -581,20 +675,20 @@
   }
   else
   {
-    if (!(tmp= new SEL_ARG(field,part, min_value,max_value,
-			   min_flag, max_flag, maybe_flag)))
+    if (!(tmp= new (param->mem_root) SEL_ARG(field,part, min_value,max_value,
+                                             min_flag, max_flag, maybe_flag)))
       return 0;					// OOM
     tmp->parent=new_parent;
     tmp->next_key_part=next_key_part;
     if (left != &null_element)
-      tmp->left=left->clone(tmp,next_arg);
+      tmp->left=left->clone(param, tmp, next_arg);
 
     tmp->prev= *next_arg;			// Link into next/prev chain
     (*next_arg)->next=tmp;
     (*next_arg)= tmp;
 
     if (right != &null_element)
-      if (!(tmp->right= right->clone(tmp,next_arg)))
+      if (!(tmp->right= right->clone(param, tmp, next_arg)))
 	return 0;				// OOM
   }
   increment_use_count(1);
@@ -672,11 +766,12 @@
 }
 
 
-SEL_ARG *SEL_ARG::clone_tree()
+SEL_ARG *SEL_ARG::clone_tree(PARAM *param)
 {
   SEL_ARG tmp_link,*next_arg,*root;
   next_arg= &tmp_link;
-  root= clone((SEL_ARG *) 0, &next_arg);
+  if (!(root= clone(param, (SEL_ARG *) 0, &next_arg)))
+    return 0;
   next_arg->next=0;				// Fix last link
   tmp_link.next->prev=0;			// Fix first link
   if (root)					// If not OOM
@@ -890,6 +985,7 @@
       param.real_keynr[param.keys++]=idx;
     }
     param.key_parts_end=key_parts;
+    param.alloced_sel_args= 0;
 
     if ((tree=get_mm_tree(&param,cond)))
     {
@@ -991,7 +1087,8 @@
       while ((item=li++))
       {
 	SEL_TREE *new_tree=get_mm_tree(param,item);
-	if (param->thd->is_fatal_error)
+	if (param->thd->is_fatal_error || 
+            param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
 	  DBUG_RETURN(0);	// out of memory
 	tree=tree_and(param,tree,new_tree);
 	if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
@@ -1524,7 +1621,7 @@
     tree1->type=SEL_TREE::KEY_SMALLER;
     DBUG_RETURN(tree1);
   }
-
+  
   /* Join the trees key per key */
   SEL_ARG **key1,**key2,**end;
   for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
@@ -1537,7 +1634,7 @@
 	flag|=CLONE_KEY1_MAYBE;
       if (*key2 && !(*key2)->simple_key())
 	flag|=CLONE_KEY2_MAYBE;
-      *key1=key_and(*key1,*key2,flag);
+      *key1=key_and(param, *key1, *key2, flag);
       if (*key1 && (*key1)->type == SEL_ARG::IMPOSSIBLE)
       {
 	tree1->type= SEL_TREE::IMPOSSIBLE;
@@ -1574,7 +1671,7 @@
   for (key1= tree1->keys,key2= tree2->keys,end=key1+param->keys ;
        key1 != end ; key1++,key2++)
   {
-    *key1=key_or(*key1,*key2);
+    *key1= key_or(param, *key1, *key2);
     if (*key1)
     {
       result=tree1;				// Added to tree1
@@ -1590,14 +1687,14 @@
 /* And key trees where key1->part < key2 -> part */
 
 static SEL_ARG *
-and_all_keys(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
+and_all_keys(PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
 {
   SEL_ARG *next;
   ulong use_count=key1->use_count;
 
   if (key1->elements != 1)
   {
-    key2->use_count+=key1->elements-1;
+    key2->use_count+=key1->elements-1; //psergey: why don't we account for key1 having n-k-p (next_key_part?)?
     key2->increment_use_count((int) key1->elements-1);
   }
   if (key1->type == SEL_ARG::MAYBE_KEY)
@@ -1609,7 +1706,7 @@
   {
     if (next->next_key_part)
     {
-      SEL_ARG *tmp=key_and(next->next_key_part,key2,clone_flag);
+      SEL_ARG *tmp= key_and(param, next->next_key_part, key2, clone_flag);
       if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
       {
 	key1=key1->tree_delete(next);
@@ -1618,6 +1715,8 @@
       next->next_key_part=tmp;
       if (use_count)
 	next->increment_use_count(use_count);
+      if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
+        break;
     }
     else
       next->next_key_part=key2;
@@ -1644,7 +1743,7 @@
 */
 
 static SEL_ARG *
-key_and(SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
+key_and(PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
 {
   if (!key1)
     return key2;
@@ -1660,9 +1759,9 @@
     // key1->part < key2->part
     key1->use_count--;
     if (key1->use_count > 0)
-      if (!(key1= key1->clone_tree()))
+      if (!(key1= key1->clone_tree(param)))
 	return 0;				// OOM
-    return and_all_keys(key1,key2,clone_flag);
+    return and_all_keys(param, key1, key2, clone_flag);
   }
 
   if (((clone_flag & CLONE_KEY2_MAYBE) &&
@@ -1680,14 +1779,14 @@
     if (key1->use_count > 1)
     {
       key1->use_count--;
-      if (!(key1=key1->clone_tree()))
+      if (!(key1=key1->clone_tree(param)))
 	return 0;				// OOM
       key1->use_count++;
     }
     if (key1->type == SEL_ARG::MAYBE_KEY)
     {						// Both are maybe key
-      key1->next_key_part=key_and(key1->next_key_part,key2->next_key_part,
-				 clone_flag);
+      key1->next_key_part=key_and(param, key1->next_key_part, 
+                                  key2->next_key_part, clone_flag);
       if (key1->next_key_part &&
 	  key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
 	return key1;
@@ -1698,7 +1797,7 @@
       if (key2->next_key_part)
       {
 	key1->use_count--;			// Incremented in and_all_keys
-	return and_all_keys(key1,key2,clone_flag);
+	return and_all_keys(param, key1, key2, clone_flag);
       }
       key2->use_count--;			// Key2 doesn't have a tree
     }
@@ -1727,7 +1826,8 @@
     }
     else if (get_range(&e2,&e1,key2))
       continue;
-    SEL_ARG *next=key_and(e1->next_key_part,e2->next_key_part,clone_flag);
+    SEL_ARG *next=key_and(param, e1->next_key_part, e2->next_key_part,
+                          clone_flag);
     e1->increment_use_count(1);
     e2->increment_use_count(1);
     if (!next || next->type != SEL_ARG::IMPOSSIBLE)
@@ -1775,7 +1875,7 @@
 
 
 static SEL_ARG *
-key_or(SEL_ARG *key1,SEL_ARG *key2)
+key_or(PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
 {
   if (!key1)
   {
@@ -1823,7 +1923,7 @@
     {
       swap_variables(SEL_ARG *,key1,key2);
     }
-    if (key1->use_count > 0 || !(key1=key1->clone_tree()))
+    if (key1->use_count > 0 || !(key1=key1->clone_tree(param)))
       return 0;					// OOM
   }
 
@@ -1967,7 +2067,7 @@
       {						// tmp.min. <= x <= tmp.max
 	tmp->maybe_flag|= key.maybe_flag;
 	key.increment_use_count(key1->use_count+1);
-	tmp->next_key_part=key_or(tmp->next_key_part,key.next_key_part);
+	tmp->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
 	if (!cmp)				// Key2 is ready
 	  break;
 	key.copy_max_to_min(tmp);
@@ -1998,7 +2098,7 @@
 	tmp->increment_use_count(key1->use_count+1);
 	/* Increment key count as it may be used for next loop */
 	key.increment_use_count(1);
-	new_arg->next_key_part=key_or(tmp->next_key_part,key.next_key_part);
+	new_arg->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
 	key1=key1->insert(new_arg);
 	break;
       }

--- 1.38/mysql-test/r/range.result	2007-03-28 20:16:04 +04:00
+++ 1.39/mysql-test/r/range.result	2007-03-28 20:16:04 +04:00
@@ -701,4 +701,32 @@
 d8c4177d2380fc201.39666693
 d8c4177d24ccef970.14957924
 DROP TABLE t1;
+create table t1 (
+c1  char(10), c2  char(10), c3  char(10), c4  char(10),
+c5  char(10), c6  char(10), c7  char(10), c8  char(10),
+c9  char(10), c10 char(10), c11 char(10), c12 char(10),
+c13 char(10), c14 char(10), c15 char(10), c16 char(10),
+index(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,c13,c14,c15,c16)
+);
+insert into t1 (c1) values ('1'),('1'),('1'),('1');
+select * from t1 where
+c1 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c2 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c3 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c4 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c5 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c6 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c7 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c8 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c9 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c10 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c11 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c12 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c13 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c14 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c15 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+and c16 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+;
+c1	c2	c3	c4	c5	c6	c7	c8	c9	c10	c11	c12	c13	c14	c15	c16
+drop table t1;
 End of 4.1 tests

--- 1.34/mysql-test/t/range.test	2007-03-28 20:16:04 +04:00
+++ 1.35/mysql-test/t/range.test	2007-03-28 20:16:04 +04:00
@@ -563,4 +563,36 @@
 
 DROP TABLE t1;
 
+# BUG#26624 high mem usage (crash) in range optimizer (depends on order of fields in where)
+create table t1 (
+  c1  char(10), c2  char(10), c3  char(10), c4  char(10),
+  c5  char(10), c6  char(10), c7  char(10), c8  char(10),
+  c9  char(10), c10 char(10), c11 char(10), c12 char(10),
+  c13 char(10), c14 char(10), c15 char(10), c16 char(10),
+  index(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,c13,c14,c15,c16)
+);
+
+insert into t1 (c1) values ('1'),('1'),('1'),('1');
+
+# This must run quickly and without crashing:
+select * from t1 where
+     c1 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c2 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c3 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c4 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c5 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c6 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c7 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c8 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c9 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c10 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c11 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c12 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c13 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c14 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c15 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+ and c16 in ("abcdefgh", "123456789", "qwertyuio", "asddfgh")
+;
+drop table t1;
+
 --echo End of 4.1 tests
Thread
bk commit into 4.1 tree (sergefp:1.2627) BUG#26624Sergey Petrunia28 Mar