List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:May 6 2006 10:35am
Subject:bk commit into 5.0 tree (sergefp:1.2117)
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2117 06/05/06 14:35:21 sergefp@stripped +4 -0
  Merge, will need post-merge fixes

  sql/sql_select.cc
    1.410 06/05/06 14:35:17 sergefp@stripped +2 -4
    Merge, will need post-merge fixes

  mysql-test/t/select.test
    1.100 06/05/06 14:35:17 sergefp@stripped +0 -1
    Merge, will need post-merge fixes

  mysql-test/r/select.result
    1.122 06/05/06 14:35:17 sergefp@stripped +0 -0
    Merge, will need post-merge fixes

  mysql-test/r/subselect.result
    1.141 06/05/06 14:24:57 sergefp@stripped +0 -0
    Auto merged

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sergefp
# Host:	newbox.mylan
# Root:	/home/psergey/mysql-5.0-bug17379-r4/RESYNC

--- 1.409/sql/sql_select.cc	2006-05-04 06:38:30 +04:00
+++ 1.410/sql/sql_select.cc	2006-05-06 14:35:17 +04:00
@@ -3322,7 +3322,11 @@
       uint key= keyuse->key;
       KEY *keyinfo= table->key_info+key;
       bool ft_key=  (keyuse->keypart == FT_KEYPART);
-      uint found_ref_or_null= 0;
+      /* Bitmap of keyparts where the ref access is over 'keypart=const': */
+      key_part_map const_part= 0;
+      /* The or-null keypart in ref-or-null access: */
+      key_part_map ref_or_null_part= 0;
+      uint found_ref_or_null= 0;//psergey-todo: check if we can remove this
 
       /* Calculate how many key segments of the current key we can use */
       start_key= keyuse;
@@ -3337,6 +3341,9 @@
               !(found_ref_or_null & keyuse->optimize))
           {
             found_part|= keyuse->keypart_map;
+            if (!(keyuse->used_tables & ~join->const_table_map))
+              const_part|= keyuse->keypart_map;
+
             double tmp= prev_record_reads(join,
 					  (found_ref |
 					   keyuse->used_tables));
@@ -3347,7 +3354,7 @@
             }
             if (rec > keyuse->ref_table_rows)
               rec= keyuse->ref_table_rows;
-	    /*
+	    /* psergey-todo: chekc if this needs ot be moved out
 	      If there is one 'key_column IS NULL' expression, we can
 	      use this ref_or_null optimisation of this field
 	    */
@@ -3400,6 +3407,29 @@
           {
             if (!found_ref)
             {                                     /* We found a const key */
+              /*
+                ReuseRangeEstimateForRef-1:
+                We get here if we've found a ref(const) (c_i are constants):
+                  "(keypart1=c1) AND ... AND (keypartN=cN)"   [ref_const_cond]
+                
+                If range optimizer was able to could construct a "range" 
+                access on this index, then its condition "quick_cond" was 
+                eqivalent (*) to ref_const_cond, and we can re-use E(#rows) 
+                from the range optimizer.
+                
+                Proof of (*): By properties of range and ref optimizers 
+                quick_cond will be equal or tighther than ref_const_cond. 
+                ref_const_cond already covers "smallest" possible interval - 
+                a singlepoint interval over all keyparts. Therefore, 
+                quick_cond is equivalent to ref_const_cond (if it was an 
+                empty interval we wouldn't have got here).
+
+                NOTE: here in the if-branch of "Check if we found full key"
+                we don't try adjusting the estimate if range optimizer 
+                produced an estimate for some prefix of condition used by ref
+                access. In the else-branch we do such adjustment (see
+                below). Why do we have this inconsistency?
+              */
               if (table->quick_keys.is_set(key))
                 records= (double) table->quick_rows[key];
               else
@@ -3448,12 +3478,49 @@
           {
             max_key_part= max_part_bit(found_part);
             /*
-              Check if quick_range could determinate how many rows we
-              will match
+              ReuseRangeEstimateForRef-2:
+              We're now considering a ref[or_null] access via
+              (t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR  
+              (same-as-above but with one cond replaced 
+               with "t.keypart_i IS NULL")]  (*)
+              
+              Try re-using E(#rows) from "range" optimizer:              
+              We can do so iff "range" optimizer used the same intervals as
+              in (*). The intervals used by range optimizer may be not 
+              available at this point (as "range" access might have choosen to
+              create quick select over another index), so we can't compare
+              them to (*). We'll make indirect judgements instead.
+              The sufficient conditions for re-use are:
+              1. All e_i in (*) are constants, i.e. found_ref==FALSE. (if
+                 this is not satisfied we have no way to know which ranges
+                 will be actually scanned by 'ref' until we execute the join)
+              2. max #key parts in 'range' access == K == max_key_part (this 
+                 is apparently a necessary requirement)
+    
+              We also have a property that "range optimizer produces equal or 
+              tighter set of scan intervals than ref(const) optimizer". Each
+              of the intervals in (*) are "tightest possible" intervals when 
+              one limits itself to using keyparts 1..K (which we do in #2).              
+              From here it follows that range access used either one, or
+              both of the (I1) and (I2) intervals:
+              
+               (t.keypart1=c1 AND ... AND t.keypartK=eK)  (I1) 
+               (same-as-above but with one cond replaced  
+                with "t.keypart_i IS NULL")               (I2)
+
+              The remaining part is to exclude the situation where range
+              optimizer used one interval while we're considering
+              ref-or-null and looking for estimate for two intervals. This
+              is done by last limitation:
+
+              3. "range optimizer used (have ref_or_null?2:1) intervals"
             */
-            if (table->quick_keys.is_set(key) &&
-                table->quick_key_parts[key] == max_key_part)
+            if (table->quick_keys.is_set(key) && !found_ref &&          // (1)
+                table->quick_key_parts[key] == max_key_part &&          // (2)
+                table->quick_n_ranges[key] == 1+test(ref_or_null_part)) // (3)
+            {
               tmp= records= (double) table->quick_rows[key];
+            }
             else
             {
               /* Check if we have statistic about the distribution */
@@ -3497,21 +3564,37 @@
                 }
                 records = (ulong) tmp;
               }
+
+              if (found_ref_or_null)
+              {
+                /* We need to do two key searches to find key */
+                tmp *= 2.0;
+                records *= 2.0;
+              }
+
               /*
-                If quick_select was used on a part of this key, we know
-                the maximum number of rows that the key can match.
+                ReuseRangeEstimateForRef-3:  We get here if we could not reuse
+                E(#rows) from range optimizer. Make another try:
+                
+                If range optimizer produced E(#rows) for a prefix of the ref 
+                access we're considering, and that E(#rows) is lower then our
+                current estimate, make the adjustment.
+
+                The decision whether we can re-use the estimate from the range
+                optimizer is the same as in ReuseRangeEstimateForRef-2,
+                applied to first table->quick_key_parts[key] key parts.
               */
               if (table->quick_keys.is_set(key) &&
                   table->quick_key_parts[key] <= max_key_part &&
+                  const_part & (1 << table->quick_key_parts[key]) &&
+                  table->quick_n_ranges[key] == 1 + test(ref_or_null_part &
+                                                         const_part) &&
                   records > (double) table->quick_rows[key])
-                tmp= records= (double) table->quick_rows[key];
-              else if (found_ref_or_null)
               {
-                /* We need to do two key searches to find key */
-                tmp *= 2.0;
-                records *= 2.0;
+                tmp= records= (double) table->quick_rows[key];
               }
             }
+
             /* Limit the number of matched rows */
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->used_keys.is_set(key))
@@ -4350,6 +4433,8 @@
 	  KEY *keyinfo=table->key_info+key;
           bool ft_key=(keyuse->keypart == FT_KEYPART);
 	  uint found_ref_or_null= 0;
+          key_part_map const_part= 0;
+          key_part_map ref_or_null_part= 0;
 
 	  /* Calculate how many key segments of the current key we can use */
 	  start_key=keyuse;
@@ -4364,6 +4449,10 @@
 		  !(found_ref_or_null & keyuse->optimize))
 	      {
 		found_part|=keyuse->keypart_map;
+                if (!(keyuse->used_tables & ~join->const_table_map))
+                  const_part|= keyuse->keypart_map;
+                if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
+                  ref_or_null_part= keyuse->keypart_map;
                 double tmp= prev_record_reads(join,
 					      (found_ref |
 					       keyuse->used_tables));
@@ -4426,7 +4515,8 @@
 	    else
 	    {
 	      if (!found_ref)
-	      {					// We found a const key
+	      {
+                /* See ReuseRangeEstimateForRef-1  */
 		if (table->quick_keys.is_set(key))
 		  records= (double) table->quick_rows[key];
 		else
@@ -4474,13 +4564,13 @@
 		 found_part == PREV_BITS(uint,keyinfo->key_parts)))
 	    {
 	      max_key_part=max_part_bit(found_part);
-	      /*
-		Check if quick_range could determinate how many rows we
-		will match
-	      */
-	      if (table->quick_keys.is_set(key) &&
-		  table->quick_key_parts[key] == max_key_part)
+	      /* See ReuseRangeEstimateForRef-2.  */
+	      if (table->quick_keys.is_set(key) && !found_ref &&
+		  table->quick_key_parts[key] == max_key_part &&
+                  table->quick_n_ranges[key] == 1+test(found_ref_or_null))
+              {
 		tmp=records= (double) table->quick_rows[key];
+              }
 	      else
 	      {
 		/* Check if we have statistic about the distribution */
@@ -4520,20 +4610,28 @@
 		  }
 		  records=(ulong) tmp;
 		}
+
+                if (found_ref_or_null)
+                {
+                  /* We need to do two key searches to find key */
+                  tmp *= 2.0;
+                  records *= 2.0;
+                }
+
 		/*
 		  If quick_select was used on a part of this key, we know
 		  the maximum number of rows that the key can match.
-		*/
-		if (table->quick_keys.is_set(key) &&
-		    table->quick_key_parts[key] <= max_key_part &&
-		    records > (double) table->quick_rows[key])
-		  tmp= records= (double) table->quick_rows[key];
-		else if (found_ref_or_null)
-		{
-		  /* We need to do two key searches to find key */
-		  tmp*= 2.0;
-		  records*= 2.0;
-		}
+                   See ReuseRangeEstimateForRef-3 for details.
+                */
+                if (table->quick_keys.is_set(key) &&
+                    table->quick_key_parts[key] <= max_key_part &&
+                    const_part & (1 << table->quick_key_parts[key]) &&
+                    table->quick_n_ranges[key] == 1 + test(ref_or_null_part &
+                                                           const_part) &&
+                    records > (double) table->quick_rows[key])
+                {
+                  tmp= records= (double) table->quick_rows[key];
+                }
 	      }
 	      /* Limit the number of matched rows */
 	      set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);

--- 1.140/mysql-test/r/subselect.result	2006-05-04 06:38:29 +04:00
+++ 1.141/mysql-test/r/subselect.result	2006-05-06 14:24:57 +04:00
@@ -1480,7 +1480,7 @@
 explain extended select s1, s1 NOT IN (SELECT s1 FROM t2 WHERE s1 < 'a2') from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	index	NULL	s1	6	NULL	3	Using index
-2	DEPENDENT SUBQUERY	t2	index_subquery	s1	s1	6	func	1	Using index; Using where
+2	DEPENDENT SUBQUERY	t2	index_subquery	s1	s1	6	func	2	Using index; Using where
 Warnings:
 Note	1003	select `test`.`t1`.`s1` AS `s1`,not(<in_optimizer>(`test`.`t1`.`s1`,<exists>(<index_lookup>(<cache>(`test`.`t1`.`s1`) in t2 on s1 checking NULL where (`test`.`t2`.`s1` < _latin1'a2'))))) AS `s1 NOT IN (SELECT s1 FROM t2 WHERE s1 < 'a2')` from `test`.`t1`
 drop table t1,t2;

--- 1.121/mysql-test/r/select.result	2006-05-04 18:05:16 +04:00
+++ 1.122/mysql-test/r/select.result	2006-05-06 14:35:17 +04:00
@@ -3390,6 +3390,22 @@
 1	SIMPLE	t2	const	PRIMARY	PRIMARY	4	const	1	
 1	SIMPLE	t1	range	PRIMARY	PRIMARY	4	NULL	2	Using where
 DROP TABLE t1,t2;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, b int, c int, e int, primary key(a,b,c));
+insert into t2 select A.a, B.a, C.a, C.a from t1 A, t1 B, t1 C;
+analyze table t2;
+Table	Op	Msg_type	Msg_text
+test.t2	analyze	status	OK
+select 'In next EXPLAIN, B.rows must be exactly 10:' Z;
+Z
+In next EXPLAIN, B.rows must be exactly 10:
+explain select * from t2 A, t2 B where A.a=5 and A.b=5 and A.C<5
+and B.a=5 and B.b=A.e and (B.b =1 or B.b = 3 or B.b=5);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	A	range	PRIMARY	PRIMARY	12	NULL	3	Using where
+1	SIMPLE	B	ref	PRIMARY	PRIMARY	8	const,test.A.e	10	
+drop table t1, t2;
 CREATE TABLE t1 (i TINYINT UNSIGNED NOT NULL);
 INSERT t1 SET i = 0;
 UPDATE t1 SET i = -1;

--- 1.99/mysql-test/t/select.test	2006-05-04 18:05:16 +04:00
+++ 1.100/mysql-test/t/select.test	2006-05-06 14:35:17 +04:00
@@ -2869,9 +2869,20 @@
 SELECT t2.sku, t2.sppr, t2.name, t1.sku, t1.pr
   FROM t2, t1 WHERE t2.sku=20 AND (t2.sku=t1.sku OR t2.sppr=t1.sku);
 
-
 DROP TABLE t1,t2;
 
+# BUG#17379
+
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, b int, c int, e int, primary key(a,b,c));
+insert into t2 select A.a, B.a, C.a, C.a from t1 A, t1 B, t1 C;
+analyze table t2;
+select 'In next EXPLAIN, B.rows must be exactly 10:' Z;
+
+explain select * from t2 A, t2 B where A.a=5 and A.b=5 and A.C<5
+          and B.a=5 and B.b=A.e and (B.b =1 or B.b = 3 or B.b=5);
+drop table t1, t2;
 #
 # Bug#18712: Truncation problem (needs just documenting and test
 # cases to prevent fixing this accidently. It is intended behaviour)
Thread
bk commit into 5.0 tree (sergefp:1.2117)Sergey Petrunia6 May