List:Commits« Previous MessageNext Message »
From:mhansson Date:March 20 2007 7:24am
Subject:bk commit into 5.1 tree (mhansson:1.2466) BUG#26106
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of martin. When martin does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-03-20 08:24:17+01:00, mhansson@stripped +3 -0
  bug #26106: Wrong plan may be chosen when there are several possible range and ref 
  accesses
  
  Credits go to Sergey Petrunia for this description of the bug.
  
  For ref(const) access methods there are two kinds of statistics when
  the WHERE has an AND-part of  "t.keyXpart1 = c1 AND ... t.keyXpartN = cN", 
  N < key size.
  
  1. Table statistics, i.e. index cardinality values
  2. Range access estimate
  
  those estimates are not necessarily consistent with each other
  for example we can get
  
    E(#rows(t.key1part1 = c1)) < E( #rows( t.key1part1 = c1 
                                           AND     
                                           t.key1part2 IN (1, 2) ) 
                                  )
  or 
     
    E(#rows(t.key1part1 = c1)) < E( #rows( t.key1part1 = c1
                                         AND
                                         t.key1part2 = othertbl.field )
                                  )
  
  which is obviously not true. The source of discrepancy is that index
  cardinality values can be very inaccurate for skewed datasets. The code
  marked as ReuseRangeEstimateForRef-[1-4] tries to do a smart job of using
  the best available estimate. e.g. if we have candidate accesses:
  
    * B = ref-access( t.key1part1 = c1 AND t.key1part2 = othertbl.field )
    * A = range-access( t.key1part1 = c1 )
  
  and we have the E( #rows(A) ) <  E( #rows(A) ) as above, we'll pick
  the #rows estimate from range access and use it with ref access. (we
  will use ref access as it uses more keyparts, that's a heuristic)
  
  When we have range and ref over the same index we have these rules:
  
    * When access method X scans a subset of rows that access method Y will
      scan, use X, no matter what the cost numbers say.
  
    * Look if the obtained cost estimates are consistent. If there is an
      inconsistency, eliminate it by using range access estimates instead of
      ref.
  
  The bug is that the above rules weren't followed. best_access_path always chose
  the index with the lowest number of records, even if the figure wasn't reliable.

  mysql-test/r/range.result@stripped, 2007-03-20 08:24:15+01:00, mhansson@stripped +26 -0
    bug #26106
    
    test case

  mysql-test/t/range.test@stripped, 2007-03-20 08:24:15+01:00, mhansson@stripped +28 -0
    bug #26106
    
    test case

  sql/sql_select.cc@stripped, 2007-03-20 08:24:15+01:00, mhansson@stripped +12 -1
    bug #26106
    
    Introduced variable key_matches_more_parts that overrules a 
    decision to use a certain key if that decision is  
    based solely upon obviously unreliable statistics.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	mhansson
# Host:	linux-st28.site
# Root:	/home/martin/mysql/src/5.1o-bug26106

--- 1.497/sql/sql_select.cc	2007-02-27 09:39:02 +01:00
+++ 1.498/sql/sql_select.cc	2007-03-20 08:24:15 +01:00
@@ -3807,6 +3807,13 @@ best_access_path(JOIN      *join,
       /* Calculate how many key segments of the current key we can use */
       start_key= keyuse;
 
+      /* 
+         This condition is raised if we find a key from range optimizer that
+         matches more key parts than the best key. In that case, use the current
+         key no matter what the statitistics say.
+      */
+      bool key_matches_more_parts= false;
+      
       do /* For each keypart */
       {
         uint keypart= keyuse->keypart;
@@ -4040,7 +4047,10 @@ best_access_path(JOIN      *join,
                 if (!found_ref && table->quick_keys.is_set(key) &&    // (1)
                     table->quick_key_parts[key] > max_key_part &&     // (2)
                     records < (double)table->quick_rows[key])         // (3)
+                {
                   records= (double)table->quick_rows[key];
+                  key_matches_more_parts= true;                  
+                }
 
                 tmp= records;
               }
@@ -4129,7 +4139,8 @@ best_access_path(JOIN      *join,
             tmp= best_time;                    // Do nothing
         }
       } /* not ft_key */
-      if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
+      if (tmp < best_time - records/(double) TIME_FOR_COMPARE ||
+          key_matches_more_parts)
       {
         best_time= tmp + records/(double) TIME_FOR_COMPARE;
         best= tmp;

--- 1.59/mysql-test/r/range.result	2007-02-06 15:52:22 +01:00
+++ 1.60/mysql-test/r/range.result	2007-03-20 08:24:15 +01:00
@@ -1008,3 +1008,29 @@ explain select * from t2 where a=1000 an
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	ref	a	a	5	const	502	Using where
 drop table t1, t2;
+CREATE TABLE t1( a INT, b INT, KEY k ( a ), KEY l ( a, b ) );
+INSERT INTO t1(a) VALUES (1);
+INSERT INTO t1 VALUES (2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3);
+INSERT INTO t1 VALUES (3,4),(3,4),(3,4),(3,4),(3,4),(3,4),(3,4),(3,4),(3,4),(3,4);
+INSERT INTO t1 VALUES (4,1),(4,1),(4,1),(4,1),(4,1),(4,1),(4,1),(4,1),(4,1),(4,1);
+ANALYZE TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+INSERT INTO t1 VALUES 
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), 
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2);
+EXPLAIN SELECT * FROM t1 WHERE a = 1 AND b >= 2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	k,l	l	10	NULL	137	Using where; Using index
+DROP TABLE t1;

--- 1.47/mysql-test/t/range.test	2007-02-06 15:52:22 +01:00
+++ 1.48/mysql-test/t/range.test	2007-03-20 08:24:15 +01:00
@@ -820,3 +820,31 @@ explain select * from t2 where a=1000 an
 
 drop table t1, t2;
 
+#
+# Bug #26106: Wrong plan may be chosen when there are several possible range and
+# ref accesses
+#
+CREATE TABLE t1( a INT, b INT, KEY k ( a ), KEY l ( a, b ) ); 
+
+INSERT INTO t1(a) VALUES (1);
+INSERT INTO t1 VALUES (2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3);
+INSERT INTO t1 VALUES (3,4),(3,4),(3,4),(3,4),(3,4),(3,4),(3,4),(3,4),(3,4),(3,4);
+INSERT INTO t1 VALUES (4,1),(4,1),(4,1),(4,1),(4,1),(4,1),(4,1),(4,1),(4,1),(4,1);
+ANALYZE TABLE t1;
+INSERT INTO t1 VALUES 
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), 
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2),
+(1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2);
+EXPLAIN SELECT * FROM t1 WHERE a = 1 AND b >= 2;
+
+DROP TABLE t1;
Thread
bk commit into 5.1 tree (mhansson:1.2466) BUG#26106mhansson20 Mar