Below is the list of changes that have just been committed into a local
5.1 repository of martin. When martin does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-03-22 13:48:41+01:00, mhansson@stripped +3 -0
bug #26106: Wrong plan may be chosen when there are several possible range and ref
accesses
Credits go to Sergey Petrunia for this description of the bug.
For ref(const) access methods there are two kinds of statistics when
the WHERE has an AND-part of "t.keyXpart1 = c1 AND ... t.keyXpartN = cN",
N < key size.
1. Table statistics, i.e. index cardinality values
2. Range access estimate
those estimates are not necessarily consistent with each other
for example we can get
E(#rows(t.key1part1 = c1)) < E( #rows( t.key1part1 = c1
AND
t.key1part2 IN (1, 2) )
)
or
E(#rows(t.key1part1 = c1)) < E( #rows( t.key1part1 = c1
AND
t.key1part2 = othertbl.field )
)
which is obviously not true. The source of discrepancy is that index
cardinality values can be very inaccurate for skewed datasets. The code
marked as ReuseRangeEstimateForRef-[1-4] tries to do a smart job of using
the best available estimate. e.g. if we have candidate accesses:
* B = ref-access( t.key1part1 = c1 AND t.key1part2 = othertbl.field )
* A = range-access( t.key1part1 = c1 )
and we have the E( #rows(A) ) < E( #rows(B) ) as above, we'll pick
the #rows estimate from range access and use it with ref access. (we
will use ref access as it uses more keyparts, that's a heuristic)
When we have range and ref over the same index we have these rules:
* When access method X scans a subset of rows that access method Y will
scan, use X, no matter what the cost numbers say.
* Look if the obtained cost estimates are consistent. If there is an
inconsistency, eliminate it by using range access estimates instead of
ref.
The bug is that the above rules weren't followed. best_access_path always chose
the index with the lowest number of records, even if the figure wasn't reliable.
mysql-test/r/range.result@stripped, 2007-03-22 13:48:37+01:00, mhansson@stripped +21 -0
bug #26106
test case
mysql-test/t/range.test@stripped, 2007-03-22 13:48:37+01:00, mhansson@stripped +23 -0
bug #26106
test case
sql/sql_select.cc@stripped, 2007-03-22 13:48:37+01:00, mhansson@stripped +12 -1
bug #26106
Introduced variable key_matches_more_parts that overrules a
decision to use a certain key if that decision is
based solely upon obviously unreliable statistics.
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: mhansson
# Host: linux-st28.site
# Root: /home/martin/mysql/src/5.1o-bug26106
--- 1.497/sql/sql_select.cc 2007-02-27 09:39:02 +01:00
+++ 1.498/sql/sql_select.cc 2007-03-22 13:48:37 +01:00
@@ -3807,6 +3807,13 @@ best_access_path(JOIN *join,
/* Calculate how many key segments of the current key we can use */
start_key= keyuse;
+ /*
+ This condition is raised if we find a key from range optimizer that
+ matches more key parts than the best key. In that case, use the current
+ key no matter what the statitistics say.
+ */
+ bool key_matches_more_parts= false;
+
do /* For each keypart */
{
uint keypart= keyuse->keypart;
@@ -4040,7 +4047,10 @@ best_access_path(JOIN *join,
if (!found_ref && table->quick_keys.is_set(key) && // (1)
table->quick_key_parts[key] > max_key_part && // (2)
records < (double)table->quick_rows[key]) // (3)
+ {
records= (double)table->quick_rows[key];
+ key_matches_more_parts= true;
+ }
tmp= records;
}
@@ -4129,7 +4139,8 @@ best_access_path(JOIN *join,
tmp= best_time; // Do nothing
}
} /* not ft_key */
- if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
+ if (tmp < best_time - records/(double) TIME_FOR_COMPARE ||
+ key_matches_more_parts)
{
best_time= tmp + records/(double) TIME_FOR_COMPARE;
best= tmp;
--- 1.59/mysql-test/r/range.result 2007-02-06 15:52:22 +01:00
+++ 1.60/mysql-test/r/range.result 2007-03-22 13:48:37 +01:00
@@ -1008,3 +1008,24 @@ explain select * from t2 where a=1000 an
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ref a a 5 const 502 Using where
drop table t1, t2;
+CREATE TABLE t1( a INT, b INT, KEY k ( a ), KEY l ( a, b ) );
+INSERT INTO t1(a) VALUES (1);
+INSERT INTO t1
+VALUES (2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3);
+INSERT INTO t1 SELECT 3, 4 FROM t1 WHERE a = 2 AND b = 3;
+INSERT INTO t1 SELECT 4, 1 FROM t1 WHERE a = 2 AND b = 3;
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+INSERT INTO t1 VALUES (1, 2);
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2;
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2;
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2;
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2;
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2;
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2;
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2;
+EXPLAIN SELECT * FROM t1 WHERE a = 1 AND b >= 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range k,l l 10 NULL 138 Using where; Using index
+DROP TABLE t1;
--- 1.47/mysql-test/t/range.test 2007-02-06 15:52:22 +01:00
+++ 1.48/mysql-test/t/range.test 2007-03-22 13:48:37 +01:00
@@ -820,3 +820,26 @@ explain select * from t2 where a=1000 an
drop table t1, t2;
+#
+# Bug #26106: Wrong plan may be chosen when there are several possible range and
+# ref accesses
+#
+CREATE TABLE t1( a INT, b INT, KEY k ( a ), KEY l ( a, b ) );
+
+INSERT INTO t1(a) VALUES (1);
+INSERT INTO t1
+VALUES (2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3),(2,3);
+INSERT INTO t1 SELECT 3, 4 FROM t1 WHERE a = 2 AND b = 3;
+INSERT INTO t1 SELECT 4, 1 FROM t1 WHERE a = 2 AND b = 3;
+ANALYZE TABLE t1;
+INSERT INTO t1 VALUES (1, 2);
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2; # 2
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2; # 4
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2; # 8
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2; # 16
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2; # 32
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2; # 64
+INSERT INTO t1 SELECT a, b FROM t1 WHERE a=1 AND b=2; # 128
+EXPLAIN SELECT * FROM t1 WHERE a = 1 AND b >= 2;
+
+DROP TABLE t1;
| Thread |
|---|
| • bk commit into 5.1 tree (mhansson:1.2466) BUG#26106 | mhansson | 22 Mar |