#At file:///net/fimafeng09/export/home/tmp/oleja/mysql/mysql-5.1-telco-7.0/ based on revid:ole.john.aske@stripped
4223 Ole John Aske 2011-02-23
Fix for bug#11804277 - INCORRECT INDEX MAY BE SELECTED DUE TO INSUFFICIENT STATISTICS FROM CLUSTER
Add heuristics to ha_ndbcluster::records_in_range()
which identifies a range as:
- An open bound range (<column> LT/GT <bound)
- A closed bound range (<column> BETWEEN <low> AND <HIGH>)
- A (partial) EQ-range (<column> EQ <bound>)
... Or a combination of these....
These are handled as follows:
Open bound ranges
-----------------
Without a histogram of how the values in the index are
distributed, we can only assume an equal distrubution.
A statistically correct estimate for a condition of the form
'<column> LT/GT <bound>' would then have been to assume it selects
50% of the rows in the table. However, I have experienced that
this will cause the range-cost to directly compete with the cost
of a full table scan. We should therefore be somewhat more
conservative and estimate 10% of the rows to be returned.
Closed bound range
------------------
We assume this to be somewhat better than an open bounded range
returning 5% of the rows in the table.
EQ-range
--------
An EQ-range will excatly specify a fraction of the first part of
an index. It is reasonable to assume:
- Specifing a larger fraction of the index will improve the
selectivity of the EQ-range.
- Each part of the specified EQ-range will have the same
selectivity.
We can model this as a Binomial Distribution of the indexed
values. http://en.wikipedia.org/wiki/Binomial_distrib
modified:
mysql-test/suite/ndb/r/ndb_condition_pushdown.result
mysql-test/suite/ndb/r/ndb_index.result
mysql-test/suite/ndb/r/ndb_index_unique.result
mysql-test/suite/ndb/r/ndb_read_multi_range.result
mysql-test/suite/ndb/r/ndb_statistics.result
mysql-test/suite/ndb/t/ndb_statistics.test
sql/ha_ndbcluster.cc
=== modified file 'mysql-test/suite/ndb/r/ndb_condition_pushdown.result'
--- a/mysql-test/suite/ndb/r/ndb_condition_pushdown.result 2011-01-17 13:29:52 +0000
+++ b/mysql-test/suite/ndb/r/ndb_condition_pushdown.result 2011-02-23 15:54:12 +0000
@@ -1910,7 +1910,7 @@ insert into NodeAlias VALUES(null, 8 , '
12:22:26');
explain select * from NodeAlias where (aliasKey LIKE '491803%');
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE NodeAlias range NodeAlias_KeyIndex NodeAlias_KeyIndex 48 NULL 10 Using where with pushed condition
+1 SIMPLE NodeAlias range NodeAlias_KeyIndex NodeAlias_KeyIndex 48 NULL 2 Using where with pushed condition
select * from NodeAlias where (aliasKey LIKE '491803%') order by id;
id nodeId displayName aliasKey objectVersion changed
7 8 491803% 491803% 0 2008-03-10 12:22:26
@@ -2225,7 +2225,7 @@ join tx as t2 on tx.a = t2.c and tx.b =
where t2.a = 4
group by t2.c;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 10 100.00 Using where; Using filesort
+1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 2 100.00 Using where; Using filesort
1 SIMPLE tx eq_ref PRIMARY PRIMARY 8 test.t2.c,test.t2.d 1 100.00
Warnings:
Note 1003 select `test`.`t2`.`c` AS `c`,count(distinct `test`.`t2`.`a`) AS `count(distinct t2.a)` from `test`.`tx` join `test`.`tx` `t2` where ((`test`.`tx`.`b` = `test`.`t2`.`d`) and (`test`.`tx`.`a` = `test`.`t2`.`c`) and (`test`.`t2`.`a` = 4)) group by `test`.`t2`.`c`
@@ -2242,7 +2242,7 @@ join tx as t2 on tx.a = t2.c and tx.b =
where t2.a = 4
group by t2.c;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 10 100.00 Using where; Using filesort
+1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 2 100.00 Using where; Using filesort
1 SIMPLE tx eq_ref PRIMARY PRIMARY 8 test.t2.c,test.t2.d 1 100.00
Warnings:
Note 1003 select `test`.`t2`.`c` AS `c`,count(distinct `test`.`t2`.`a`) AS `count(distinct t2.a)` from `test`.`tx` join `test`.`tx` `t2` where ((`test`.`tx`.`b` = `test`.`t2`.`d`) and (`test`.`tx`.`a` = `test`.`t2`.`c`) and (`test`.`t2`.`a` = 4)) group by `test`.`t2`.`c`
=== modified file 'mysql-test/suite/ndb/r/ndb_index.result'
--- a/mysql-test/suite/ndb/r/ndb_index.result 2010-12-22 11:13:45 +0000
+++ b/mysql-test/suite/ndb/r/ndb_index.result 2011-02-23 15:54:12 +0000
@@ -306,7 +306,7 @@ explain
select i,vc from t1
where i>=1 or vc > '0';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 index_merge PRIMARY,i1,i2 i1,i2 5,18 NULL 20 Using sort_union(i1,i2); Using where with pushed condition
+1 SIMPLE t1 index_merge PRIMARY,i1,i2 i1,i2 5,18 NULL 4 Using sort_union(i1,i2); Using where with pushed condition
select i,vc from t1
where i>=1 or vc > '0';
i vc
@@ -350,7 +350,7 @@ explain
select i,vc from t2
where i>=1 or vc > '0';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t2 index_merge i1,i2 i1,i2 5,19 NULL 20 Using sort_union(i1,i2); Using where with pushed condition
+1 SIMPLE t2 index_merge i1,i2 i1,i2 5,19 NULL 4 Using sort_union(i1,i2); Using where with pushed condition
select i,vc from t2
where i>=1 or vc > '0';
i vc
=== modified file 'mysql-test/suite/ndb/r/ndb_index_unique.result'
--- a/mysql-test/suite/ndb/r/ndb_index_unique.result 2011-01-18 07:49:14 +0000
+++ b/mysql-test/suite/ndb/r/ndb_index_unique.result 2011-02-23 15:54:12 +0000
@@ -185,7 +185,7 @@ set @old_ecpd = @@session.engine_conditi
set engine_condition_pushdown = true;
explain select * from t2 where (b = 3 OR b = 5) AND c IS NULL AND a < 9 order by a;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t2 range PRIMARY,b b 9 NULL 2 Using where with pushed condition; Using filesort
+1 SIMPLE t2 range PRIMARY,b PRIMARY 4 NULL 2 Using where with pushed condition
select * from t2 where (b = 3 OR b = 5) AND c IS NULL AND a < 9 order by a;
a b c
3 3 NULL
=== modified file 'mysql-test/suite/ndb/r/ndb_read_multi_range.result'
--- a/mysql-test/suite/ndb/r/ndb_read_multi_range.result 2011-01-18 07:49:14 +0000
+++ b/mysql-test/suite/ndb/r/ndb_read_multi_range.result 2011-02-23 15:54:12 +0000
@@ -605,7 +605,7 @@ SELECT DISTINCT STRAIGHT_JOIN t1.pk FROM
t1 LEFT JOIN t2 ON t2.a = t1.a AND t2.pk != 6;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 3000 Using temporary
-1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 20 Using where; Distinct
+1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 4 Using where; Distinct
SELECT DISTINCT STRAIGHT_JOIN t1.pk FROM
t1 LEFT JOIN t2 ON t2.a = t1.a AND t2.pk != 6;
drop table t1, t2;
=== modified file 'mysql-test/suite/ndb/r/ndb_statistics.result'
--- a/mysql-test/suite/ndb/r/ndb_statistics.result 2011-01-18 11:49:03 +0000
+++ b/mysql-test/suite/ndb/r/ndb_statistics.result 2011-02-23 15:54:12 +0000
@@ -38,24 +38,124 @@ id select_type table type possible_keys
EXPLAIN
SELECT * FROM t10000 WHERE k >= 42 and k < 10000;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition
+1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 500 Using where with pushed condition
EXPLAIN
SELECT * FROM t10000 WHERE k BETWEEN 42 AND 10000;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition
+1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 500 Using where with pushed condition
EXPLAIN
SELECT * FROM t10000 WHERE k < 42;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition
+1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition
EXPLAIN
SELECT * FROM t10000 WHERE k > 42;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition
+1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition
EXPLAIN
SELECT * FROM t10000 AS X JOIN t10000 AS Y
ON Y.I=X.I AND Y.J = X.I;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE X ALL I NULL NULL NULL 10000
1 SIMPLE Y ref J,I I 10 test.X.I,test.X.I 11 Using where
+EXPLAIN
+SELECT * FROM t100 WHERE k < 42;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t100 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t100 WHERE k > 42;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t100 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE k < 42;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE k > 42;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t100 WHERE k BETWEEN 42 AND 10000;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t100 range PRIMARY PRIMARY 4 NULL 5 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE k BETWEEN 42 AND 10000;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 500 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 ref I I 5 const 200 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 ref J J 5 const 100 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J = 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 ref J,I I 10 const,const 4 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 ref I I 5 const 200 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J > 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range J,I I 10 NULL 100 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J < 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range J,I I 10 NULL 50 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J BETWEEN 1 AND 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range J,I I 10 NULL 50 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 ref J,I I 10 const,const 4 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 ref J J 5 const 100 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0 AND K > 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range PRIMARY,J J 9 NULL 50 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0 AND K < 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range PRIMARY,J J 9 NULL 50 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0 AND K BETWEEN 1 AND 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range PRIMARY,J J 9 NULL 25 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0 AND K = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J > 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range J,I I 10 NULL 100 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE I > 0 AND J = 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 ref J,I J 5 const 100 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE I > 0 AND J > 1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range J,I J 5 NULL 1000 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE J > 1 AND I = 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range J,I I 10 NULL 100 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 1 AND I > 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 ref J,I J 5 const 100 Using where with pushed condition
+EXPLAIN
+SELECT * FROM t10000 WHERE J > 1 AND I > 0;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10000 range J,I J 5 NULL 1000 Using where with pushed condition
DROP TABLE t10,t100,t10000;
End of 5.1 tests
=== modified file 'mysql-test/suite/ndb/t/ndb_statistics.test'
--- a/mysql-test/suite/ndb/t/ndb_statistics.test 2011-01-18 11:49:03 +0000
+++ b/mysql-test/suite/ndb/t/ndb_statistics.test 2011-02-23 15:54:12 +0000
@@ -62,6 +62,81 @@ EXPLAIN
SELECT * FROM t10000 AS X JOIN t10000 AS Y
ON Y.I=X.I AND Y.J = X.I;
+#
+# Improved heurists for ::records_in_range() statistics
+#
+
+# Open bounded range should return 10% of #rows in table
+EXPLAIN
+SELECT * FROM t100 WHERE k < 42;
+EXPLAIN
+SELECT * FROM t100 WHERE k > 42;
+EXPLAIN
+SELECT * FROM t10000 WHERE k < 42;
+EXPLAIN
+SELECT * FROM t10000 WHERE k > 42;
+
+#Closed bounded range should return 5% of #rows in table
+EXPLAIN
+SELECT * FROM t100 WHERE k BETWEEN 42 AND 10000;
+EXPLAIN
+SELECT * FROM t10000 WHERE k BETWEEN 42 AND 10000;
+
+#EQ-range selectivity depends on
+# - key length specified
+# - #rows in table.
+# - unique/non-unique index
+# - min 2% selectivity
+#
+# Possibly combined with open/closed ranges as
+# above which further improves selectivity
+#
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0;
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0;
+
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J = 0;
+
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0;
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J > 1;
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J < 1;
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J BETWEEN 1 AND 10;
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J = 1;
+
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0;
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0 AND K > 1;
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0 AND K < 1;
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0 AND K BETWEEN 1 AND 10;
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 0 AND K = 1;
+
+## Verify selection of 'best' index
+## (The one of index I/J being EQ)
+EXPLAIN
+SELECT * FROM t10000 WHERE I = 0 AND J <> 1;
+EXPLAIN
+SELECT * FROM t10000 WHERE I <> 0 AND J = 1;
+EXPLAIN
+SELECT * FROM t10000 WHERE I <> 0 AND J <> 1;
+
+EXPLAIN
+SELECT * FROM t10000 WHERE J <> 1 AND I = 0;
+EXPLAIN
+SELECT * FROM t10000 WHERE J = 1 AND I <> 0;
+EXPLAIN
+SELECT * FROM t10000 WHERE J <> 1 AND I <> 0;
+
DROP TABLE t10,t100,t10000;
=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc 2011-02-18 10:19:20 +0000
+++ b/sql/ha_ndbcluster.cc 2011-02-23 15:54:12 +0000
@@ -11182,7 +11182,87 @@ ha_ndbcluster::records_in_range(uint inx
DBUG_RETURN(rows);
}
- DBUG_RETURN(10); /* Good guess when you don't know anything */
+ /* Use simple heuristics to estimate fraction
+ of 'stats.record' returned from range.
+ */
+ if (stats.records != 0 && stats.records != HA_POS_ERROR)
+ {
+ Uint64 rows;
+ Uint64 table_rows= stats.records;
+ size_t min_key_length= (min_key) ? min_key->length : 0;
+ size_t max_key_length= (max_key) ? max_key->length : 0;
+
+ // Might have an closed/open range bound:
+ // Low range open
+ if (!min_key_length)
+ {
+ rows= (!max_key_length)
+ ? table_rows // No range was specified
+ : table_rows/10; // -oo .. <high range> -> 10% selectivity
+ }
+ // High range open
+ else if (!max_key_length)
+ {
+ rows= table_rows/10; // <low range>..oo -> 10% selectivity
+ }
+ else
+ {
+ size_t bounds_len= min(min_key_length,max_key_length);
+ uint eq_bound_len= 0;
+ uint eq_bound_offs= 0;
+
+ KEY_PART_INFO* key_part= key_info->key_part;
+ KEY_PART_INFO* end= key_part+key_info->key_parts;
+ for (; key_part != end; key_part++)
+ {
+ uint part_length= key_part->store_length;
+ if (eq_bound_offs+part_length > bounds_len ||
+ memcmp(&min_key->key[eq_bound_offs],
+ &max_key->key[eq_bound_offs],
+ part_length))
+ {
+ break;
+ }
+ eq_bound_len+= key_part->length;
+ eq_bound_offs+= part_length;
+ }
+
+ if (!eq_bound_len)
+ {
+ rows= table_rows/20; // <low range>..<high range> -> 5% selectivity
+ }
+ else
+ {
+ // Has an equality range on a leading part of 'key_length':
+ // - Null indicator, and HA_KEY_BLOB_LENGTH bytes in
+ // 'extra_length' are removed from key_fraction calculations.
+ // - Assume reduced selectivity for non-unique indexes
+ // by decreasing 'eq_fraction' by 20%
+ // - Assume equal selectivity for all eq_parts in key.
+
+ double eq_fraction = (double)(eq_bound_len) /
+ (key_length - key_info->extra_length);
+ if (idx_type == ORDERED_INDEX) // Non-unique index -> less selectivity
+ eq_fraction/= 1.20;
+ if (eq_fraction >= 1.0) // Exact match -> 1 row
+ DBUG_RETURN(1);
+
+ rows = (Uint64)(table_rows / pow(table_rows, eq_fraction));
+ if (rows > (table_rows/50)) // EQ-range: Max 2% of rows
+ rows= (table_rows/50);
+
+ if (min_key_length > eq_bound_offs)
+ rows/= 2;
+ if (max_key_length > eq_bound_offs)
+ rows/= 2;
+ }
+ }
+ if (rows < 2) // At least 2 rows as not exact
+ rows= 2;
+ DBUG_RETURN(min(rows,table_rows));
+ }
+
+ DBUG_RETURN(10); /* Poor guess when you don't know anything */
}
ulonglong ha_ndbcluster::table_flags(void) const
Attachment: [text/bzr-bundle] bzr/ole.john.aske@oracle.com-20110223155412-n0enzknm304djz0g.bundle