From: Jonas Oreland Date: February 23 2011 5:00pm Subject: Re: bzr commit into mysql-5.1-telco-7.0 branch (ole.john.aske:4223) Bug#11804277 List-Archive: http://lists.mysql.com/commits/131964 Message-Id: <4D653D49.8000207@oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Hi OJ, 1) What do you think about using heuristic even if we don't have row-count. E.g if we don't have row-count, set it to 100 and run you range-heuristic anyway 2) What do you think about making sure that EQ is preferred over LT even if row-count is low, so that the divisions don't make estimate the same...if row-count is low... /Jonas On 02/23/11 16:54, Ole John Aske wrote: > #At file:///net/fimafeng09/export/home/tmp/oleja/mysql/mysql-5.1-telco-7.0/ based on revid:ole.john.aske@stripped > > 4223 Ole John Aske 2011-02-23 > Fix for bug#11804277 - INCORRECT INDEX MAY BE SELECTED DUE TO INSUFFICIENT STATISTICS FROM CLUSTER > > Add heuristics to ha_ndbcluster::records_in_range() > which identifies a range as: > > - An open bound range ( LT/GT - A closed bound range ( BETWEEN AND ) > - A (partial) EQ-range ( EQ ) > > ... Or a combination of these.... > > These are handled as follows: > > Open bound ranges > ----------------- > Without a histogram of how the values in the index are > distributed, we can only assume an equal distrubution. > A statistically correct estimate for a condition of the form > ' LT/GT ' would then have been to assume it selects > 50% of the rows in the table. However, I have experienced that > this will cause the range-cost to directly compete with the cost > of a full table scan. We should therefore be somewhat more > conservative and estimate 10% of the rows to be returned. > > Closed bound range > ------------------ > We assume this to be somewhat better than an open bounded range > returning 5% of the rows in the table. > > EQ-range > -------- > An EQ-range will excatly specify a fraction of the first part of > an index. It is reasonable to assume: > > - Specifing a larger fraction of the index will improve the > selectivity of the EQ-range. > - Each part of the specified EQ-range will have the same > selectivity. > > We can model this as a Binomial Distribution of the indexed > values. http://en.wikipedia.org/wiki/Binomial_distrib > > modified: > mysql-test/suite/ndb/r/ndb_condition_pushdown.result > mysql-test/suite/ndb/r/ndb_index.result > mysql-test/suite/ndb/r/ndb_index_unique.result > mysql-test/suite/ndb/r/ndb_read_multi_range.result > mysql-test/suite/ndb/r/ndb_statistics.result > mysql-test/suite/ndb/t/ndb_statistics.test > sql/ha_ndbcluster.cc > === modified file 'mysql-test/suite/ndb/r/ndb_condition_pushdown.result' > --- a/mysql-test/suite/ndb/r/ndb_condition_pushdown.result 2011-01-17 13:29:52 +0000 > +++ b/mysql-test/suite/ndb/r/ndb_condition_pushdown.result 2011-02-23 15:54:12 +0000 > @@ -1910,7 +1910,7 @@ insert into NodeAlias VALUES(null, 8 , ' > 12:22:26'); > explain select * from NodeAlias where (aliasKey LIKE '491803%'); > id select_type table type possible_keys key key_len ref rows Extra > -1 SIMPLE NodeAlias range NodeAlias_KeyIndex NodeAlias_KeyIndex 48 NULL 10 Using where with pushed condition > +1 SIMPLE NodeAlias range NodeAlias_KeyIndex NodeAlias_KeyIndex 48 NULL 2 Using where with pushed condition > select * from NodeAlias where (aliasKey LIKE '491803%') order by id; > id nodeId displayName aliasKey objectVersion changed > 7 8 491803% 491803% 0 2008-03-10 12:22:26 > @@ -2225,7 +2225,7 @@ join tx as t2 on tx.a = t2.c and tx.b = > where t2.a = 4 > group by t2.c; > id select_type table type possible_keys key key_len ref rows filtered Extra > -1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 10 100.00 Using where; Using filesort > +1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 2 100.00 Using where; Using filesort > 1 SIMPLE tx eq_ref PRIMARY PRIMARY 8 test.t2.c,test.t2.d 1 100.00 > Warnings: > Note 1003 select `test`.`t2`.`c` AS `c`,count(distinct `test`.`t2`.`a`) AS `count(distinct t2.a)` from `test`.`tx` join `test`.`tx` `t2` where ((`test`.`tx`.`b` = `test`.`t2`.`d`) and (`test`.`tx`.`a` = `test`.`t2`.`c`) and (`test`.`t2`.`a` = 4)) group by `test`.`t2`.`c` > @@ -2242,7 +2242,7 @@ join tx as t2 on tx.a = t2.c and tx.b = > where t2.a = 4 > group by t2.c; > id select_type table type possible_keys key key_len ref rows filtered Extra > -1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 10 100.00 Using where; Using filesort > +1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 2 100.00 Using where; Using filesort > 1 SIMPLE tx eq_ref PRIMARY PRIMARY 8 test.t2.c,test.t2.d 1 100.00 > Warnings: > Note 1003 select `test`.`t2`.`c` AS `c`,count(distinct `test`.`t2`.`a`) AS `count(distinct t2.a)` from `test`.`tx` join `test`.`tx` `t2` where ((`test`.`tx`.`b` = `test`.`t2`.`d`) and (`test`.`tx`.`a` = `test`.`t2`.`c`) and (`test`.`t2`.`a` = 4)) group by `test`.`t2`.`c` > > === modified file 'mysql-test/suite/ndb/r/ndb_index.result' > --- a/mysql-test/suite/ndb/r/ndb_index.result 2010-12-22 11:13:45 +0000 > +++ b/mysql-test/suite/ndb/r/ndb_index.result 2011-02-23 15:54:12 +0000 > @@ -306,7 +306,7 @@ explain > select i,vc from t1 > where i>=1 or vc > '0'; > id select_type table type possible_keys key key_len ref rows Extra > -1 SIMPLE t1 index_merge PRIMARY,i1,i2 i1,i2 5,18 NULL 20 Using sort_union(i1,i2); Using where with pushed condition > +1 SIMPLE t1 index_merge PRIMARY,i1,i2 i1,i2 5,18 NULL 4 Using sort_union(i1,i2); Using where with pushed condition > select i,vc from t1 > where i>=1 or vc > '0'; > i vc > @@ -350,7 +350,7 @@ explain > select i,vc from t2 > where i>=1 or vc > '0'; > id select_type table type possible_keys key key_len ref rows Extra > -1 SIMPLE t2 index_merge i1,i2 i1,i2 5,19 NULL 20 Using sort_union(i1,i2); Using where with pushed condition > +1 SIMPLE t2 index_merge i1,i2 i1,i2 5,19 NULL 4 Using sort_union(i1,i2); Using where with pushed condition > select i,vc from t2 > where i>=1 or vc > '0'; > i vc > > === modified file 'mysql-test/suite/ndb/r/ndb_index_unique.result' > --- a/mysql-test/suite/ndb/r/ndb_index_unique.result 2011-01-18 07:49:14 +0000 > +++ b/mysql-test/suite/ndb/r/ndb_index_unique.result 2011-02-23 15:54:12 +0000 > @@ -185,7 +185,7 @@ set @old_ecpd = @@session.engine_conditi > set engine_condition_pushdown = true; > explain select * from t2 where (b = 3 OR b = 5) AND c IS NULL AND a < 9 order by a; > id select_type table type possible_keys key key_len ref rows Extra > -1 SIMPLE t2 range PRIMARY,b b 9 NULL 2 Using where with pushed condition; Using filesort > +1 SIMPLE t2 range PRIMARY,b PRIMARY 4 NULL 2 Using where with pushed condition > select * from t2 where (b = 3 OR b = 5) AND c IS NULL AND a < 9 order by a; > a b c > 3 3 NULL > > === modified file 'mysql-test/suite/ndb/r/ndb_read_multi_range.result' > --- a/mysql-test/suite/ndb/r/ndb_read_multi_range.result 2011-01-18 07:49:14 +0000 > +++ b/mysql-test/suite/ndb/r/ndb_read_multi_range.result 2011-02-23 15:54:12 +0000 > @@ -605,7 +605,7 @@ SELECT DISTINCT STRAIGHT_JOIN t1.pk FROM > t1 LEFT JOIN t2 ON t2.a = t1.a AND t2.pk != 6; > id select_type table type possible_keys key key_len ref rows Extra > 1 SIMPLE t1 ALL NULL NULL NULL NULL 3000 Using temporary > -1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 20 Using where; Distinct > +1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 4 Using where; Distinct > SELECT DISTINCT STRAIGHT_JOIN t1.pk FROM > t1 LEFT JOIN t2 ON t2.a = t1.a AND t2.pk != 6; > drop table t1, t2; > > === modified file 'mysql-test/suite/ndb/r/ndb_statistics.result' > --- a/mysql-test/suite/ndb/r/ndb_statistics.result 2011-01-18 11:49:03 +0000 > +++ b/mysql-test/suite/ndb/r/ndb_statistics.result 2011-02-23 15:54:12 +0000 > @@ -38,24 +38,124 @@ id select_type table type possible_keys > EXPLAIN > SELECT * FROM t10000 WHERE k >= 42 and k < 10000; > id select_type table type possible_keys key key_len ref rows Extra > -1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition > +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 500 Using where with pushed condition > EXPLAIN > SELECT * FROM t10000 WHERE k BETWEEN 42 AND 10000; > id select_type table type possible_keys key key_len ref rows Extra > -1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition > +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 500 Using where with pushed condition > EXPLAIN > SELECT * FROM t10000 WHERE k < 42; > id select_type table type possible_keys key key_len ref rows Extra > -1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition > +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition > EXPLAIN > SELECT * FROM t10000 WHERE k > 42; > id select_type table type possible_keys key key_len ref rows Extra > -1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition > +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition > EXPLAIN > SELECT * FROM t10000 AS X JOIN t10000 AS Y > ON Y.I=X.I AND Y.J = X.I; > id select_type table type possible_keys key key_len ref rows Extra > 1 SIMPLE X ALL I NULL NULL NULL 10000 > 1 SIMPLE Y ref J,I I 10 test.X.I,test.X.I 11 Using where > +EXPLAIN > +SELECT * FROM t100 WHERE k < 42; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t100 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t100 WHERE k > 42; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t100 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE k < 42; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE k > 42; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t100 WHERE k BETWEEN 42 AND 10000; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t100 range PRIMARY PRIMARY 4 NULL 5 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE k BETWEEN 42 AND 10000; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 500 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 ref I I 5 const 200 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 ref J J 5 const 100 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J = 0; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 ref J,I I 10 const,const 4 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 ref I I 5 const 200 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J > 1; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range J,I I 10 NULL 100 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J < 1; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range J,I I 10 NULL 50 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J BETWEEN 1 AND 10; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range J,I I 10 NULL 50 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J = 1; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 ref J,I I 10 const,const 4 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 ref J J 5 const 100 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0 AND K > 1; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range PRIMARY,J J 9 NULL 50 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0 AND K < 1; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range PRIMARY,J J 9 NULL 50 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0 AND K BETWEEN 1 AND 10; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range PRIMARY,J J 9 NULL 25 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0 AND K = 1; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J > 1; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range J,I I 10 NULL 100 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE I > 0 AND J = 1; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 ref J,I J 5 const 100 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE I > 0 AND J > 1; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range J,I J 5 NULL 1000 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE J > 1 AND I = 0; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range J,I I 10 NULL 100 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 1 AND I > 0; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 ref J,I J 5 const 100 Using where with pushed condition > +EXPLAIN > +SELECT * FROM t10000 WHERE J > 1 AND I > 0; > +id select_type table type possible_keys key key_len ref rows Extra > +1 SIMPLE t10000 range J,I J 5 NULL 1000 Using where with pushed condition > DROP TABLE t10,t100,t10000; > End of 5.1 tests > > === modified file 'mysql-test/suite/ndb/t/ndb_statistics.test' > --- a/mysql-test/suite/ndb/t/ndb_statistics.test 2011-01-18 11:49:03 +0000 > +++ b/mysql-test/suite/ndb/t/ndb_statistics.test 2011-02-23 15:54:12 +0000 > @@ -62,6 +62,81 @@ EXPLAIN > SELECT * FROM t10000 AS X JOIN t10000 AS Y > ON Y.I=X.I AND Y.J = X.I; > > +# > +# Improved heurists for ::records_in_range() statistics > +# > + > +# Open bounded range should return 10% of #rows in table > +EXPLAIN > +SELECT * FROM t100 WHERE k < 42; > +EXPLAIN > +SELECT * FROM t100 WHERE k > 42; > +EXPLAIN > +SELECT * FROM t10000 WHERE k < 42; > +EXPLAIN > +SELECT * FROM t10000 WHERE k > 42; > + > +#Closed bounded range should return 5% of #rows in table > +EXPLAIN > +SELECT * FROM t100 WHERE k BETWEEN 42 AND 10000; > +EXPLAIN > +SELECT * FROM t10000 WHERE k BETWEEN 42 AND 10000; > + > +#EQ-range selectivity depends on > +# - key length specified > +# - #rows in table. > +# - unique/non-unique index > +# - min 2% selectivity > +# > +# Possibly combined with open/closed ranges as > +# above which further improves selectivity > +# > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0; > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0; > + > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J = 0; > + > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0; > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J > 1; > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J < 1; > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J BETWEEN 1 AND 10; > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J = 1; > + > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0; > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0 AND K > 1; > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0 AND K < 1; > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0 AND K BETWEEN 1 AND 10; > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 0 AND K = 1; > + > +## Verify selection of 'best' index > +## (The one of index I/J being EQ) > +EXPLAIN > +SELECT * FROM t10000 WHERE I = 0 AND J <> 1; > +EXPLAIN > +SELECT * FROM t10000 WHERE I <> 0 AND J = 1; > +EXPLAIN > +SELECT * FROM t10000 WHERE I <> 0 AND J <> 1; > + > +EXPLAIN > +SELECT * FROM t10000 WHERE J <> 1 AND I = 0; > +EXPLAIN > +SELECT * FROM t10000 WHERE J = 1 AND I <> 0; > +EXPLAIN > +SELECT * FROM t10000 WHERE J <> 1 AND I <> 0; > + > > DROP TABLE t10,t100,t10000; > > > === modified file 'sql/ha_ndbcluster.cc' > --- a/sql/ha_ndbcluster.cc 2011-02-18 10:19:20 +0000 > +++ b/sql/ha_ndbcluster.cc 2011-02-23 15:54:12 +0000 > @@ -11182,7 +11182,87 @@ ha_ndbcluster::records_in_range(uint inx > DBUG_RETURN(rows); > } > > - DBUG_RETURN(10); /* Good guess when you don't know anything */ > + /* Use simple heuristics to estimate fraction > + of 'stats.record' returned from range. > + */ > + if (stats.records != 0 && stats.records != HA_POS_ERROR) > + { > + Uint64 rows; > + Uint64 table_rows= stats.records; > + size_t min_key_length= (min_key) ? min_key->length : 0; > + size_t max_key_length= (max_key) ? max_key->length : 0; > + > + // Might have an closed/open range bound: > + // Low range open > + if (!min_key_length) > + { > + rows= (!max_key_length) > + ? table_rows // No range was specified > + : table_rows/10; // -oo .. -> 10% selectivity > + } > + // High range open > + else if (!max_key_length) > + { > + rows= table_rows/10; // ..oo -> 10% selectivity > + } > + else > + { > + size_t bounds_len= min(min_key_length,max_key_length); > + uint eq_bound_len= 0; > + uint eq_bound_offs= 0; > + > + KEY_PART_INFO* key_part= key_info->key_part; > + KEY_PART_INFO* end= key_part+key_info->key_parts; > + for (; key_part != end; key_part++) > + { > + uint part_length= key_part->store_length; > + if (eq_bound_offs+part_length > bounds_len || > + memcmp(&min_key->key[eq_bound_offs], > + &max_key->key[eq_bound_offs], > + part_length)) > + { > + break; > + } > + eq_bound_len+= key_part->length; > + eq_bound_offs+= part_length; > + } > + > + if (!eq_bound_len) > + { > + rows= table_rows/20; // .. -> 5% selectivity > + } > + else > + { > + // Has an equality range on a leading part of 'key_length': > + // - Null indicator, and HA_KEY_BLOB_LENGTH bytes in > + // 'extra_length' are removed from key_fraction calculations. > + // - Assume reduced selectivity for non-unique indexes > + // by decreasing 'eq_fraction' by 20% > + // - Assume equal selectivity for all eq_parts in key. > + > + double eq_fraction = (double)(eq_bound_len) / > + (key_length - key_info->extra_length); > + if (idx_type == ORDERED_INDEX) // Non-unique index -> less selectivity > + eq_fraction/= 1.20; > + if (eq_fraction >= 1.0) // Exact match -> 1 row > + DBUG_RETURN(1); > + > + rows = (Uint64)(table_rows / pow(table_rows, eq_fraction)); > + if (rows > (table_rows/50)) // EQ-range: Max 2% of rows > + rows= (table_rows/50); > + > + if (min_key_length > eq_bound_offs) > + rows/= 2; > + if (max_key_length > eq_bound_offs) > + rows/= 2; > + } > + } > + if (rows < 2) // At least 2 rows as not exact > + rows= 2; > + DBUG_RETURN(min(rows,table_rows)); > + } > + > + DBUG_RETURN(10); /* Poor guess when you don't know anything */ > } > > ulonglong ha_ndbcluster::table_flags(void) const > > > > >