From: Ole John Aske Date: February 28 2011 10:42am Subject: bzr push into mysql-5.1-telco-7.0 branch (ole.john.aske:4233 to 4234) Bug#11804277 List-Archive: http://lists.mysql.com/commits/132097 X-Bug: 11804277 Message-Id: <20110228104232.2F5B1223@fimafeng09.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 4234 Ole John Aske 2011-02-28 Fix for bug#11804277 - INCORRECT INDEX MAY BE SELECTED DUE TO INSUFFICIENT STATISTICS FROM CLUSTER Add heuristics to ha_ndbcluster::records_in_range() which identifies a range as: - An open bound range ( LT/GT BETWEEN AND ) - A (partial) EQ-range ( EQ ) ... Or a combination of these.... These are handled as follows: Open bound ranges ----------------- Without a histogram of how the values in the index are distributed, we can only assume an equal distrubution. A statistically correct estimate for a condition of the form ' LT/GT ' would then have been to assume it selects 50% of the rows in the table. However, I have experienced that this will cause the range-cost to directly compete with the cost of a full table scan. We should therefore be somewhat more conservative and estimate 10% of the rows to be returned. Closed bound range ------------------ We assume this to be somewhat better than an open bounded range returning 5% of the rows in the table. EQ-range -------- An EQ-range will excatly specify a fraction of the first part of an index. It is reasonable to assume: - Specifing a larger fraction of the index will improve the selectivity of the EQ-range. - Each part of the specified EQ-range will have the same selectivity. We can model this as a Binomial Distribution of the indexed values. http://en.wikipedia.org/wiki/Binomial_distrib modified: mysql-test/suite/ndb/r/ndb_condition_pushdown.result mysql-test/suite/ndb/r/ndb_index.result mysql-test/suite/ndb/r/ndb_index_unique.result mysql-test/suite/ndb/r/ndb_read_multi_range.result mysql-test/suite/ndb/r/ndb_statistics.result mysql-test/suite/ndb/t/ndb_statistics.test sql/ha_ndbcluster.cc 4233 Maitrayi Sabaratnam 2011-02-24 [merge] Merge modified: storage/ndb/src/kernel/blocks/lgman.cpp === modified file 'mysql-test/suite/ndb/r/ndb_condition_pushdown.result' --- a/mysql-test/suite/ndb/r/ndb_condition_pushdown.result 2011-01-17 13:29:52 +0000 +++ b/mysql-test/suite/ndb/r/ndb_condition_pushdown.result 2011-02-28 10:42:04 +0000 @@ -1910,7 +1910,7 @@ insert into NodeAlias VALUES(null, 8 , ' 12:22:26'); explain select * from NodeAlias where (aliasKey LIKE '491803%'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE NodeAlias range NodeAlias_KeyIndex NodeAlias_KeyIndex 48 NULL 10 Using where with pushed condition +1 SIMPLE NodeAlias range NodeAlias_KeyIndex NodeAlias_KeyIndex 48 NULL 2 Using where with pushed condition select * from NodeAlias where (aliasKey LIKE '491803%') order by id; id nodeId displayName aliasKey objectVersion changed 7 8 491803% 491803% 0 2008-03-10 12:22:26 @@ -2225,7 +2225,7 @@ join tx as t2 on tx.a = t2.c and tx.b = where t2.a = 4 group by t2.c; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 10 100.00 Using where; Using filesort +1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 2 100.00 Using where; Using filesort 1 SIMPLE tx eq_ref PRIMARY PRIMARY 8 test.t2.c,test.t2.d 1 100.00 Warnings: Note 1003 select `test`.`t2`.`c` AS `c`,count(distinct `test`.`t2`.`a`) AS `count(distinct t2.a)` from `test`.`tx` join `test`.`tx` `t2` where ((`test`.`tx`.`b` = `test`.`t2`.`d`) and (`test`.`tx`.`a` = `test`.`t2`.`c`) and (`test`.`t2`.`a` = 4)) group by `test`.`t2`.`c` @@ -2242,7 +2242,7 @@ join tx as t2 on tx.a = t2.c and tx.b = where t2.a = 4 group by t2.c; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 10 100.00 Using where; Using filesort +1 SIMPLE t2 ref PRIMARY PRIMARY 4 const 2 100.00 Using where; Using filesort 1 SIMPLE tx eq_ref PRIMARY PRIMARY 8 test.t2.c,test.t2.d 1 100.00 Warnings: Note 1003 select `test`.`t2`.`c` AS `c`,count(distinct `test`.`t2`.`a`) AS `count(distinct t2.a)` from `test`.`tx` join `test`.`tx` `t2` where ((`test`.`tx`.`b` = `test`.`t2`.`d`) and (`test`.`tx`.`a` = `test`.`t2`.`c`) and (`test`.`t2`.`a` = 4)) group by `test`.`t2`.`c` === modified file 'mysql-test/suite/ndb/r/ndb_index.result' --- a/mysql-test/suite/ndb/r/ndb_index.result 2010-12-22 11:13:45 +0000 +++ b/mysql-test/suite/ndb/r/ndb_index.result 2011-02-28 10:42:04 +0000 @@ -306,7 +306,7 @@ explain select i,vc from t1 where i>=1 or vc > '0'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index_merge PRIMARY,i1,i2 i1,i2 5,18 NULL 20 Using sort_union(i1,i2); Using where with pushed condition +1 SIMPLE t1 index_merge PRIMARY,i1,i2 i1,i2 5,18 NULL 6 Using sort_union(i1,i2); Using where with pushed condition select i,vc from t1 where i>=1 or vc > '0'; i vc @@ -350,7 +350,7 @@ explain select i,vc from t2 where i>=1 or vc > '0'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index_merge i1,i2 i1,i2 5,19 NULL 20 Using sort_union(i1,i2); Using where with pushed condition +1 SIMPLE t2 index_merge i1,i2 i1,i2 5,19 NULL 6 Using sort_union(i1,i2); Using where with pushed condition select i,vc from t2 where i>=1 or vc > '0'; i vc === modified file 'mysql-test/suite/ndb/r/ndb_index_unique.result' --- a/mysql-test/suite/ndb/r/ndb_index_unique.result 2011-01-18 07:49:14 +0000 +++ b/mysql-test/suite/ndb/r/ndb_index_unique.result 2011-02-28 10:42:04 +0000 @@ -185,7 +185,7 @@ set @old_ecpd = @@session.engine_conditi set engine_condition_pushdown = true; explain select * from t2 where (b = 3 OR b = 5) AND c IS NULL AND a < 9 order by a; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range PRIMARY,b b 9 NULL 2 Using where with pushed condition; Using filesort +1 SIMPLE t2 range PRIMARY,b PRIMARY 4 NULL 2 Using where with pushed condition select * from t2 where (b = 3 OR b = 5) AND c IS NULL AND a < 9 order by a; a b c 3 3 NULL === modified file 'mysql-test/suite/ndb/r/ndb_read_multi_range.result' --- a/mysql-test/suite/ndb/r/ndb_read_multi_range.result 2011-01-18 07:49:14 +0000 +++ b/mysql-test/suite/ndb/r/ndb_read_multi_range.result 2011-02-28 10:42:04 +0000 @@ -605,7 +605,7 @@ SELECT DISTINCT STRAIGHT_JOIN t1.pk FROM t1 LEFT JOIN t2 ON t2.a = t1.a AND t2.pk != 6; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 3000 Using temporary -1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 20 Using where; Distinct +1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 6 Using where; Distinct SELECT DISTINCT STRAIGHT_JOIN t1.pk FROM t1 LEFT JOIN t2 ON t2.a = t1.a AND t2.pk != 6; drop table t1, t2; === modified file 'mysql-test/suite/ndb/r/ndb_statistics.result' --- a/mysql-test/suite/ndb/r/ndb_statistics.result 2011-01-18 11:49:03 +0000 +++ b/mysql-test/suite/ndb/r/ndb_statistics.result 2011-02-28 10:42:04 +0000 @@ -38,24 +38,124 @@ id select_type table type possible_keys EXPLAIN SELECT * FROM t10000 WHERE k >= 42 and k < 10000; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 500 Using where with pushed condition EXPLAIN SELECT * FROM t10000 WHERE k BETWEEN 42 AND 10000; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 500 Using where with pushed condition EXPLAIN SELECT * FROM t10000 WHERE k < 42; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition EXPLAIN SELECT * FROM t10000 WHERE k > 42; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition EXPLAIN SELECT * FROM t10000 AS X JOIN t10000 AS Y ON Y.I=X.I AND Y.J = X.I; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE X ALL I NULL NULL NULL 10000 1 SIMPLE Y ref J,I I 10 test.X.I,test.X.I 11 Using where +EXPLAIN +SELECT * FROM t100 WHERE k < 42; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t100 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition +EXPLAIN +SELECT * FROM t100 WHERE k > 42; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t100 range PRIMARY PRIMARY 4 NULL 10 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE k < 42; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE k > 42; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 1000 Using where with pushed condition +EXPLAIN +SELECT * FROM t100 WHERE k BETWEEN 42 AND 10000; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t100 range PRIMARY PRIMARY 4 NULL 5 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE k BETWEEN 42 AND 10000; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range PRIMARY PRIMARY 4 NULL 500 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE I = 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 ref I I 5 const 200 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE J = 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 ref J J 5 const 100 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J = 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 ref J,I I 10 const,const 4 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE I = 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 ref I I 5 const 200 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J > 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range J,I I 10 NULL 100 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J < 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range J,I I 10 NULL 50 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J BETWEEN 1 AND 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range J,I I 10 NULL 50 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J = 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 ref J,I I 10 const,const 4 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE J = 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 ref J J 5 const 100 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE J = 0 AND K > 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range PRIMARY,J J 9 NULL 50 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE J = 0 AND K < 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range PRIMARY,J J 9 NULL 50 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE J = 0 AND K BETWEEN 1 AND 10; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range PRIMARY,J J 9 NULL 25 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE J = 0 AND K = 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J <> 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range J,I I 10 NULL 150 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE I <> 0 AND J = 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 ref J,I J 5 const 100 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE I <> 0 AND J <> 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range J,I J 5 NULL 1500 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE J <> 1 AND I = 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range J,I I 10 NULL 150 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE J = 1 AND I <> 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 ref J,I J 5 const 100 Using where with pushed condition +EXPLAIN +SELECT * FROM t10000 WHERE J <> 1 AND I <> 0; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10000 range J,I J 5 NULL 1500 Using where with pushed condition DROP TABLE t10,t100,t10000; End of 5.1 tests === modified file 'mysql-test/suite/ndb/t/ndb_statistics.test' --- a/mysql-test/suite/ndb/t/ndb_statistics.test 2011-01-18 11:49:03 +0000 +++ b/mysql-test/suite/ndb/t/ndb_statistics.test 2011-02-28 10:42:04 +0000 @@ -62,6 +62,82 @@ EXPLAIN SELECT * FROM t10000 AS X JOIN t10000 AS Y ON Y.I=X.I AND Y.J = X.I; +# +# Bug #11804277: INCORRECT INDEX MAY BE SELECTED DUE TO INSUFFICIENT +# STATISTICS FROM CLUSTER +# + +# Open bounded range should return 10% of #rows in table +EXPLAIN +SELECT * FROM t100 WHERE k < 42; +EXPLAIN +SELECT * FROM t100 WHERE k > 42; +EXPLAIN +SELECT * FROM t10000 WHERE k < 42; +EXPLAIN +SELECT * FROM t10000 WHERE k > 42; + +#Closed bounded range should return 5% of #rows in table +EXPLAIN +SELECT * FROM t100 WHERE k BETWEEN 42 AND 10000; +EXPLAIN +SELECT * FROM t10000 WHERE k BETWEEN 42 AND 10000; + +#EQ-range selectivity depends on +# - key length specified +# - #rows in table. +# - unique/non-unique index +# - min 2% selectivity +# +# Possibly combined with open/closed ranges as +# above which further improves selectivity +# +EXPLAIN +SELECT * FROM t10000 WHERE I = 0; +EXPLAIN +SELECT * FROM t10000 WHERE J = 0; + +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J = 0; + +EXPLAIN +SELECT * FROM t10000 WHERE I = 0; +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J > 1; +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J < 1; +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J BETWEEN 1 AND 10; +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J = 1; + +EXPLAIN +SELECT * FROM t10000 WHERE J = 0; +EXPLAIN +SELECT * FROM t10000 WHERE J = 0 AND K > 1; +EXPLAIN +SELECT * FROM t10000 WHERE J = 0 AND K < 1; +EXPLAIN +SELECT * FROM t10000 WHERE J = 0 AND K BETWEEN 1 AND 10; +EXPLAIN +SELECT * FROM t10000 WHERE J = 0 AND K = 1; + +## Verify selection of 'best' index +## (The one of index I/J being EQ) +EXPLAIN +SELECT * FROM t10000 WHERE I = 0 AND J <> 1; +EXPLAIN +SELECT * FROM t10000 WHERE I <> 0 AND J = 1; +EXPLAIN +SELECT * FROM t10000 WHERE I <> 0 AND J <> 1; + +EXPLAIN +SELECT * FROM t10000 WHERE J <> 1 AND I = 0; +EXPLAIN +SELECT * FROM t10000 WHERE J = 1 AND I <> 0; +EXPLAIN +SELECT * FROM t10000 WHERE J <> 1 AND I <> 0; + DROP TABLE t10,t100,t10000; === modified file 'sql/ha_ndbcluster.cc' --- a/sql/ha_ndbcluster.cc 2011-02-24 09:46:11 +0000 +++ b/sql/ha_ndbcluster.cc 2011-02-28 10:42:04 +0000 @@ -11182,7 +11182,100 @@ ha_ndbcluster::records_in_range(uint inx DBUG_RETURN(rows); } - DBUG_RETURN(10); /* Good guess when you don't know anything */ + /* Use simple heuristics to estimate fraction + of 'stats.record' returned from range. + */ + do + { + if (stats.records == ~(ha_rows)0 || stats.records == 0) + { + /* Refresh statistics, only read from datanodes if 'use_exact_count' */ + THD *thd= current_thd; + if (update_stats(thd, THDVAR(thd, use_exact_count))) + break; + } + + Uint64 rows; + Uint64 table_rows= stats.records; + size_t eq_bound_len= 0; + size_t min_key_length= (min_key) ? min_key->length : 0; + size_t max_key_length= (max_key) ? max_key->length : 0; + + // Might have an closed/open range bound: + // Low range open + if (!min_key_length) + { + rows= (!max_key_length) + ? table_rows // No range was specified + : table_rows/10; // -oo .. -> 10% selectivity + } + // High range open + else if (!max_key_length) + { + rows= table_rows/10; // ..oo -> 10% selectivity + } + else + { + size_t bounds_len= min(min_key_length,max_key_length); + uint eq_bound_len= 0; + uint eq_bound_offs= 0; + + KEY_PART_INFO* key_part= key_info->key_part; + KEY_PART_INFO* end= key_part+key_info->key_parts; + for (; key_part != end; key_part++) + { + uint part_length= key_part->store_length; + if (eq_bound_offs+part_length > bounds_len || + memcmp(&min_key->key[eq_bound_offs], + &max_key->key[eq_bound_offs], + part_length)) + { + break; + } + eq_bound_len+= key_part->length; + eq_bound_offs+= part_length; + } + + if (!eq_bound_len) + { + rows= table_rows/20; // .. -> 5% + } + else + { + // Has an equality range on a leading part of 'key_length': + // - Null indicator, and HA_KEY_BLOB_LENGTH bytes in + // 'extra_length' are removed from key_fraction calculations. + // - Assume reduced selectivity for non-unique indexes + // by decreasing 'eq_fraction' by 20% + // - Assume equal selectivity for all eq_parts in key. + + double eq_fraction = (double)(eq_bound_len) / + (key_length - key_info->extra_length); + if (idx_type == ORDERED_INDEX) // Non-unique index -> less selectivity + eq_fraction/= 1.20; + if (eq_fraction >= 1.0) // Exact match -> 1 row + DBUG_RETURN(1); + + rows = (Uint64)(table_rows / pow(table_rows, eq_fraction)); + if (rows > (table_rows/50)) // EQ-range: Max 2% of rows + rows= (table_rows/50); + + if (min_key_length > eq_bound_offs) + rows/= 2; + if (max_key_length > eq_bound_offs) + rows/= 2; + } + } + + // Make sure that EQ is preferred even if row-count is low + if (eq_bound_len && rows < 2) // At least 2 rows as not exact + rows= 2; + else if (rows < 3) + rows= 3; + DBUG_RETURN(min(rows,table_rows)); + } while (0); + + DBUG_RETURN(10); /* Poor guess when you don't know anything */ } ulonglong ha_ndbcluster::table_flags(void) const No bundle (reason: useless for push emails).