From: Jorgen Loland Date: February 2 2012 4:56pm Subject: bzr push into mysql-trunk branch (jorgen.loland:3831 to 3832) WL#5957 List-Archive: http://lists.mysql.com/commits/142731 Message-Id: <20120202165656.A70A9285@atum21.no.oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3832 Jorgen Loland 2012-02-02 WL#5957 followup: use index statistics in ror_scan_selectivity() as well if the optimizer requests it. @ mysql-test/r/eq_range_idx_stat.result Add test file for WL#5957 @ mysql-test/r/mysqld--help-notwin.result Better description of eq-range-index-dive-limit @ mysql-test/r/mysqld--help-win.result Better description of eq-range-index-dive-limit @ mysql-test/suite/opt_trace/r/eq_range_statistics.result Added more checks that records_in_range is not called @ mysql-test/suite/opt_trace/t/eq_range_statistics.test Added more checks that records_in_range is not called @ mysql-test/t/eq_range_idx_stat.test Add test file for WL#5957 @ sql/handler.cc Added DBUG_EXECUTE_IF that crashes server if records_in_range is called @ sql/opt_range.cc Use index statistics in ror_scan_selectivity() if the optimizer requests it. Added DBUG_EXECUTE_IF that crashes server if records_in_range is called @ sql/sys_vars.cc Better description of eq-range-index-dive-limit added: mysql-test/r/eq_range_idx_stat.result mysql-test/t/eq_range_idx_stat.test modified: mysql-test/r/mysqld--help-notwin.result mysql-test/r/mysqld--help-win.result mysql-test/suite/opt_trace/r/eq_range_statistics.result mysql-test/suite/opt_trace/t/eq_range_statistics.test sql/handler.cc sql/opt_range.cc sql/sys_vars.cc 3831 Ashish Agarwal 2012-02-02 [merge] BUG#11754145: merge from mysql-5.5 to mysql-trunk modified: include/myisam.h storage/myisam/ha_myisam.cc storage/myisam/ha_myisam.h storage/myisam/mi_check.c storage/myisam/myisamchk.c storage/myisam/myisamdef.h storage/myisam/sort.c === added file 'mysql-test/r/eq_range_idx_stat.result' --- a/mysql-test/r/eq_range_idx_stat.result 1970-01-01 00:00:00 +0000 +++ b/mysql-test/r/eq_range_idx_stat.result 2012-02-02 16:56:36 +0000 @@ -0,0 +1,125 @@ +SET eq_range_index_dive_limit=default; +SELECT @@eq_range_index_dive_limit; +@@eq_range_index_dive_limit +10 +CREATE TABLE t1 +( +/* Field names reflect value(rowid) distribution, st=STairs, swt= SaWTooth */ +st_a int, +swt1a int, +swt2a int, +st_b int, +swt1b int, +swt2b int, +key sta_swt12a(st_a,swt1a,swt2a), +key sta_swt1a(st_a,swt1a), +key sta_swt2a(st_a,swt2a), +key sta_swt21a(st_a,swt2a,swt1a), +key st_a(st_a), +key stb_swt1a_2b(st_b,swt1b,swt2a), +key stb_swt1b(st_b,swt1b), +key st_b(st_b) +) ; +ALTER TABLE t1 DISABLE KEYS; +# +# Printing of many insert into t1 disabled. +# +ALTER TABLE t1 ENABLE KEYS; +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Table is already up to date +# +# Run index_merge queries two times: 1) with index dives +# 2) with index statistics +# + +explain +select * from t1 +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1 limit 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt12a,stb_swt1a_2b 15,15 NULL 3 Using intersect(sta_swt12a,stb_swt1a_2b); Using where +select * from t1 +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1 limit 5; +st_a swt1a swt2a st_b swt1b swt2b +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 + +explain +select * from t1 +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 limit 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt12a,stb_swt1a_2b 15,15 NULL 3 Using intersect(sta_swt12a,stb_swt1a_2b); Using where +select * from t1 +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 limit 5; +st_a swt1a swt2a st_b swt1b swt2b +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 + +explain +select * from t1 +where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1 limit 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt12a 10 const,const 89 Using where +select * from t1 +where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1 limit 5; +st_a swt1a swt2a st_b swt1b swt2b +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 + +SET eq_range_index_dive_limit=1; +SET SESSION DEBUG="+d,crash_records_in_range"; + +explain +select * from t1 +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1 limit 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt12a,stb_swt1a_2b 15,15 NULL 3 Using intersect(sta_swt12a,stb_swt1a_2b); Using where +select * from t1 +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1 limit 5; +st_a swt1a swt2a st_b swt1b swt2b +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 + +explain +select * from t1 +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 limit 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index_merge sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt12a,stb_swt1a_2b 15,15 NULL 3 Using intersect(sta_swt12a,stb_swt1a_2b); Using where +select * from t1 +where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 limit 5; +st_a swt1a swt2a st_b swt1b swt2b +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 + +explain +select * from t1 +where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1 limit 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref sta_swt12a,sta_swt1a,sta_swt2a,sta_swt21a,st_a,stb_swt1a_2b,stb_swt1b,st_b sta_swt12a 10 const,const 90 Using where +select * from t1 +where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1 limit 5; +st_a swt1a swt2a st_b swt1b swt2b +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 +1 1 1 1 1 1 + +SET eq_range_index_dive_limit=1; +SET SESSION DEBUG="+d,crash_records_in_range"; +DROP TABLE t1; === modified file 'mysql-test/r/mysqld--help-notwin.result' --- a/mysql-test/r/mysqld--help-notwin.result 2012-01-31 10:40:02 +0000 +++ b/mysql-test/r/mysqld--help-notwin.result 2012-02-02 16:56:36 +0000 @@ -154,7 +154,8 @@ The following options may be given as th --eq-range-index-dive-limit=# The optimizer will use existing index statistics instead of doing index dives for equality ranges if the number of - equality ranges is larger than or equal to this number. + equality ranges for the index is larger than or equal to + this number. --event-scheduler[=name] Enable the event scheduler. Possible values are ON, OFF, and DISABLED (keep the event scheduler completely === modified file 'mysql-test/r/mysqld--help-win.result' --- a/mysql-test/r/mysqld--help-win.result 2012-01-31 10:40:02 +0000 +++ b/mysql-test/r/mysqld--help-win.result 2012-02-02 16:56:36 +0000 @@ -154,7 +154,8 @@ The following options may be given as th --eq-range-index-dive-limit=# The optimizer will use existing index statistics instead of doing index dives for equality ranges if the number of - equality ranges is larger than or equal to this number. + equality ranges for the index is larger than or equal to + this number. --event-scheduler[=name] Enable the event scheduler. Possible values are ON, OFF, and DISABLED (keep the event scheduler completely === modified file 'mysql-test/suite/opt_trace/r/eq_range_statistics.result' --- a/mysql-test/suite/opt_trace/r/eq_range_statistics.result 2012-01-26 13:09:59 +0000 +++ b/mysql-test/suite/opt_trace/r/eq_range_statistics.result 2012-02-02 16:56:36 +0000 @@ -36,10 +36,17 @@ SET eq_range_index_dive_limit=3; SELECT @@eq_range_index_dive_limit; @@eq_range_index_dive_limit 3 +SET SESSION DEBUG="+d,crash_records_in_range"; # Index statistics kicks in, giving incorrect estimate of 3x2=6 records EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 6 Using where; Using index +SELECT * FROM t1 WHERE a IN (1,2,3); +a b +1 1 +2 2 +3 3 +SET SESSION DEBUG="-d,crash_records_in_range"; ##### # Below: A number of tests to verify that the number of equality ranges # are counted correctly @@ -422,6 +429,7 @@ EXPLAIN SELECT * FROM t1 WHERE a=5 OR a> } 0 0 # 3 equality ranges: should use index statistics +SET SESSION DEBUG="+d,crash_records_in_range"; EXPLAIN SELECT * FROM t1 WHERE a=5 AND (b=2 OR b=3 OR b=4); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 10 NULL 3 Using where; Using index @@ -631,6 +639,7 @@ EXPLAIN SELECT * FROM t1 WHERE a=5 AND ( } ] /* steps */ } 0 0 +SET SESSION DEBUG="-d,crash_records_in_range"; # 2 equality ranges: should not use index statistics EXPLAIN SELECT * FROM t1 WHERE a=5 AND (b=2 OR b=3 OR b>4); === modified file 'mysql-test/suite/opt_trace/t/eq_range_statistics.test' --- a/mysql-test/suite/opt_trace/t/eq_range_statistics.test 2012-01-26 13:09:59 +0000 +++ b/mysql-test/suite/opt_trace/t/eq_range_statistics.test 2012-02-02 16:56:36 +0000 @@ -38,8 +38,12 @@ EXPLAIN SELECT * FROM t1 WHERE a IN (1,2 SET eq_range_index_dive_limit=3; SELECT @@eq_range_index_dive_limit; +# Crash server if records_in_range() is called +SET SESSION DEBUG="+d,crash_records_in_range"; --echo # Index statistics kicks in, giving incorrect estimate of 3x2=6 records EXPLAIN SELECT * FROM t1 WHERE a IN (1,2,3); +SELECT * FROM t1 WHERE a IN (1,2,3); +SET SESSION DEBUG="-d,crash_records_in_range"; --echo ##### --echo # Below: A number of tests to verify that the number of equality ranges @@ -53,15 +57,21 @@ EXPLAIN SELECT * FROM t1 WHERE a IN (1,2 EXPLAIN SELECT * FROM t1 WHERE a=5 OR a>10 OR a IN (1); SELECT * FROM information_schema.OPTIMIZER_TRACE; +# This query will use index statistics (as shown in trace) but +# we cannot make it crash if records_in_range() is called +# because the "a>10" range will correctly call records_in_range() --echo --echo # 3 equality ranges: should use index statistics EXPLAIN SELECT * FROM t1 WHERE a=5 OR a>10 OR a IN (1,2); SELECT * FROM information_schema.OPTIMIZER_TRACE; +# Crash server if records_in_range() is called --echo --echo # 3 equality ranges: should use index statistics +SET SESSION DEBUG="+d,crash_records_in_range"; EXPLAIN SELECT * FROM t1 WHERE a=5 AND (b=2 OR b=3 OR b=4); SELECT * FROM information_schema.OPTIMIZER_TRACE; +SET SESSION DEBUG="-d,crash_records_in_range"; --echo --echo # 2 equality ranges: should not use index statistics === added file 'mysql-test/t/eq_range_idx_stat.test' --- a/mysql-test/t/eq_range_idx_stat.test 1970-01-01 00:00:00 +0000 +++ b/mysql-test/t/eq_range_idx_stat.test 2012-02-02 16:56:36 +0000 @@ -0,0 +1,101 @@ +# Tests for eq_range_index_dive_limit variable: +# test that index dives are not done when index +# statistics is requested + +--source include/have_debug.inc + +SET eq_range_index_dive_limit=default; +SELECT @@eq_range_index_dive_limit; + +CREATE TABLE t1 +( + /* Field names reflect value(rowid) distribution, st=STairs, swt= SaWTooth */ + st_a int, + swt1a int, + swt2a int, + + st_b int, + swt1b int, + swt2b int, + + key sta_swt12a(st_a,swt1a,swt2a), + key sta_swt1a(st_a,swt1a), + key sta_swt2a(st_a,swt2a), + key sta_swt21a(st_a,swt2a,swt1a), + + key st_a(st_a), + key stb_swt1a_2b(st_b,swt1b,swt2a), + key stb_swt1b(st_b,swt1b), + key st_b(st_b) +) ; + +ALTER TABLE t1 DISABLE KEYS; + +--disable_query_log +--echo # +--echo # Printing of many insert into t1 disabled. +--echo # +let $1=3; +while ($1) +{ + let $2=3; + while ($2) + { + let $3=3; + while ($3) + { + let $equal_records=30; + while ($equal_records) + { + eval insert into t1 select $1, $2, $3, $1 ,$2, $3; + dec $equal_records; + } + dec $3; + } + dec $2; + } + dec $1; +} +--enable_query_log +ALTER TABLE t1 ENABLE KEYS; +ANALYZE TABLE t1; + +--echo # +--echo # Run index_merge queries two times: 1) with index dives +--echo # 2) with index statistics +--echo # +let $iterations=2; +while ($iterations) +{ + dec $iterations; + + --echo + explain + select * from t1 + where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1 limit 5; + + select * from t1 + where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 and swt2b=1 limit 5; + + --echo + explain + select * from t1 + where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 limit 5; + + select * from t1 + where st_a=1 and swt1a=1 and swt2a=1 and st_b=1 and swt1b=1 limit 5; + + --echo + explain + select * from t1 + where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1 limit 5; + + select * from t1 + where st_a=1 and swt1a=1 and st_b=1 and swt1b=1 and swt1b=1 limit 5; + + --echo + SET eq_range_index_dive_limit=1; + SET SESSION DEBUG="+d,crash_records_in_range"; +} + +DROP TABLE t1; === modified file 'sql/handler.cc' --- a/sql/handler.cc 2012-01-31 07:45:59 +0000 +++ b/sql/handler.cc 2012-02-02 16:56:36 +0000 @@ -4616,6 +4616,7 @@ handler::multi_range_read_info_const(uin rows= table->key_info[keyno].rec_per_key[keyparts_used-1]; else { + DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE();); if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, max_endp))) { === modified file 'sql/opt_range.cc' --- a/sql/opt_range.cc 2012-01-31 07:45:59 +0000 +++ b/sql/opt_range.cc 2012-02-02 16:56:36 +0000 @@ -4654,22 +4654,26 @@ void ror_intersect_cpy(ROR_INTERSECT_INF /* - Get selectivity of a ROR scan wrt ROR-intersection. + Get selectivity of adding a ROR scan to the ROR-intersection. SYNOPSIS ror_scan_selectivity() - info ROR-interection - scan ROR scan + info ROR-interection, an intersection of ROR index scans + scan ROR scan that may or may not improve the selectivity + of 'info' NOTES - Suppose we have a condition on several keys - cond=k_11=c_11 AND k_12=c_12 AND ... // parts of first key - k_21=c_21 AND k_22=c_22 AND ... // parts of second key + Suppose we have conditions on several keys + cond=k_11=c_11 AND k_12=c_12 AND ... // key_parts of first key in 'info' + k_21=c_21 AND k_22=c_22 AND ... // key_parts of second key in 'info' ... - k_n1=c_n1 AND k_n3=c_n3 AND ... (1) //parts of the key used by *scan + k_n1=c_n1 AND k_n3=c_n3 AND ... (1) //key_parts of 'scan' where k_ij may be the same as any k_pq (i.e. keys may have common parts). + Note that for ROR retrieval, only equality conditions are usable so there + are no open ranges (e.g., k_ij > c_ij) in 'scan' or 'info' + A full row is retrieved if entire condition holds. The recursive procedure for finding P(cond) is as follows: @@ -4736,19 +4740,30 @@ void ror_intersect_cpy(ROR_INTERSECT_INF where i1,i2, .. are key parts that were already marked as fixed. - In order to minimize number of expensive records_in_range calls we group - and reduce adjacent fractions. + In order to minimize number of expensive records_in_range calls we + group and reduce adjacent fractions. Note that on the optimizer's + request, index statistics may be used instead of records_in_range + @see RANGE_OPT_PARAM::use_index_statistics. RETURN - Selectivity of given ROR scan. + Selectivity of given ROR scan, a number between 0 and 1. 1 means that + adding 'scan' to the intersection does not improve the selectivity. */ static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info, const ROR_SCAN_INFO *scan) { double selectivity_mult= 1.0; - KEY_PART_INFO *key_part= info->param->table->key_info[scan->keynr].key_part; - uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */ + const TABLE * const table= info->param->table; + const KEY_PART_INFO * const key_part= table->key_info[scan->keynr].key_part; + /** + key values tuple, used to store both min_range.key and + max_range.key. This function is only called for equality ranges; + open ranges (e.g. "min_value < X < max_value") cannot be used for + rowid ordered retrieval, so in this function we know that + min_range.key == max_range.key + */ + uchar key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; uchar *key_ptr= key_val; SEL_ARG *sel_arg, *tuple_arg= NULL; key_part_map keypart_map= 0; @@ -4761,7 +4776,7 @@ static double ror_scan_selectivity(const min_range.flag= HA_READ_KEY_EXACT; max_range.key= key_val; max_range.flag= HA_READ_AFTER_KEY; - ha_rows prev_records= info->param->table->file->stats.records; + ha_rows prev_records= table->file->stats.records; DBUG_ENTER("ror_scan_selectivity"); for (sel_arg= scan->sel_arg; sel_arg; @@ -4773,12 +4788,14 @@ static double ror_scan_selectivity(const if (cur_covered != prev_covered) { /* create (part1val, ..., part{n-1}val) tuple. */ + bool is_null_range= false; ha_rows records; if (!tuple_arg) { tuple_arg= scan->sel_arg; /* Here we use the length of the first key part */ - tuple_arg->store_min(key_part->store_length, &key_ptr, 0); + tuple_arg->store_min(key_part[0].store_length, &key_ptr, 0); + is_null_range|= tuple_arg->is_null_interval(); keypart_map= 1; } while (tuple_arg->next_key_part != sel_arg) @@ -4786,12 +4803,33 @@ static double ror_scan_selectivity(const tuple_arg= tuple_arg->next_key_part; tuple_arg->store_min(key_part[tuple_arg->part].store_length, &key_ptr, 0); + is_null_range|= tuple_arg->is_null_interval(); keypart_map= (keypart_map << 1) | 1; } min_range.length= max_range.length= (size_t) (key_ptr - key_val); min_range.keypart_map= max_range.keypart_map= keypart_map; - records= (info->param->table->file-> - records_in_range(scan->keynr, &min_range, &max_range)); + + /* + Get the number of rows in this range. This is done by calling + records_in_range() unless all these are true: + 1) The user has requested that index statistics should be used + for equality ranges to avoid the incurred overhead of + index dives in records_in_range() + 2) The range is not on the form "x IS NULL". The reason is + that the number of rows with this value are likely to be + very different than the values in the index statistics + 3) Index statistics is available. + @see key_val + */ + if (!info->param->use_index_statistics || // (1) + is_null_range || // (2) + !(records= table->key_info[scan->keynr]. + rec_per_key[tuple_arg->part])) // (3) + { + DBUG_EXECUTE_IF("crash_records_in_range", DBUG_SUICIDE();); + records= (table->file-> + records_in_range(scan->keynr, &min_range, &max_range)); + } if (cur_covered) { /* uncovered -> covered */ @@ -4810,11 +4848,12 @@ static double ror_scan_selectivity(const } if (!prev_covered) { - double tmp= rows2double(info->param->table->quick_rows[scan->keynr]) / + double tmp= rows2double(table->quick_rows[scan->keynr]) / rows2double(prev_records); DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp)); selectivity_mult *= tmp; } + DBUG_ASSERT(selectivity_mult <= 1.0); DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult)); DBUG_RETURN(selectivity_mult); } === modified file 'sql/sys_vars.cc' --- a/sql/sys_vars.cc 2012-01-31 10:40:02 +0000 +++ b/sql/sys_vars.cc 2012-02-02 16:56:36 +0000 @@ -2114,7 +2114,7 @@ static Sys_var_uint Sys_eq_range_index_d "eq_range_index_dive_limit", "The optimizer will use existing index statistics instead of " "doing index dives for equality ranges if the number of equality " - "ranges is larger than or equal to this number.", + "ranges for the index is larger than or equal to this number.", SESSION_VAR(eq_range_index_dive_limit), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, UINT_MAX32), DEFAULT(10), BLOCK_SIZE(1)); No bundle (reason: useless for push emails).