From: Jorgen Loland Date: March 21 2011 3:18pm Subject: Re: bzr commit into mysql-trunk branch (ole.john.aske:3303) Bug#11764737 List-Archive: http://lists.mysql.com/commits/133422 Message-Id: <4D876C4F.8020601@oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 8bit Ole John, Thank you for the patch. Patch approved. On 03/21/2011 02:06 PM, Ole John Aske wrote: > #At file:///net/fimafeng09/export/home/tmp/oleja/mysql/mysql-trunk/ based on revid:georgi.kodinov@stripped > > 3303 Ole John Aske 2011-03-21 > Updated fix (W/ Jorgens review comments) for bug#11764737: Optimizer is overly eager to request ordered access > > The essential part of this fix is: > > 1) Ensure that JOIN_TAB::sorted is set, and QUICK_RANGE_SELECT::mrr_flags contains 'HA_MRR_SORTED' > only iff strict ordering is required from the handler - Either as a result of the query itself > specifying ORDER/GROUP BY, or usage of a QUICK access method which require the sources to > be ordered (Typical if its internals use ::ha_index_first, _last, _next, _prev) > > 2) Change calls to handler::ha_index_init() to take its 'sorted' argument either directly > from JOIN_TAB::sorted or from mrr_flags containing HA_MRR_SORTED. > > In order to implement this QUICK_SELECT_I::need_sorted_output() had to be extended to > take a 'bool sort' argument: Where 'sort==false' will enable us to turn off > requirement that a QUICK_SELECT_I access method should deliver rows in sorted > order. (Similar logic already exists for 'non-quick' access methods.) > NOTE: QUICK_SELECT_I::need_sorted_output(sort==false) is only regarded as a hint > and the different QUICK_SELECT_I methods may still request sorted order from the > handler if required by their internals. > > Furthermore the function 'disable_sorted_access(JOIN_TAB* join_tab)' has been > introduced which collect all logic for turning of sort requirement. > @ mysql-test/r/innodb_mysql_lock2.result > Accept changed result order for this (unordered) result set. > @ mysql-test/r/partition.result > Accept changed result order for this (unordered) result set. > @ mysql-test/r/partition_explicit_prune.result > Accept changed result order for (unordered) result sets. > @ sql/opt_range.cc > Correcly set, and use, HA_MRR_SORTED in order to only request sorted access > iff it is required either due to optimizer using ::need_sorted_output(), > or the internals of the quick access needing ordered access itself. > > Also added mrr_flags and its mrr relatives to C'tor initializer list. > Fixed an issue with missing 'delete quick' + return in case > get_quick_select() failed. > > modified: > mysql-test/r/innodb_mysql_lock2.result > mysql-test/r/partition.result > mysql-test/r/partition_explicit_prune.result > sql/opt_range.cc > sql/opt_range.h > sql/sql_select.cc > === modified file 'mysql-test/r/innodb_mysql_lock2.result' > --- a/mysql-test/r/innodb_mysql_lock2.result 2010-07-19 16:09:51 +0000 > +++ b/mysql-test/r/innodb_mysql_lock2.result 2011-03-21 13:06:21 +0000 > @@ -605,11 +605,11 @@ begin; > # Acquire SR metadata lock on t1. > select * from t1; > i > +4 > 1 > +5 > 2 > 3 > -4 > -5 > # Switching to connection 'con1'. > # Sending: > alter table t1 rebuild partition p0; > > === modified file 'mysql-test/r/partition.result' > --- a/mysql-test/r/partition.result 2011-03-11 09:58:45 +0000 > +++ b/mysql-test/r/partition.result 2011-03-21 13:06:21 +0000 > @@ -1776,9 +1776,9 @@ insert into t1 values (18446744073709551 > (18446744073709551613), (18446744073709551612); > select * from t1; > a > +18446744073709551614 > 18446744073709551612 > 18446744073709551613 > -18446744073709551614 > 18446744073709551615 > select * from t1 where a = 18446744073709551615; > a > @@ -1786,9 +1786,9 @@ a > delete from t1 where a = 18446744073709551615; > select * from t1; > a > +18446744073709551614 > 18446744073709551612 > 18446744073709551613 > -18446744073709551614 > drop table t1; > CREATE TABLE t1 ( > num int(11) NOT NULL, cs int(11) NOT NULL) > > === modified file 'mysql-test/r/partition_explicit_prune.result' > --- a/mysql-test/r/partition_explicit_prune.result 2011-02-01 11:38:39 +0000 > +++ b/mysql-test/r/partition_explicit_prune.result 2011-03-21 13:06:21 +0000 > @@ -332,12 +332,12 @@ ERROR 42000: You have an error in your S > SELECT * FROM t1 `PARTITION`; > a b > -2 (pNeg-)subp0 > -5 p0-9:subp3 > -10 p10-99 > -3 pNeg(-subp1) > -1 pNeg(-subp1) > +5 p0-9:subp3 > 1 subp3 > 3 subp3 > +10 p10-99 > 100 `p100-99999`(-subp6) > 1000 `p100-99999`(-subp6) > 101 `p100-99999`(-subp7) > @@ -346,12 +346,12 @@ ERROR 42000: You have an error in your S > SELECT * FROM t1 AS `PARTITION`; > a b > -2 (pNeg-)subp0 > -5 p0-9:subp3 > -10 p10-99 > -3 pNeg(-subp1) > -1 pNeg(-subp1) > +5 p0-9:subp3 > 1 subp3 > 3 subp3 > +10 p10-99 > 100 `p100-99999`(-subp6) > 1000 `p100-99999`(-subp6) > 101 `p100-99999`(-subp7) > @@ -536,10 +536,10 @@ HANDLER_WRITE 127 > SELECT * FROM t1 PARTITION (pNeg, `p10-99`); > a b > -2 (pNeg-)subp0 > -10 p10-99 > -3 pNeg(-subp1) > -1 pNeg(-subp1) > -21 REPLACEd by REPLACE > +10 p10-99 > FLUSH STATUS; > SELECT * FROM t1 PARTITION (pNeg, `p10-99`) INTO OUTFILE 'loadtest.txt'; > SELECT * FROM INFORMATION_SCHEMA.SESSION_STATUS > @@ -576,7 +576,7 @@ WHERE VARIABLE_NAME LIKE 'HANDLER_%' AND > VARIABLE_NAME VARIABLE_VALUE > HANDLER_EXTERNAL_LOCK 6 > HANDLER_ROLLBACK 1 > -HANDLER_WRITE 18 > +HANDLER_WRITE 21 > # 6 locks (1 ha_partiiton + 2 ha_innobase) x 2 (lock+unlock) > # 1 rollback > SELECT * FROM t1 PARTITION (pNeg, `p10-99`); > @@ -950,12 +950,12 @@ SUBPARTITION BY HASH (a) > SELECT * FROM t1; > a b > -2 (pNeg-)subp0, Updated > -10 p10-99 > +-222 `p100-99999`(-subp6), Updated from a = 100 > -21 REPLACEd by REPLACE > 1 subp3 > 3 subp3 > +10 p10-99 > 1000 `p100-99999`(-subp6) > --222 `p100-99999`(-subp6), Updated from a = 100 > 110 `p100-99999`(-subp7), Updated to 103, Updated to 110 > SHOW CREATE TABLE t2; > Table Create Table > @@ -981,9 +981,9 @@ SUBPARTITION BY HASH (a) > SUBPARTITION subp7 ENGINE = InnoDB)) */ > SELECT * FROM t2; > a b > -10 p10-99 > 1 subp3 > 3 subp3 > +10 p10-99 > 1000 `p100-99999`(-subp6) > 110 `p100-99999`(-subp7), Updated to 103, Updated to 110 > SHOW CREATE TABLE t3; > @@ -995,11 +995,11 @@ t3 CREATE TABLE `t3` ( > SELECT * FROM t3; > a b > -2 (pNeg-)subp0, Updated > +-222 `p100-99999`(-subp6), Updated from a = 100 > -21 REPLACEd by REPLACE > 1 subp3 > 3 subp3 > 1000 `p100-99999`(-subp6) > --222 `p100-99999`(-subp6), Updated from a = 100 > 110 `p100-99999`(-subp7), Updated to 103, Updated to 110 > FLUSH STATUS; > DELETE t1 PARTITION (pNeg), t3 FROM t1, t3 > > === modified file 'sql/opt_range.cc' > --- a/sql/opt_range.cc 2011-03-17 11:48:04 +0000 > +++ b/sql/opt_range.cc 2011-03-21 13:06:21 +0000 > @@ -1179,7 +1179,9 @@ QUICK_SELECT_I::QUICK_SELECT_I() > QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, > bool no_alloc, MEM_ROOT *parent_alloc, > bool *create_error) > - :free_file(0),cur_range(NULL),last_range(0),dont_free(0) > + :free_file(0),cur_range(NULL),last_range(0), > + mrr_flags(0),mrr_buf_size(0),mrr_buf_desc(NULL), > + dont_free(0) > { > my_bitmap_map *bitmap; > DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT"); > @@ -1192,7 +1194,6 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T > > /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */ > mrr_buf_size= thd->variables.read_rnd_buff_size; > - mrr_buf_desc= NULL; > > if (!no_alloc&& !parent_alloc) > { > @@ -1220,17 +1221,12 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T > } > > > -void QUICK_RANGE_SELECT::need_sorted_output() > +void QUICK_RANGE_SELECT::need_sorted_output(bool sort) > { > - if (!(mrr_flags& HA_MRR_SORTED)) > - { > - /* > - Native implementation can't produce sorted output. We'll have to > - switch to default > - */ > - mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; > - } > - mrr_flags |= HA_MRR_SORTED; > + if (sort) > + mrr_flags |= HA_MRR_SORTED; > + else > + mrr_flags&= ~HA_MRR_SORTED; > } > > > @@ -8714,7 +8710,8 @@ int QUICK_RANGE_SELECT::reset() > { > if (in_ror_merged_scan) > head->column_bitmaps_set_no_signal(&column_bitmap,&column_bitmap); > - if ((error= file->ha_index_init(index,1))) > + bool sorted= (mrr_flags& HA_MRR_SORTED); > + if ((error= file->ha_index_init(index,sorted))) > DBUG_RETURN(error); > } > > @@ -8989,10 +8986,11 @@ int QUICK_RANGE_SELECT::get_next_prefix( > last_range->make_min_endpoint(&start_key, prefix_length, keypart_map); > last_range->make_max_endpoint(&end_key, prefix_length, keypart_map); > > + bool sorted= (mrr_flags& HA_MRR_SORTED); > result= file->read_range_first(last_range->min_keypart_map ?&start_key : 0, > last_range->max_keypart_map ?&end_key : 0, > test(last_range->flag& EQ_RANGE), > - TRUE); > + sorted); > if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE)) > last_range= 0; // Stop searching > > @@ -9104,6 +9102,7 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUI > */ > mrr_buf_desc= NULL; > mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; > + mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev > mrr_buf_size= 0; > > > @@ -10584,12 +10583,19 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *par > if (quick_prefix_records == HA_POS_ERROR) > quick->quick_prefix_select= NULL; /* Can't construct a quick select. */ > else > + { > /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */ > quick->quick_prefix_select= get_quick_select(param, param_idx, > index_tree, > - HA_MRR_USE_DEFAULT_IMPL, 0, > + HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED, > + 0, > &quick->alloc); > - > + if (!quick->quick_prefix_select) > + { > + delete quick; > + DBUG_RETURN(NULL); > + } > + } > /* > Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX > attribute, and create an array of QUICK_RANGES to be used by the > @@ -10985,7 +10991,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(vo > DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset"); > > head->set_keyread(TRUE); /* We need only the key attributes */ > - if ((result= file->ha_index_init(index,1))) > + /* > + Request ordered index access as usage of ::index_last(), > + ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it. > + */ > + if ((result= file->ha_index_init(index,true))) > DBUG_RETURN(result); > if (quick_prefix_select&& quick_prefix_select->reset()) > DBUG_RETURN(1); > > === modified file 'sql/opt_range.h' > --- a/sql/opt_range.h 2010-12-17 12:58:04 +0000 > +++ b/sql/opt_range.h 2011-03-21 13:06:21 +0000 > @@ -285,11 +285,15 @@ public: > virtual bool clustered_pk_range() { return false; } > > /* > - Request that this quick select produces sorted output. Not all quick > - selects can do it, the caller is responsible for calling this function > - only for those quick selects that can. > + Request that this quick select produces sorted output if 'sort==true'. > + Not all quick selects can provide sorted output, the caller is responsible > + for calling this function only for those quick selects that can. > + Caller is allowed to later cancel its sorted request by setting > + 'sort==false'. However, the implementation is allowed to provide sorted > + output even in this case if benificial, or required by implementation > + internals. > */ > - virtual void need_sorted_output() = 0; > + virtual void need_sorted_output(bool sort) = 0; > enum { > QS_TYPE_RANGE = 0, > QS_TYPE_INDEX_MERGE = 1, > @@ -399,7 +403,7 @@ uint quick_range_seq_next(range_seq_t rs > > /* > Quick select that does a range scan on a single key. The records are > - returned in key order. > + returned in key order if ::need_sorted_output(true) has been called. > */ > class QUICK_RANGE_SELECT : public QUICK_SELECT_I > { > @@ -465,7 +469,7 @@ public: > MEM_ROOT *parent_alloc, bool *create_error); > ~QUICK_RANGE_SELECT(); > > - void need_sorted_output(); > + void need_sorted_output(bool sort); > int init(); > int reset(void); > int get_next(); > @@ -569,7 +573,7 @@ public: > ~QUICK_INDEX_MERGE_SELECT(); > > int init(); > - void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } > + void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ } > int reset(void); > int get_next(); > bool reverse_sorted() { return false; } > @@ -647,7 +651,7 @@ public: > ~QUICK_ROR_INTERSECT_SELECT(); > > int init(); > - void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } > + void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ } > int reset(void); > int get_next(); > bool reverse_sorted() { return false; } > @@ -718,7 +722,7 @@ public: > ~QUICK_ROR_UNION_SELECT(); > > int init(); > - void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } > + void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ } > int reset(void); > int get_next(); > bool reverse_sorted() { return false; } > @@ -860,7 +864,7 @@ public: > void adjust_prefix_ranges(); > bool alloc_buffers(); > int init(); > - void need_sorted_output() { /* always do it */ } > + void need_sorted_output(bool sort) { /* always do it */ } > int reset(); > int get_next(); > bool reverse_sorted() { return false; } > > === modified file 'sql/sql_select.cc' > --- a/sql/sql_select.cc 2011-03-17 12:45:19 +0000 > +++ b/sql/sql_select.cc 2011-03-21 13:06:21 +0000 > @@ -1467,7 +1467,7 @@ bool setup_semijoin_dups_elimination(JOI > output is not supported. > */ > if (tab->select&& tab->select->quick) > - tab->select->quick->need_sorted_output(); > + tab->select->quick->need_sorted_output(true); > > /* Calculate key length */ > keylen= 0; > @@ -2743,6 +2743,23 @@ JOIN::save_join_tab() > > > /** > + There may be a pending 'sorted' request on the specified > + 'join_tab' which we now has decided we can ignore. > +*/ > +static void > +disable_sorted_access(JOIN_TAB* join_tab) > +{ > + DBUG_ENTER("disable_sorted_access"); > + join_tab->sorted= 0; > + if (join_tab->select&& join_tab->select->quick) > + { > + join_tab->select->quick->need_sorted_output(false); > + } > + DBUG_VOID_RETURN; > +} > + > + > +/** > Exec select. > > @todo > @@ -2921,7 +2938,7 @@ JOIN::exec() > curr_join->const_tables != curr_join->tables&& > curr_join->best_positions[curr_join->const_tables].sj_strategy > != SJ_OPT_LOOSE_SCAN) > - curr_join->join_tab[curr_join->const_tables].sorted= 0; > + disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]); > if ((tmp_error= do_select(curr_join, (List *) 0, curr_tmp_table, 0))) > { > error= tmp_error; > @@ -3087,7 +3104,7 @@ JOIN::exec() > curr_join->group_list= 0; > if (!curr_join->sort_and_group&& > curr_join->const_tables != curr_join->tables) > - curr_join->join_tab[curr_join->const_tables].sorted= 0; > + disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]); > if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) || > (tmp_error= do_select(curr_join, (List *) 0, curr_tmp_table, > 0))) > @@ -11176,7 +11193,7 @@ make_join_readinfo(JOIN *join, ulonglong > { > uint i, jcl; > bool statistics= test(!(join->select_options& SELECT_DESCRIBE)); > - bool sorted= 1; > + bool sorted= (join->order || join->group_list); > uint first_sjm_table= MAX_TABLES; > uint last_sjm_table= MAX_TABLES; > DBUG_ENTER("make_join_readinfo"); > @@ -18179,6 +18196,7 @@ join_read_key2(JOIN_TAB *tab, TABLE *tab > int error; > if (!table->file->inited) > { > + DBUG_ASSERT(!tab->sorted); // Don't expect sort req. for single row. > table->file->ha_index_init(table_ref->key, tab->sorted); > } > > @@ -18488,7 +18506,7 @@ join_read_last(JOIN_TAB *tab) > tab->read_record.index=tab->index; > tab->read_record.record=table->record[0]; > if (!table->file->inited) > - table->file->ha_index_init(tab->index, 1); > + table->file->ha_index_init(tab->index, tab->sorted); > if ((error= tab->table->file->ha_index_last(tab->table->record[0]))) > return report_error(table, error); > return 0; > @@ -18512,7 +18530,7 @@ join_ft_read_first(JOIN_TAB *tab) > TABLE *table= tab->table; > > if (!table->file->inited) > - table->file->ha_index_init(tab->ref.key, 1); > + table->file->ha_index_init(tab->ref.key, tab->sorted); > table->file->ft_init(); > > if ((error= table->file->ft_read(table->record[0]))) > @@ -20290,7 +20308,7 @@ check_reverse_order: > } > } > else if (select&& select->quick) > - select->quick->need_sorted_output(); > + select->quick->need_sorted_output(true); > > } // QEP has been modified > > @@ -23197,9 +23215,22 @@ void select_describe(JOIN *join, bool ne > > if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE&& > !(((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags& > - HA_MRR_USE_DEFAULT_IMPL)) > + (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))) > { > - extra.append(STRING_WITH_LEN("; Using MRR")); > + /* > + During normal execution of a query, multi_range_read_init() is > + called to initialize MRR. If HA_MRR_SORTED is set at this point, > + multi_range_read_init() for any native MRR implementation will > + revert to default MRR because they cannot produce sorted output > + currently. > + Calling multi_range_read_init() can potentially be costly, so it > + is not done when executing an EXPLAIN. We therefore make the > + assumption that HA_MRR_SORTED means no MRR. If some MRR native > + implementation will support sorted output in the future, a > + function "bool mrr_supports_sorted()" should be added in the > + handler. > + */ > + extra.append(STRING_WITH_LEN("; Using MRR")); > } > if (need_tmp_table) > { > > > > > -- Jørgen Løland | Senior Software Engineer | +47 73842138 Oracle MySQL Trondheim, Norway