From: Roy Lyseng Date: March 22 2011 3:00pm Subject: Re: bzr commit into mysql-trunk branch (ole.john.aske:3303) Bug#11764737 List-Archive: http://lists.mysql.com/commits/133536 Message-Id: <4D88B98E.2080301@oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Hi Ole John, thanks for fixing this problem. The bugfix is approved, however I do have some non-mandatory style issues and a comment on a test change below. On 21.03.11 14.06, Ole John Aske wrote: > #At file:///net/fimafeng09/export/home/tmp/oleja/mysql/mysql-trunk/ based on revid:georgi.kodinov@stripped > > 3303 Ole John Aske 2011-03-21 > Updated fix (W/ Jorgens review comments) for bug#11764737: Optimizer is overly eager to request ordered access If you recommit this, it would be nice to reformat the commit header so that each line wraps at column 70. It will be more readable that way. > > The essential part of this fix is: > > 1) Ensure that JOIN_TAB::sorted is set, and QUICK_RANGE_SELECT::mrr_flags contains 'HA_MRR_SORTED' > only iff strict ordering is required from the handler - Either as a result of the query itself > specifying ORDER/GROUP BY, or usage of a QUICK access method which require the sources to > be ordered (Typical if its internals use ::ha_index_first, _last, _next, _prev) > > 2) Change calls to handler::ha_index_init() to take its 'sorted' argument either directly > from JOIN_TAB::sorted or from mrr_flags containing HA_MRR_SORTED. > > In order to implement this QUICK_SELECT_I::need_sorted_output() had to be extended to > take a 'bool sort' argument: Where 'sort==false' will enable us to turn off > requirement that a QUICK_SELECT_I access method should deliver rows in sorted > order. (Similar logic already exists for 'non-quick' access methods.) > NOTE: QUICK_SELECT_I::need_sorted_output(sort==false) is only regarded as a hint > and the different QUICK_SELECT_I methods may still request sorted order from the > handler if required by their internals. > > Furthermore the function 'disable_sorted_access(JOIN_TAB* join_tab)' has been > introduced which collect all logic for turning of sort requirement. of -> off > @ mysql-test/r/innodb_mysql_lock2.result > Accept changed result order for this (unordered) result set. > @ mysql-test/r/partition.result > Accept changed result order for this (unordered) result set. > @ mysql-test/r/partition_explicit_prune.result > Accept changed result order for (unordered) result sets. > @ sql/opt_range.cc > Correcly set, and use, HA_MRR_SORTED in order to only request sorted access > iff it is required either due to optimizer using ::need_sorted_output(), > or the internals of the quick access needing ordered access itself. > > Also added mrr_flags and its mrr relatives to C'tor initializer list. > Fixed an issue with missing 'delete quick' + return in case > get_quick_select() failed. > > modified: > mysql-test/r/innodb_mysql_lock2.result > mysql-test/r/partition.result > mysql-test/r/partition_explicit_prune.result > sql/opt_range.cc > sql/opt_range.h > sql/sql_select.cc > === modified file 'mysql-test/r/innodb_mysql_lock2.result' > --- a/mysql-test/r/innodb_mysql_lock2.result 2010-07-19 16:09:51 +0000 > +++ b/mysql-test/r/innodb_mysql_lock2.result 2011-03-21 13:06:21 +0000 > @@ -605,11 +605,11 @@ begin; > # Acquire SR metadata lock on t1. > select * from t1; > i > +4 > 1 > +5 > 2 > 3 > -4 > -5 > # Switching to connection 'con1'. > # Sending: > alter table t1 rebuild partition p0; > > === modified file 'mysql-test/r/partition.result' > --- a/mysql-test/r/partition.result 2011-03-11 09:58:45 +0000 > +++ b/mysql-test/r/partition.result 2011-03-21 13:06:21 +0000 > @@ -1776,9 +1776,9 @@ insert into t1 values (18446744073709551 > (18446744073709551613), (18446744073709551612); > select * from t1; > a > +18446744073709551614 > 18446744073709551612 > 18446744073709551613 > -18446744073709551614 > 18446744073709551615 > select * from t1 where a = 18446744073709551615; > a > @@ -1786,9 +1786,9 @@ a > delete from t1 where a = 18446744073709551615; > select * from t1; > a > +18446744073709551614 > 18446744073709551612 > 18446744073709551613 > -18446744073709551614 > drop table t1; > CREATE TABLE t1 ( > num int(11) NOT NULL, cs int(11) NOT NULL) > > === modified file 'mysql-test/r/partition_explicit_prune.result' > --- a/mysql-test/r/partition_explicit_prune.result 2011-02-01 11:38:39 +0000 > +++ b/mysql-test/r/partition_explicit_prune.result 2011-03-21 13:06:21 +0000 > @@ -332,12 +332,12 @@ ERROR 42000: You have an error in your S > SELECT * FROM t1 `PARTITION`; > a b > -2 (pNeg-)subp0 > -5 p0-9:subp3 > -10 p10-99 > -3 pNeg(-subp1) > -1 pNeg(-subp1) > +5 p0-9:subp3 > 1 subp3 > 3 subp3 > +10 p10-99 > 100 `p100-99999`(-subp6) > 1000 `p100-99999`(-subp6) > 101 `p100-99999`(-subp7) > @@ -346,12 +346,12 @@ ERROR 42000: You have an error in your S > SELECT * FROM t1 AS `PARTITION`; > a b > -2 (pNeg-)subp0 > -5 p0-9:subp3 > -10 p10-99 > -3 pNeg(-subp1) > -1 pNeg(-subp1) > +5 p0-9:subp3 > 1 subp3 > 3 subp3 > +10 p10-99 > 100 `p100-99999`(-subp6) > 1000 `p100-99999`(-subp6) > 101 `p100-99999`(-subp7) > @@ -536,10 +536,10 @@ HANDLER_WRITE 127 > SELECT * FROM t1 PARTITION (pNeg, `p10-99`); > a b > -2 (pNeg-)subp0 > -10 p10-99 > -3 pNeg(-subp1) > -1 pNeg(-subp1) > -21 REPLACEd by REPLACE > +10 p10-99 > FLUSH STATUS; > SELECT * FROM t1 PARTITION (pNeg, `p10-99`) INTO OUTFILE 'loadtest.txt'; > SELECT * FROM INFORMATION_SCHEMA.SESSION_STATUS > @@ -576,7 +576,7 @@ WHERE VARIABLE_NAME LIKE 'HANDLER_%' AND > VARIABLE_NAME VARIABLE_VALUE > HANDLER_EXTERNAL_LOCK 6 > HANDLER_ROLLBACK 1 > -HANDLER_WRITE 18 > +HANDLER_WRITE 21 It is a bit worrying that HANDLER_WRITE count increases from 18 to 21 with this patch. An explanation of this would be good. > # 6 locks (1 ha_partiiton + 2 ha_innobase) x 2 (lock+unlock) > # 1 rollback > SELECT * FROM t1 PARTITION (pNeg, `p10-99`); > @@ -950,12 +950,12 @@ SUBPARTITION BY HASH (a) > SELECT * FROM t1; > a b > -2 (pNeg-)subp0, Updated > -10 p10-99 > +-222 `p100-99999`(-subp6), Updated from a = 100 > -21 REPLACEd by REPLACE > 1 subp3 > 3 subp3 > +10 p10-99 > 1000 `p100-99999`(-subp6) > --222 `p100-99999`(-subp6), Updated from a = 100 > 110 `p100-99999`(-subp7), Updated to 103, Updated to 110 > SHOW CREATE TABLE t2; > Table Create Table > @@ -981,9 +981,9 @@ SUBPARTITION BY HASH (a) > SUBPARTITION subp7 ENGINE = InnoDB)) */ > SELECT * FROM t2; > a b > -10 p10-99 > 1 subp3 > 3 subp3 > +10 p10-99 > 1000 `p100-99999`(-subp6) > 110 `p100-99999`(-subp7), Updated to 103, Updated to 110 > SHOW CREATE TABLE t3; > @@ -995,11 +995,11 @@ t3 CREATE TABLE `t3` ( > SELECT * FROM t3; > a b > -2 (pNeg-)subp0, Updated > +-222 `p100-99999`(-subp6), Updated from a = 100 > -21 REPLACEd by REPLACE > 1 subp3 > 3 subp3 > 1000 `p100-99999`(-subp6) > --222 `p100-99999`(-subp6), Updated from a = 100 > 110 `p100-99999`(-subp7), Updated to 103, Updated to 110 > FLUSH STATUS; > DELETE t1 PARTITION (pNeg), t3 FROM t1, t3 > > === modified file 'sql/opt_range.cc' > --- a/sql/opt_range.cc 2011-03-17 11:48:04 +0000 > +++ b/sql/opt_range.cc 2011-03-21 13:06:21 +0000 > @@ -1179,7 +1179,9 @@ QUICK_SELECT_I::QUICK_SELECT_I() > QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, > bool no_alloc, MEM_ROOT *parent_alloc, > bool *create_error) > - :free_file(0),cur_range(NULL),last_range(0),dont_free(0) > + :free_file(0),cur_range(NULL),last_range(0), > + mrr_flags(0),mrr_buf_size(0),mrr_buf_desc(NULL), > + dont_free(0) There should be a space after each comma in the initializer list. > { > my_bitmap_map *bitmap; > DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT"); > @@ -1192,7 +1194,6 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T > > /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */ > mrr_buf_size= thd->variables.read_rnd_buff_size; > - mrr_buf_desc= NULL; It would be nice if mrr_buf_size was also completely initialized in the initializer list. > > if (!no_alloc&& !parent_alloc) > { > @@ -1220,17 +1221,12 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T > } > > > -void QUICK_RANGE_SELECT::need_sorted_output() > +void QUICK_RANGE_SELECT::need_sorted_output(bool sort) > { > - if (!(mrr_flags& HA_MRR_SORTED)) > - { > - /* > - Native implementation can't produce sorted output. We'll have to > - switch to default > - */ > - mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; > - } > - mrr_flags |= HA_MRR_SORTED; > + if (sort) > + mrr_flags |= HA_MRR_SORTED; > + else > + mrr_flags&= ~HA_MRR_SORTED; > } > > > @@ -8714,7 +8710,8 @@ int QUICK_RANGE_SELECT::reset() > { > if (in_ror_merged_scan) > head->column_bitmaps_set_no_signal(&column_bitmap,&column_bitmap); > - if ((error= file->ha_index_init(index,1))) > + bool sorted= (mrr_flags& HA_MRR_SORTED); > + if ((error= file->ha_index_init(index,sorted))) > DBUG_RETURN(error); Personal preference: The 'sorted' variable is only used one place. It is just as simple to replace 'sorted' with 'test(mrr_flags & HA_MRR_SORTED)' in the function call. In case you keep it, consider prefixing the declaration with 'const'. > } > > @@ -8989,10 +8986,11 @@ int QUICK_RANGE_SELECT::get_next_prefix( > last_range->make_min_endpoint(&start_key, prefix_length, keypart_map); > last_range->make_max_endpoint(&end_key, prefix_length, keypart_map); > > + bool sorted= (mrr_flags& HA_MRR_SORTED); > result= file->read_range_first(last_range->min_keypart_map ?&start_key : 0, > last_range->max_keypart_map ?&end_key : 0, > test(last_range->flag& EQ_RANGE), > - TRUE); > + sorted); Personal preference: Same as above. > if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE)) > last_range= 0; // Stop searching > > @@ -9104,6 +9102,7 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUI > */ > mrr_buf_desc= NULL; > mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; > + mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev One assignment to mrr_flags would be sufficient. > mrr_buf_size= 0; > > > @@ -10584,12 +10583,19 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *par > if (quick_prefix_records == HA_POS_ERROR) > quick->quick_prefix_select= NULL; /* Can't construct a quick select. */ > else > + { > /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */ > quick->quick_prefix_select= get_quick_select(param, param_idx, > index_tree, > - HA_MRR_USE_DEFAULT_IMPL, 0, > + HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED, > + 0, > &quick->alloc); Line wraps around. Consider rewriting as: quick->quick_prefix_select= get_quick_selec(...); > - > + if (!quick->quick_prefix_select) > + { > + delete quick; > + DBUG_RETURN(NULL); > + } > + } > /* > Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX > attribute, and create an array of QUICK_RANGES to be used by the > @@ -10985,7 +10991,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(vo > DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset"); > > head->set_keyread(TRUE); /* We need only the key attributes */ > - if ((result= file->ha_index_init(index,1))) > + /* > + Request ordered index access as usage of ::index_last(), > + ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it. > + */ > + if ((result= file->ha_index_init(index,true))) Space after comma. > DBUG_RETURN(result); > if (quick_prefix_select&& quick_prefix_select->reset()) > DBUG_RETURN(1); > > === modified file 'sql/opt_range.h' > --- a/sql/opt_range.h 2010-12-17 12:58:04 +0000 > +++ b/sql/opt_range.h 2011-03-21 13:06:21 +0000 > @@ -285,11 +285,15 @@ public: > virtual bool clustered_pk_range() { return false; } > > /* > - Request that this quick select produces sorted output. Not all quick > - selects can do it, the caller is responsible for calling this function > - only for those quick selects that can. > + Request that this quick select produces sorted output if 'sort==true'. > + Not all quick selects can provide sorted output, the caller is responsible > + for calling this function only for those quick selects that can. > + Caller is allowed to later cancel its sorted request by setting > + 'sort==false'. However, the implementation is allowed to provide sorted > + output even in this case if benificial, or required by implementation > + internals. > */ > - virtual void need_sorted_output() = 0; > + virtual void need_sorted_output(bool sort) = 0; > enum { > QS_TYPE_RANGE = 0, > QS_TYPE_INDEX_MERGE = 1, > @@ -399,7 +403,7 @@ uint quick_range_seq_next(range_seq_t rs > > /* > Quick select that does a range scan on a single key. The records are > - returned in key order. > + returned in key order if ::need_sorted_output(true) has been called. > */ > class QUICK_RANGE_SELECT : public QUICK_SELECT_I > { > @@ -465,7 +469,7 @@ public: > MEM_ROOT *parent_alloc, bool *create_error); > ~QUICK_RANGE_SELECT(); > > - void need_sorted_output(); > + void need_sorted_output(bool sort); > int init(); > int reset(void); > int get_next(); > @@ -569,7 +573,7 @@ public: > ~QUICK_INDEX_MERGE_SELECT(); > > int init(); > - void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } > + void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ } > int reset(void); > int get_next(); > bool reverse_sorted() { return false; } > @@ -647,7 +651,7 @@ public: > ~QUICK_ROR_INTERSECT_SELECT(); > > int init(); > - void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } > + void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ } > int reset(void); > int get_next(); > bool reverse_sorted() { return false; } > @@ -718,7 +722,7 @@ public: > ~QUICK_ROR_UNION_SELECT(); > > int init(); > - void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ } > + void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ } > int reset(void); > int get_next(); > bool reverse_sorted() { return false; } > @@ -860,7 +864,7 @@ public: > void adjust_prefix_ranges(); > bool alloc_buffers(); > int init(); > - void need_sorted_output() { /* always do it */ } > + void need_sorted_output(bool sort) { /* always do it */ } > int reset(); > int get_next(); > bool reverse_sorted() { return false; } > > === modified file 'sql/sql_select.cc' > --- a/sql/sql_select.cc 2011-03-17 12:45:19 +0000 > +++ b/sql/sql_select.cc 2011-03-21 13:06:21 +0000 > @@ -1467,7 +1467,7 @@ bool setup_semijoin_dups_elimination(JOI > output is not supported. > */ > if (tab->select&& tab->select->quick) > - tab->select->quick->need_sorted_output(); > + tab->select->quick->need_sorted_output(true); > > /* Calculate key length */ > keylen= 0; > @@ -2743,6 +2743,23 @@ JOIN::save_join_tab() > > > /** > + There may be a pending 'sorted' request on the specified > + 'join_tab' which we now has decided we can ignore. > +*/ > +static void > +disable_sorted_access(JOIN_TAB* join_tab) > +{ > + DBUG_ENTER("disable_sorted_access"); > + join_tab->sorted= 0; > + if (join_tab->select&& join_tab->select->quick) > + { > + join_tab->select->quick->need_sorted_output(false); > + } > + DBUG_VOID_RETURN; > +} > + > + > +/** > Exec select. > > @todo > @@ -2921,7 +2938,7 @@ JOIN::exec() > curr_join->const_tables != curr_join->tables&& > curr_join->best_positions[curr_join->const_tables].sj_strategy > != SJ_OPT_LOOSE_SCAN) > - curr_join->join_tab[curr_join->const_tables].sorted= 0; > + disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]); > if ((tmp_error= do_select(curr_join, (List *) 0, curr_tmp_table, 0))) > { > error= tmp_error; > @@ -3087,7 +3104,7 @@ JOIN::exec() > curr_join->group_list= 0; > if (!curr_join->sort_and_group&& > curr_join->const_tables != curr_join->tables) > - curr_join->join_tab[curr_join->const_tables].sorted= 0; > + disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]); > if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) || > (tmp_error= do_select(curr_join, (List *) 0, curr_tmp_table, > 0))) > @@ -11176,7 +11193,7 @@ make_join_readinfo(JOIN *join, ulonglong > { > uint i, jcl; > bool statistics= test(!(join->select_options& SELECT_DESCRIBE)); > - bool sorted= 1; > + bool sorted= (join->order || join->group_list); I did not see an explanation for this change. There is also a TODO near the use of 'sorted' that I wonder if can be removed. > uint first_sjm_table= MAX_TABLES; > uint last_sjm_table= MAX_TABLES; > DBUG_ENTER("make_join_readinfo"); > @@ -18179,6 +18196,7 @@ join_read_key2(JOIN_TAB *tab, TABLE *tab > int error; > if (!table->file->inited) > { > + DBUG_ASSERT(!tab->sorted); // Don't expect sort req. for single row. > table->file->ha_index_init(table_ref->key, tab->sorted); > } > > @@ -18488,7 +18506,7 @@ join_read_last(JOIN_TAB *tab) > tab->read_record.index=tab->index; > tab->read_record.record=table->record[0]; > if (!table->file->inited) > - table->file->ha_index_init(tab->index, 1); > + table->file->ha_index_init(tab->index, tab->sorted); > if ((error= tab->table->file->ha_index_last(tab->table->record[0]))) > return report_error(table, error); > return 0; > @@ -18512,7 +18530,7 @@ join_ft_read_first(JOIN_TAB *tab) > TABLE *table= tab->table; > > if (!table->file->inited) > - table->file->ha_index_init(tab->ref.key, 1); > + table->file->ha_index_init(tab->ref.key, tab->sorted); > table->file->ft_init(); > > if ((error= table->file->ft_read(table->record[0]))) > @@ -20290,7 +20308,7 @@ check_reverse_order: > } > } > else if (select&& select->quick) > - select->quick->need_sorted_output(); > + select->quick->need_sorted_output(true); > > } // QEP has been modified > > @@ -23197,9 +23215,22 @@ void select_describe(JOIN *join, bool ne > > if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE&& > !(((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags& > - HA_MRR_USE_DEFAULT_IMPL)) > + (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))) > { > - extra.append(STRING_WITH_LEN("; Using MRR")); > + /* > + During normal execution of a query, multi_range_read_init() is > + called to initialize MRR. If HA_MRR_SORTED is set at this point, > + multi_range_read_init() for any native MRR implementation will > + revert to default MRR because they cannot produce sorted output > + currently. > + Calling multi_range_read_init() can potentially be costly, so it > + is not done when executing an EXPLAIN. We therefore make the > + assumption that HA_MRR_SORTED means no MRR. If some MRR native > + implementation will support sorted output in the future, a > + function "bool mrr_supports_sorted()" should be added in the > + handler. > + */ > + extra.append(STRING_WITH_LEN("; Using MRR")); > } > if (need_tmp_table) > { Thanks, Roy