Ole John,
Thank you for the patch. Patch approved.
On 03/21/2011 02:06 PM, Ole John Aske wrote:
> #At file:///net/fimafeng09/export/home/tmp/oleja/mysql/mysql-trunk/ based on
> revid:georgi.kodinov@stripped
>
> 3303 Ole John Aske 2011-03-21
> Updated fix (W/ Jorgens review comments) for bug#11764737: Optimizer is overly
> eager to request ordered access
>
> The essential part of this fix is:
>
> 1) Ensure that JOIN_TAB::sorted is set, and QUICK_RANGE_SELECT::mrr_flags
> contains 'HA_MRR_SORTED'
> only iff strict ordering is required from the handler - Either as a result
> of the query itself
> specifying ORDER/GROUP BY, or usage of a QUICK access method which require
> the sources to
> be ordered (Typical if its internals use ::ha_index_first, _last, _next,
> _prev)
>
> 2) Change calls to handler::ha_index_init() to take its 'sorted' argument
> either directly
> from JOIN_TAB::sorted or from mrr_flags containing HA_MRR_SORTED.
>
> In order to implement this QUICK_SELECT_I::need_sorted_output() had to be
> extended to
> take a 'bool sort' argument: Where 'sort==false' will enable us to turn off
> requirement that a QUICK_SELECT_I access method should deliver rows in sorted
> order. (Similar logic already exists for 'non-quick' access methods.)
> NOTE: QUICK_SELECT_I::need_sorted_output(sort==false) is only regarded as a
> hint
> and the different QUICK_SELECT_I methods may still request sorted order from
> the
> handler if required by their internals.
>
> Furthermore the function 'disable_sorted_access(JOIN_TAB* join_tab)' has been
> introduced which collect all logic for turning of sort requirement.
> @ mysql-test/r/innodb_mysql_lock2.result
> Accept changed result order for this (unordered) result set.
> @ mysql-test/r/partition.result
> Accept changed result order for this (unordered) result set.
> @ mysql-test/r/partition_explicit_prune.result
> Accept changed result order for (unordered) result sets.
> @ sql/opt_range.cc
> Correcly set, and use, HA_MRR_SORTED in order to only request sorted access
> iff it is required either due to optimizer using ::need_sorted_output(),
> or the internals of the quick access needing ordered access itself.
>
> Also added mrr_flags and its mrr relatives to C'tor initializer list.
> Fixed an issue with missing 'delete quick' + return in case
> get_quick_select() failed.
>
> modified:
> mysql-test/r/innodb_mysql_lock2.result
> mysql-test/r/partition.result
> mysql-test/r/partition_explicit_prune.result
> sql/opt_range.cc
> sql/opt_range.h
> sql/sql_select.cc
> === modified file 'mysql-test/r/innodb_mysql_lock2.result'
> --- a/mysql-test/r/innodb_mysql_lock2.result 2010-07-19 16:09:51 +0000
> +++ b/mysql-test/r/innodb_mysql_lock2.result 2011-03-21 13:06:21 +0000
> @@ -605,11 +605,11 @@ begin;
> # Acquire SR metadata lock on t1.
> select * from t1;
> i
> +4
> 1
> +5
> 2
> 3
> -4
> -5
> # Switching to connection 'con1'.
> # Sending:
> alter table t1 rebuild partition p0;
>
> === modified file 'mysql-test/r/partition.result'
> --- a/mysql-test/r/partition.result 2011-03-11 09:58:45 +0000
> +++ b/mysql-test/r/partition.result 2011-03-21 13:06:21 +0000
> @@ -1776,9 +1776,9 @@ insert into t1 values (18446744073709551
> (18446744073709551613), (18446744073709551612);
> select * from t1;
> a
> +18446744073709551614
> 18446744073709551612
> 18446744073709551613
> -18446744073709551614
> 18446744073709551615
> select * from t1 where a = 18446744073709551615;
> a
> @@ -1786,9 +1786,9 @@ a
> delete from t1 where a = 18446744073709551615;
> select * from t1;
> a
> +18446744073709551614
> 18446744073709551612
> 18446744073709551613
> -18446744073709551614
> drop table t1;
> CREATE TABLE t1 (
> num int(11) NOT NULL, cs int(11) NOT NULL)
>
> === modified file 'mysql-test/r/partition_explicit_prune.result'
> --- a/mysql-test/r/partition_explicit_prune.result 2011-02-01 11:38:39 +0000
> +++ b/mysql-test/r/partition_explicit_prune.result 2011-03-21 13:06:21 +0000
> @@ -332,12 +332,12 @@ ERROR 42000: You have an error in your S
> SELECT * FROM t1 `PARTITION`;
> a b
> -2 (pNeg-)subp0
> -5 p0-9:subp3
> -10 p10-99
> -3 pNeg(-subp1)
> -1 pNeg(-subp1)
> +5 p0-9:subp3
> 1 subp3
> 3 subp3
> +10 p10-99
> 100 `p100-99999`(-subp6)
> 1000 `p100-99999`(-subp6)
> 101 `p100-99999`(-subp7)
> @@ -346,12 +346,12 @@ ERROR 42000: You have an error in your S
> SELECT * FROM t1 AS `PARTITION`;
> a b
> -2 (pNeg-)subp0
> -5 p0-9:subp3
> -10 p10-99
> -3 pNeg(-subp1)
> -1 pNeg(-subp1)
> +5 p0-9:subp3
> 1 subp3
> 3 subp3
> +10 p10-99
> 100 `p100-99999`(-subp6)
> 1000 `p100-99999`(-subp6)
> 101 `p100-99999`(-subp7)
> @@ -536,10 +536,10 @@ HANDLER_WRITE 127
> SELECT * FROM t1 PARTITION (pNeg, `p10-99`);
> a b
> -2 (pNeg-)subp0
> -10 p10-99
> -3 pNeg(-subp1)
> -1 pNeg(-subp1)
> -21 REPLACEd by REPLACE
> +10 p10-99
> FLUSH STATUS;
> SELECT * FROM t1 PARTITION (pNeg, `p10-99`) INTO OUTFILE 'loadtest.txt';
> SELECT * FROM INFORMATION_SCHEMA.SESSION_STATUS
> @@ -576,7 +576,7 @@ WHERE VARIABLE_NAME LIKE 'HANDLER_%' AND
> VARIABLE_NAME VARIABLE_VALUE
> HANDLER_EXTERNAL_LOCK 6
> HANDLER_ROLLBACK 1
> -HANDLER_WRITE 18
> +HANDLER_WRITE 21
> # 6 locks (1 ha_partiiton + 2 ha_innobase) x 2 (lock+unlock)
> # 1 rollback
> SELECT * FROM t1 PARTITION (pNeg, `p10-99`);
> @@ -950,12 +950,12 @@ SUBPARTITION BY HASH (a)
> SELECT * FROM t1;
> a b
> -2 (pNeg-)subp0, Updated
> -10 p10-99
> +-222 `p100-99999`(-subp6), Updated from a = 100
> -21 REPLACEd by REPLACE
> 1 subp3
> 3 subp3
> +10 p10-99
> 1000 `p100-99999`(-subp6)
> --222 `p100-99999`(-subp6), Updated from a = 100
> 110 `p100-99999`(-subp7), Updated to 103, Updated to 110
> SHOW CREATE TABLE t2;
> Table Create Table
> @@ -981,9 +981,9 @@ SUBPARTITION BY HASH (a)
> SUBPARTITION subp7 ENGINE = InnoDB)) */
> SELECT * FROM t2;
> a b
> -10 p10-99
> 1 subp3
> 3 subp3
> +10 p10-99
> 1000 `p100-99999`(-subp6)
> 110 `p100-99999`(-subp7), Updated to 103, Updated to 110
> SHOW CREATE TABLE t3;
> @@ -995,11 +995,11 @@ t3 CREATE TABLE `t3` (
> SELECT * FROM t3;
> a b
> -2 (pNeg-)subp0, Updated
> +-222 `p100-99999`(-subp6), Updated from a = 100
> -21 REPLACEd by REPLACE
> 1 subp3
> 3 subp3
> 1000 `p100-99999`(-subp6)
> --222 `p100-99999`(-subp6), Updated from a = 100
> 110 `p100-99999`(-subp7), Updated to 103, Updated to 110
> FLUSH STATUS;
> DELETE t1 PARTITION (pNeg), t3 FROM t1, t3
>
> === modified file 'sql/opt_range.cc'
> --- a/sql/opt_range.cc 2011-03-17 11:48:04 +0000
> +++ b/sql/opt_range.cc 2011-03-21 13:06:21 +0000
> @@ -1179,7 +1179,9 @@ QUICK_SELECT_I::QUICK_SELECT_I()
> QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
> bool no_alloc, MEM_ROOT *parent_alloc,
> bool *create_error)
> - :free_file(0),cur_range(NULL),last_range(0),dont_free(0)
> + :free_file(0),cur_range(NULL),last_range(0),
> + mrr_flags(0),mrr_buf_size(0),mrr_buf_desc(NULL),
> + dont_free(0)
> {
> my_bitmap_map *bitmap;
> DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");
> @@ -1192,7 +1194,6 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T
>
> /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
> mrr_buf_size= thd->variables.read_rnd_buff_size;
> - mrr_buf_desc= NULL;
>
> if (!no_alloc&& !parent_alloc)
> {
> @@ -1220,17 +1221,12 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T
> }
>
>
> -void QUICK_RANGE_SELECT::need_sorted_output()
> +void QUICK_RANGE_SELECT::need_sorted_output(bool sort)
> {
> - if (!(mrr_flags& HA_MRR_SORTED))
> - {
> - /*
> - Native implementation can't produce sorted output. We'll have to
> - switch to default
> - */
> - mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
> - }
> - mrr_flags |= HA_MRR_SORTED;
> + if (sort)
> + mrr_flags |= HA_MRR_SORTED;
> + else
> + mrr_flags&= ~HA_MRR_SORTED;
> }
>
>
> @@ -8714,7 +8710,8 @@ int QUICK_RANGE_SELECT::reset()
> {
> if (in_ror_merged_scan)
>
> head->column_bitmaps_set_no_signal(&column_bitmap,&column_bitmap);
> - if ((error= file->ha_index_init(index,1)))
> + bool sorted= (mrr_flags& HA_MRR_SORTED);
> + if ((error= file->ha_index_init(index,sorted)))
> DBUG_RETURN(error);
> }
>
> @@ -8989,10 +8986,11 @@ int QUICK_RANGE_SELECT::get_next_prefix(
> last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
> last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);
>
> + bool sorted= (mrr_flags& HA_MRR_SORTED);
> result= file->read_range_first(last_range->min_keypart_map
> ?&start_key : 0,
> last_range->max_keypart_map ?&end_key : 0,
> test(last_range->flag& EQ_RANGE),
> - TRUE);
> + sorted);
> if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
> last_range= 0; // Stop searching
>
> @@ -9104,6 +9102,7 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUI
> */
> mrr_buf_desc= NULL;
> mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
> + mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev
> mrr_buf_size= 0;
>
>
> @@ -10584,12 +10583,19 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *par
> if (quick_prefix_records == HA_POS_ERROR)
> quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
> else
> + {
> /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
> quick->quick_prefix_select= get_quick_select(param, param_idx,
> index_tree,
> - HA_MRR_USE_DEFAULT_IMPL, 0,
> + HA_MRR_USE_DEFAULT_IMPL |
> HA_MRR_SORTED,
> + 0,
> &quick->alloc);
> -
> + if (!quick->quick_prefix_select)
> + {
> + delete quick;
> + DBUG_RETURN(NULL);
> + }
> + }
> /*
> Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
> attribute, and create an array of QUICK_RANGES to be used by the
> @@ -10985,7 +10991,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(vo
> DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
>
> head->set_keyread(TRUE); /* We need only the key attributes */
> - if ((result= file->ha_index_init(index,1)))
> + /*
> + Request ordered index access as usage of ::index_last(),
> + ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it.
> + */
> + if ((result= file->ha_index_init(index,true)))
> DBUG_RETURN(result);
> if (quick_prefix_select&& quick_prefix_select->reset())
> DBUG_RETURN(1);
>
> === modified file 'sql/opt_range.h'
> --- a/sql/opt_range.h 2010-12-17 12:58:04 +0000
> +++ b/sql/opt_range.h 2011-03-21 13:06:21 +0000
> @@ -285,11 +285,15 @@ public:
> virtual bool clustered_pk_range() { return false; }
>
> /*
> - Request that this quick select produces sorted output. Not all quick
> - selects can do it, the caller is responsible for calling this function
> - only for those quick selects that can.
> + Request that this quick select produces sorted output if 'sort==true'.
> + Not all quick selects can provide sorted output, the caller is responsible
> + for calling this function only for those quick selects that can.
> + Caller is allowed to later cancel its sorted request by setting
> + 'sort==false'. However, the implementation is allowed to provide sorted
> + output even in this case if benificial, or required by implementation
> + internals.
> */
> - virtual void need_sorted_output() = 0;
> + virtual void need_sorted_output(bool sort) = 0;
> enum {
> QS_TYPE_RANGE = 0,
> QS_TYPE_INDEX_MERGE = 1,
> @@ -399,7 +403,7 @@ uint quick_range_seq_next(range_seq_t rs
>
> /*
> Quick select that does a range scan on a single key. The records are
> - returned in key order.
> + returned in key order if ::need_sorted_output(true) has been called.
> */
> class QUICK_RANGE_SELECT : public QUICK_SELECT_I
> {
> @@ -465,7 +469,7 @@ public:
> MEM_ROOT *parent_alloc, bool *create_error);
> ~QUICK_RANGE_SELECT();
>
> - void need_sorted_output();
> + void need_sorted_output(bool sort);
> int init();
> int reset(void);
> int get_next();
> @@ -569,7 +573,7 @@ public:
> ~QUICK_INDEX_MERGE_SELECT();
>
> int init();
> - void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
> + void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ }
> int reset(void);
> int get_next();
> bool reverse_sorted() { return false; }
> @@ -647,7 +651,7 @@ public:
> ~QUICK_ROR_INTERSECT_SELECT();
>
> int init();
> - void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
> + void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ }
> int reset(void);
> int get_next();
> bool reverse_sorted() { return false; }
> @@ -718,7 +722,7 @@ public:
> ~QUICK_ROR_UNION_SELECT();
>
> int init();
> - void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
> + void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ }
> int reset(void);
> int get_next();
> bool reverse_sorted() { return false; }
> @@ -860,7 +864,7 @@ public:
> void adjust_prefix_ranges();
> bool alloc_buffers();
> int init();
> - void need_sorted_output() { /* always do it */ }
> + void need_sorted_output(bool sort) { /* always do it */ }
> int reset();
> int get_next();
> bool reverse_sorted() { return false; }
>
> === modified file 'sql/sql_select.cc'
> --- a/sql/sql_select.cc 2011-03-17 12:45:19 +0000
> +++ b/sql/sql_select.cc 2011-03-21 13:06:21 +0000
> @@ -1467,7 +1467,7 @@ bool setup_semijoin_dups_elimination(JOI
> output is not supported.
> */
> if (tab->select&& tab->select->quick)
> - tab->select->quick->need_sorted_output();
> + tab->select->quick->need_sorted_output(true);
>
> /* Calculate key length */
> keylen= 0;
> @@ -2743,6 +2743,23 @@ JOIN::save_join_tab()
>
>
> /**
> + There may be a pending 'sorted' request on the specified
> + 'join_tab' which we now has decided we can ignore.
> +*/
> +static void
> +disable_sorted_access(JOIN_TAB* join_tab)
> +{
> + DBUG_ENTER("disable_sorted_access");
> + join_tab->sorted= 0;
> + if (join_tab->select&& join_tab->select->quick)
> + {
> + join_tab->select->quick->need_sorted_output(false);
> + }
> + DBUG_VOID_RETURN;
> +}
> +
> +
> +/**
> Exec select.
>
> @todo
> @@ -2921,7 +2938,7 @@ JOIN::exec()
> curr_join->const_tables != curr_join->tables&&
> curr_join->best_positions[curr_join->const_tables].sj_strategy
> != SJ_OPT_LOOSE_SCAN)
> - curr_join->join_tab[curr_join->const_tables].sorted= 0;
> +
> disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]);
> if ((tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
> 0)))
> {
> error= tmp_error;
> @@ -3087,7 +3104,7 @@ JOIN::exec()
> curr_join->group_list= 0;
> if (!curr_join->sort_and_group&&
> curr_join->const_tables != curr_join->tables)
> - curr_join->join_tab[curr_join->const_tables].sorted= 0;
> +
> disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]);
> if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
> (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
> 0)))
> @@ -11176,7 +11193,7 @@ make_join_readinfo(JOIN *join, ulonglong
> {
> uint i, jcl;
> bool statistics= test(!(join->select_options& SELECT_DESCRIBE));
> - bool sorted= 1;
> + bool sorted= (join->order || join->group_list);
> uint first_sjm_table= MAX_TABLES;
> uint last_sjm_table= MAX_TABLES;
> DBUG_ENTER("make_join_readinfo");
> @@ -18179,6 +18196,7 @@ join_read_key2(JOIN_TAB *tab, TABLE *tab
> int error;
> if (!table->file->inited)
> {
> + DBUG_ASSERT(!tab->sorted); // Don't expect sort req. for single row.
> table->file->ha_index_init(table_ref->key, tab->sorted);
> }
>
> @@ -18488,7 +18506,7 @@ join_read_last(JOIN_TAB *tab)
> tab->read_record.index=tab->index;
> tab->read_record.record=table->record[0];
> if (!table->file->inited)
> - table->file->ha_index_init(tab->index, 1);
> + table->file->ha_index_init(tab->index, tab->sorted);
> if ((error=
> tab->table->file->ha_index_last(tab->table->record[0])))
> return report_error(table, error);
> return 0;
> @@ -18512,7 +18530,7 @@ join_ft_read_first(JOIN_TAB *tab)
> TABLE *table= tab->table;
>
> if (!table->file->inited)
> - table->file->ha_index_init(tab->ref.key, 1);
> + table->file->ha_index_init(tab->ref.key, tab->sorted);
> table->file->ft_init();
>
> if ((error= table->file->ft_read(table->record[0])))
> @@ -20290,7 +20308,7 @@ check_reverse_order:
> }
> }
> else if (select&& select->quick)
> - select->quick->need_sorted_output();
> + select->quick->need_sorted_output(true);
>
> } // QEP has been modified
>
> @@ -23197,9 +23215,22 @@ void select_describe(JOIN *join, bool ne
>
> if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE&&
> !(((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags&
> - HA_MRR_USE_DEFAULT_IMPL))
> + (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)))
> {
> - extra.append(STRING_WITH_LEN("; Using MRR"));
> + /*
> + During normal execution of a query, multi_range_read_init() is
> + called to initialize MRR. If HA_MRR_SORTED is set at this point,
> + multi_range_read_init() for any native MRR implementation will
> + revert to default MRR because they cannot produce sorted output
> + currently.
> + Calling multi_range_read_init() can potentially be costly, so it
> + is not done when executing an EXPLAIN. We therefore make the
> + assumption that HA_MRR_SORTED means no MRR. If some MRR native
> + implementation will support sorted output in the future, a
> + function "bool mrr_supports_sorted()" should be added in the
> + handler.
> + */
> + extra.append(STRING_WITH_LEN("; Using MRR"));
> }
> if (need_tmp_table)
> {
>
>
>
>
>
--
Jørgen Løland | Senior Software Engineer | +47 73842138
Oracle MySQL
Trondheim, Norway