Forgot to cc commit list
-------- Original Message --------
Subject: Re: bzr commit into mysql-trunk-bugfixing branch
(tor.didriksen:3258) Bug#58782
Date: Fri, 10 Dec 2010 12:04:37 +0100
From: Øystein Grøvlen <oystein.grovlen@stripped>
Organization: Oracle
To: Tor Didriksen <tor.didriksen@stripped>
Tor,
I feel this issue exposes a problematic issue with the solution for
WL#1393. Originally, JOIN::select_limit has been carefully(?) set to
override the specified limit when it is necessary to read the entire
table. One scenario is when there is a group by, but there are others.
Before WL#1393, one would always have to read the entire table if
CALC_FOUND_ROWS where specified. Hence, select_limit was set to
HA_POS_ERROR. In order to speed up CALC_FOUND_ROWS queries with limit
you then resorted to using unit->select_limit_cnt since the original
limit was no longer available in JOIN::select_limit. However, this
means that all other logic around setting JOIN::select_limit for other
reasons is lost, and you need to re-establish this logic when calling
create_sort_index. This does not seem like a good idea to me.
Instead, I think we need some way to be able to identify when calling
filesort that the only reason that select_limit is HA_POS_ERROR is
because of CALC_FOUND_ROWS. It is not as easy as not setting
select_limit for CALC_FOUND_ROWS anymore, because I think we still need
that setting for queries where filesort is not use, and I suspect that
the probability of not using filesort even increasing if
JOIN::select_limit!=HA_POS_ERROR.
--
Øystein
On 12/ 9/10 01:31 PM, Tor Didriksen wrote:
> #At file:///export/home/didrik/repo/next-mr-opt-team-wl1393-merge/ based on
> revid:tor.didriksen@stripped
>
> 3258 Tor Didriksen 2010-12-09
> Bug #58782 Missing rows with SELECT .. WHERE .. IN subquery with full GROUP BY
> and no aggr
>
> The GROUP BY is optimized away earlier than any example queries seen so far:
> In JOIN::optimize()
> We have found that grouping can be removed since groups correspond to
> only one row anyway, but we still have to guarantee correct result
> order. The line below effectively rewrites the query from GROUP BY
> <fields> to ORDER BY<fields>.
> So, the handling of GROUP BY to disable PQ in JOIN::exec does not catch it.
> This is a DEPENDENT SUBQUERY, which is executed once for each outer row.
> This means we have to return all rows each time we do filesort,
> i.e. we must set limit == HA_POS_ERROR, and disable PQ.
> @ mysql-test/r/group_by.result
> New test case.
> @ mysql-test/r/order_by_icp_mrr.result
> New (correct) result.
> @ mysql-test/r/order_by_none.result
> New (correct) result.
> @ mysql-test/t/group_by.test
> New test case.
> @ sql/sql_select.cc
> If the query as a GROUP BY, then remember that fact in JOIN::prepare.
> Use this fact in JOIN::exec, to disable PQ.
> @ sql/sql_select.h
> Rename group to has_group_by, for readability, add had_group_by.
>
> modified:
> mysql-test/r/group_by.result
> mysql-test/r/order_by_icp_mrr.result
> mysql-test/r/order_by_none.result
> mysql-test/t/group_by.test
> sql/sql_select.cc
> sql/sql_select.h
> === modified file 'mysql-test/r/group_by.result'
> --- a/mysql-test/r/group_by.result 2010-12-06 13:12:51 +0000
> +++ b/mysql-test/r/group_by.result 2010-12-09 12:31:15 +0000
> @@ -1886,3 +1886,48 @@ f1 MIN(f2) MAX(f2)
> 4 00:25:00 00:25:00
> DROP TABLE t1;
> #End of test#49771
> +#
> +# Bug #58782
> +# Missing rows with SELECT .. WHERE .. IN subquery
> +# with full GROUP BY and no aggr
> +#
> +CREATE TABLE t1 (
> +pk INT NOT NULL,
> +col_int_nokey INT,
> +PRIMARY KEY (pk)
> +);
> +INSERT INTO t1 VALUES (10,7);
> +INSERT INTO t1 VALUES (11,1);
> +INSERT INTO t1 VALUES (12,5);
> +INSERT INTO t1 VALUES (13,3);
> +SELECT pk AS field1, col_int_nokey AS field2
> +FROM t1
> +WHERE col_int_nokey> 0
> +GROUP BY field1, field2;
> +field1 field2
> +10 7
> +11 1
> +12 5
> +13 3
> +CREATE TABLE where_subselect
> +SELECT pk AS field1, col_int_nokey AS field2
> +FROM t1
> +WHERE col_int_nokey> 0
> +GROUP BY field1, field2
> +;
> +SELECT *
> +FROM where_subselect
> +WHERE (field1, field2) IN (
> +SELECT pk AS field1, col_int_nokey AS field2
> +FROM t1
> +WHERE col_int_nokey> 0
> +GROUP BY field1, field2
> +);
> +field1 field2
> +10 7
> +11 1
> +12 5
> +13 3
> +DROP TABLE t1;
> +DROP TABLE where_subselect;
> +# End of Bug #58782
>
> === modified file 'mysql-test/r/order_by_icp_mrr.result'
> --- a/mysql-test/r/order_by_icp_mrr.result 2010-12-09 11:54:39 +0000
> +++ b/mysql-test/r/order_by_icp_mrr.result 2010-12-09 12:31:15 +0000
> @@ -2384,6 +2384,8 @@ GROUP BY field1, field2
> );
> field1 field2
> 27 27
> +28 28
> +29 29
> DROP TABLE t1;
> DROP TABLE where_subselect;
> # End of Bug #58761
>
> === modified file 'mysql-test/r/order_by_none.result'
> --- a/mysql-test/r/order_by_none.result 2010-12-09 11:54:39 +0000
> +++ b/mysql-test/r/order_by_none.result 2010-12-09 12:31:15 +0000
> @@ -2383,6 +2383,8 @@ GROUP BY field1, field2
> );
> field1 field2
> 27 27
> +28 28
> +29 29
> DROP TABLE t1;
> DROP TABLE where_subselect;
> # End of Bug #58761
>
> === modified file 'mysql-test/t/group_by.test'
> --- a/mysql-test/t/group_by.test 2010-12-06 13:12:51 +0000
> +++ b/mysql-test/t/group_by.test 2010-12-09 12:31:15 +0000
> @@ -1273,3 +1273,52 @@ SELECT f1,MIN(f2),MAX(f2) FROM t1 GROUP
>
> DROP TABLE t1;
> --echo #End of test#49771
> +
> +--echo #
> +--echo # Bug #58782
> +--echo # Missing rows with SELECT .. WHERE .. IN subquery
> +--echo # with full GROUP BY and no aggr
> +--echo #
> +
> +CREATE TABLE t1 (
> + pk INT NOT NULL,
> + col_int_nokey INT,
> + PRIMARY KEY (pk)
> +);
> +
> +INSERT INTO t1 VALUES (10,7);
> +INSERT INTO t1 VALUES (11,1);
> +INSERT INTO t1 VALUES (12,5);
> +INSERT INTO t1 VALUES (13,3);
> +
> +## original query:
> +
> +SELECT pk AS field1, col_int_nokey AS field2
> +FROM t1
> +WHERE col_int_nokey> 0
> +GROUP BY field1, field2;
> +
> +## store query results in a new table:
> +
> +CREATE TABLE where_subselect
> + SELECT pk AS field1, col_int_nokey AS field2
> + FROM t1
> + WHERE col_int_nokey> 0
> + GROUP BY field1, field2
> +;
> +
> +## query the new table and compare to original using WHERE ... IN():
> +
> +SELECT *
> +FROM where_subselect
> +WHERE (field1, field2) IN (
> + SELECT pk AS field1, col_int_nokey AS field2
> + FROM t1
> + WHERE col_int_nokey> 0
> + GROUP BY field1, field2
> +);
> +
> +DROP TABLE t1;
> +DROP TABLE where_subselect;
> +
> +--echo # End of Bug #58782
>
> === modified file 'sql/sql_select.cc'
> --- a/sql/sql_select.cc 2010-12-09 11:54:39 +0000
> +++ b/sql/sql_select.cc 2010-12-09 12:31:15 +0000
> @@ -744,7 +744,8 @@ JOIN::prepare(Item ***rref_pointer_array
> /* Init join struct */
> count_field_types(select_lex,&tmp_table_param, all_fields, 0);
> ref_pointer_array_size= all_fields.elements*sizeof(Item*);
> - this->group= group_list != 0;
> + this->has_group_by= group_list != 0;
> + this->had_group_by= this->has_group_by;
> unit= unit_arg;
>
> if (tmp_table_param.sum_func_count&& !group_list)
> @@ -2118,7 +2119,7 @@ JOIN::optimize()
> join_tab->table->keys_in_use_for_order_by=
> join_tab->table->keys_in_use_for_group_by;
> group_list= 0;
> - group= 0;
> + has_group_by= false;
> }
> if (select_distinct&&
> list_contains_unique_index(join_tab[const_tables].table,
> @@ -2183,7 +2184,8 @@ JOIN::optimize()
> }
> order=0;
> }
> - group=1; // For end_write_group
> + has_group_by= true; // For end_write_group
> + had_group_by= true;
> }
> else
> group_list= 0;
> @@ -2206,7 +2208,7 @@ JOIN::optimize()
> if (old_group_list&& !group_list)
> select_distinct= 0;
> }
> - if (!group_list&& group)
> + if (!group_list&& has_group_by)
> {
> order=0; // The output has only one row
> simple_order=1;
> @@ -2740,8 +2742,6 @@ JOIN::exec()
> int tmp_error;
> DBUG_ENTER("JOIN::exec");
>
> - const bool has_group_by= this->group;
> -
> thd_proc_info(thd, "executing");
> error= 0;
> if (procedure)
> @@ -3127,7 +3127,7 @@ JOIN::exec()
> count_field_types(select_lex,&curr_join->tmp_table_param,
> *curr_all_fields, 0);
>
> - if (curr_join->group || curr_join->implicit_grouping ||
> + if (curr_join->has_group_by || curr_join->implicit_grouping ||
> curr_join->tmp_table_param.sum_func_count ||
> (procedure&& (procedure->flags& PROC_GROUP)))
> {
> @@ -3220,7 +3220,7 @@ JOIN::exec()
> }
> }
> {
> - if (group)
> + if (has_group_by)
> curr_join->m_select_limit= HA_POS_ERROR;
> else
> {
> @@ -3270,17 +3270,17 @@ JOIN::exec()
> unit->select_limit_cnt == 1 (we only need one row in the result set)
> */
> const ha_rows filesort_limit_arg=
> - (has_group_by || curr_join->tables> 1)
> + (had_group_by || curr_join->tables> 1)
> ? curr_join->m_select_limit : unit->select_limit_cnt;
> const ha_rows select_limit_arg=
> select_options& OPTION_FOUND_ROWS
> ? HA_POS_ERROR : unit->select_limit_cnt;
>
> - DBUG_PRINT("info", ("has_group_by %d "
> + DBUG_PRINT("info", ("had_group_by %d "
> "curr_join->tables %d "
> "curr_join->m_select_limit %d "
> "unit->select_limit_cnt %d",
> - has_group_by,
> + had_group_by,
> curr_join->tables,
> (int) curr_join->m_select_limit,
> (int) unit->select_limit_cnt));
> @@ -9089,7 +9089,7 @@ JOIN::make_simple_join(JOIN *parent, TAB
> tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
> first_record= sort_and_group=0;
> send_records= (ha_rows) 0;
> - group= 0;
> + has_group_by= 0;
> row_limit= unit->select_limit_cnt;
> do_send_rows= row_limit ? 1 : 0;
>
> @@ -18525,7 +18525,7 @@ end_send_group(JOIN *join, JOIN_TAB *joi
> (idx=test_if_item_cache_changed(join->group_fields))>= 0)
> {
> if (join->first_record ||
> - (end_of_records&& !join->group&&
> !join->group_optimized_away))
> + (end_of_records&& !join->has_group_by&&
> !join->group_optimized_away))
> {
> if (join->procedure)
> join->procedure->end_group();
> @@ -18821,7 +18821,7 @@ end_write_group(JOIN *join, JOIN_TAB *jo
> if (!join->first_record || end_of_records ||
> (idx=test_if_item_cache_changed(join->group_fields))>= 0)
> {
> - if (join->first_record || (end_of_records&& !join->group))
> + if (join->first_record || (end_of_records&&
> !join->has_group_by))
> {
> if (join->procedure)
> join->procedure->end_group();
> @@ -21216,7 +21216,10 @@ calc_group_buffer(JOIN *join,ORDER *grou
> uint key_length=0, parts=0, null_parts=0;
>
> if (group)
> - join->group= 1;
> + {
> + join->has_group_by= true;
> + join->had_group_by= true;
> + }
> for (; group ; group=group->next)
> {
> Item *group_item= *group->item;
> @@ -23622,7 +23625,7 @@ test_if_cheaper_ordering(const JOIN_TAB
> bool is_best_covering= FALSE;
> double fanout= 1;
> ha_rows table_records= table->file->stats.records;
> - bool group= join&& join->group&& order ==
> join->group_list;
> + bool group= join&& join->has_group_by&& order ==
> join->group_list;
> ha_rows ref_key_quick_rows= HA_POS_ERROR;
>
> /*
>
> === modified file 'sql/sql_select.h'
> --- a/sql/sql_select.h 2010-12-06 15:21:08 +0000
> +++ b/sql/sql_select.h 2010-12-09 12:31:15 +0000
> @@ -1638,7 +1638,8 @@ public:
> */
> bool sort_and_group;
> bool first_record,full_join, no_field_update;
> - bool group; /**< If query contains GROUP BY clause */
> + bool has_group_by; ///< If query contains GROUP BY clause.
> + bool had_group_by; ///< If query contained GROUP BY before optimizations.
> bool do_send_rows;
> table_map const_table_map,found_const_table_map;
> /*
> @@ -1978,7 +1979,7 @@ public:
> JOIN_TAB *get_sort_by_join_tab()
> {
> return (!sort_by_table || skip_sort_order ||
> - ((group || tmp_table_param.sum_func_count)&& !group_list)) ?
> + ((has_group_by || tmp_table_param.sum_func_count)&&
> !group_list)) ?
> NULL : join_tab+const_tables;
> }
> private:
>
>
>
>
>
--
Øystein Grøvlen, Principal Software Engineer
MySQL Group, Oracle
Trondheim, Norway
| Thread |
|---|
| • Fwd: Re: bzr commit into mysql-trunk-bugfixing branch (tor.didriksen:3258)Bug#58782 | Øystein Grøvlen | 10 Dec |