#At file:///export/home/didrik/repo/next-mr-opt-team-wl1393-merge/ based on revid:tor.didriksen@stripped
3258 Tor Didriksen 2010-12-09
Bug #58782 Missing rows with SELECT .. WHERE .. IN subquery with full GROUP BY and no aggr
The GROUP BY is optimized away earlier than any example queries seen so far:
In JOIN::optimize()
We have found that grouping can be removed since groups correspond to
only one row anyway, but we still have to guarantee correct result
order. The line below effectively rewrites the query from GROUP BY
<fields> to ORDER BY <fields>.
So, the handling of GROUP BY to disable PQ in JOIN::exec does not catch it.
This is a DEPENDENT SUBQUERY, which is executed once for each outer row.
This means we have to return all rows each time we do filesort,
i.e. we must set limit == HA_POS_ERROR, and disable PQ.
@ mysql-test/r/group_by.result
New test case.
@ mysql-test/r/order_by_icp_mrr.result
New (correct) result.
@ mysql-test/r/order_by_none.result
New (correct) result.
@ mysql-test/t/group_by.test
New test case.
@ sql/sql_select.cc
If the query as a GROUP BY, then remember that fact in JOIN::prepare.
Use this fact in JOIN::exec, to disable PQ.
@ sql/sql_select.h
Rename group to has_group_by, for readability, add had_group_by.
modified:
mysql-test/r/group_by.result
mysql-test/r/order_by_icp_mrr.result
mysql-test/r/order_by_none.result
mysql-test/t/group_by.test
sql/sql_select.cc
sql/sql_select.h
=== modified file 'mysql-test/r/group_by.result'
--- a/mysql-test/r/group_by.result 2010-12-06 13:12:51 +0000
+++ b/mysql-test/r/group_by.result 2010-12-09 12:31:15 +0000
@@ -1886,3 +1886,48 @@ f1 MIN(f2) MAX(f2)
4 00:25:00 00:25:00
DROP TABLE t1;
#End of test#49771
+#
+# Bug #58782
+# Missing rows with SELECT .. WHERE .. IN subquery
+# with full GROUP BY and no aggr
+#
+CREATE TABLE t1 (
+pk INT NOT NULL,
+col_int_nokey INT,
+PRIMARY KEY (pk)
+);
+INSERT INTO t1 VALUES (10,7);
+INSERT INTO t1 VALUES (11,1);
+INSERT INTO t1 VALUES (12,5);
+INSERT INTO t1 VALUES (13,3);
+SELECT pk AS field1, col_int_nokey AS field2
+FROM t1
+WHERE col_int_nokey > 0
+GROUP BY field1, field2;
+field1 field2
+10 7
+11 1
+12 5
+13 3
+CREATE TABLE where_subselect
+SELECT pk AS field1, col_int_nokey AS field2
+FROM t1
+WHERE col_int_nokey > 0
+GROUP BY field1, field2
+;
+SELECT *
+FROM where_subselect
+WHERE (field1, field2) IN (
+SELECT pk AS field1, col_int_nokey AS field2
+FROM t1
+WHERE col_int_nokey > 0
+GROUP BY field1, field2
+);
+field1 field2
+10 7
+11 1
+12 5
+13 3
+DROP TABLE t1;
+DROP TABLE where_subselect;
+# End of Bug #58782
=== modified file 'mysql-test/r/order_by_icp_mrr.result'
--- a/mysql-test/r/order_by_icp_mrr.result 2010-12-09 11:54:39 +0000
+++ b/mysql-test/r/order_by_icp_mrr.result 2010-12-09 12:31:15 +0000
@@ -2384,6 +2384,8 @@ GROUP BY field1, field2
);
field1 field2
27 27
+28 28
+29 29
DROP TABLE t1;
DROP TABLE where_subselect;
# End of Bug #58761
=== modified file 'mysql-test/r/order_by_none.result'
--- a/mysql-test/r/order_by_none.result 2010-12-09 11:54:39 +0000
+++ b/mysql-test/r/order_by_none.result 2010-12-09 12:31:15 +0000
@@ -2383,6 +2383,8 @@ GROUP BY field1, field2
);
field1 field2
27 27
+28 28
+29 29
DROP TABLE t1;
DROP TABLE where_subselect;
# End of Bug #58761
=== modified file 'mysql-test/t/group_by.test'
--- a/mysql-test/t/group_by.test 2010-12-06 13:12:51 +0000
+++ b/mysql-test/t/group_by.test 2010-12-09 12:31:15 +0000
@@ -1273,3 +1273,52 @@ SELECT f1,MIN(f2),MAX(f2) FROM t1 GROUP
DROP TABLE t1;
--echo #End of test#49771
+
+--echo #
+--echo # Bug #58782
+--echo # Missing rows with SELECT .. WHERE .. IN subquery
+--echo # with full GROUP BY and no aggr
+--echo #
+
+CREATE TABLE t1 (
+ pk INT NOT NULL,
+ col_int_nokey INT,
+ PRIMARY KEY (pk)
+);
+
+INSERT INTO t1 VALUES (10,7);
+INSERT INTO t1 VALUES (11,1);
+INSERT INTO t1 VALUES (12,5);
+INSERT INTO t1 VALUES (13,3);
+
+## original query:
+
+SELECT pk AS field1, col_int_nokey AS field2
+FROM t1
+WHERE col_int_nokey > 0
+GROUP BY field1, field2;
+
+## store query results in a new table:
+
+CREATE TABLE where_subselect
+ SELECT pk AS field1, col_int_nokey AS field2
+ FROM t1
+ WHERE col_int_nokey > 0
+ GROUP BY field1, field2
+;
+
+## query the new table and compare to original using WHERE ... IN():
+
+SELECT *
+FROM where_subselect
+WHERE (field1, field2) IN (
+ SELECT pk AS field1, col_int_nokey AS field2
+ FROM t1
+ WHERE col_int_nokey > 0
+ GROUP BY field1, field2
+);
+
+DROP TABLE t1;
+DROP TABLE where_subselect;
+
+--echo # End of Bug #58782
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-12-09 11:54:39 +0000
+++ b/sql/sql_select.cc 2010-12-09 12:31:15 +0000
@@ -744,7 +744,8 @@ JOIN::prepare(Item ***rref_pointer_array
/* Init join struct */
count_field_types(select_lex, &tmp_table_param, all_fields, 0);
ref_pointer_array_size= all_fields.elements*sizeof(Item*);
- this->group= group_list != 0;
+ this->has_group_by= group_list != 0;
+ this->had_group_by= this->has_group_by;
unit= unit_arg;
if (tmp_table_param.sum_func_count && !group_list)
@@ -2118,7 +2119,7 @@ JOIN::optimize()
join_tab->table->keys_in_use_for_order_by=
join_tab->table->keys_in_use_for_group_by;
group_list= 0;
- group= 0;
+ has_group_by= false;
}
if (select_distinct &&
list_contains_unique_index(join_tab[const_tables].table,
@@ -2183,7 +2184,8 @@ JOIN::optimize()
}
order=0;
}
- group=1; // For end_write_group
+ has_group_by= true; // For end_write_group
+ had_group_by= true;
}
else
group_list= 0;
@@ -2206,7 +2208,7 @@ JOIN::optimize()
if (old_group_list && !group_list)
select_distinct= 0;
}
- if (!group_list && group)
+ if (!group_list && has_group_by)
{
order=0; // The output has only one row
simple_order=1;
@@ -2740,8 +2742,6 @@ JOIN::exec()
int tmp_error;
DBUG_ENTER("JOIN::exec");
- const bool has_group_by= this->group;
-
thd_proc_info(thd, "executing");
error= 0;
if (procedure)
@@ -3127,7 +3127,7 @@ JOIN::exec()
count_field_types(select_lex, &curr_join->tmp_table_param,
*curr_all_fields, 0);
- if (curr_join->group || curr_join->implicit_grouping ||
+ if (curr_join->has_group_by || curr_join->implicit_grouping ||
curr_join->tmp_table_param.sum_func_count ||
(procedure && (procedure->flags & PROC_GROUP)))
{
@@ -3220,7 +3220,7 @@ JOIN::exec()
}
}
{
- if (group)
+ if (has_group_by)
curr_join->m_select_limit= HA_POS_ERROR;
else
{
@@ -3270,17 +3270,17 @@ JOIN::exec()
unit->select_limit_cnt == 1 (we only need one row in the result set)
*/
const ha_rows filesort_limit_arg=
- (has_group_by || curr_join->tables > 1)
+ (had_group_by || curr_join->tables > 1)
? curr_join->m_select_limit : unit->select_limit_cnt;
const ha_rows select_limit_arg=
select_options & OPTION_FOUND_ROWS
? HA_POS_ERROR : unit->select_limit_cnt;
- DBUG_PRINT("info", ("has_group_by %d "
+ DBUG_PRINT("info", ("had_group_by %d "
"curr_join->tables %d "
"curr_join->m_select_limit %d "
"unit->select_limit_cnt %d",
- has_group_by,
+ had_group_by,
curr_join->tables,
(int) curr_join->m_select_limit,
(int) unit->select_limit_cnt));
@@ -9089,7 +9089,7 @@ JOIN::make_simple_join(JOIN *parent, TAB
tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
first_record= sort_and_group=0;
send_records= (ha_rows) 0;
- group= 0;
+ has_group_by= 0;
row_limit= unit->select_limit_cnt;
do_send_rows= row_limit ? 1 : 0;
@@ -18525,7 +18525,7 @@ end_send_group(JOIN *join, JOIN_TAB *joi
(idx=test_if_item_cache_changed(join->group_fields)) >= 0)
{
if (join->first_record ||
- (end_of_records && !join->group && !join->group_optimized_away))
+ (end_of_records && !join->has_group_by && !join->group_optimized_away))
{
if (join->procedure)
join->procedure->end_group();
@@ -18821,7 +18821,7 @@ end_write_group(JOIN *join, JOIN_TAB *jo
if (!join->first_record || end_of_records ||
(idx=test_if_item_cache_changed(join->group_fields)) >= 0)
{
- if (join->first_record || (end_of_records && !join->group))
+ if (join->first_record || (end_of_records && !join->has_group_by))
{
if (join->procedure)
join->procedure->end_group();
@@ -21216,7 +21216,10 @@ calc_group_buffer(JOIN *join,ORDER *grou
uint key_length=0, parts=0, null_parts=0;
if (group)
- join->group= 1;
+ {
+ join->has_group_by= true;
+ join->had_group_by= true;
+ }
for (; group ; group=group->next)
{
Item *group_item= *group->item;
@@ -23622,7 +23625,7 @@ test_if_cheaper_ordering(const JOIN_TAB
bool is_best_covering= FALSE;
double fanout= 1;
ha_rows table_records= table->file->stats.records;
- bool group= join && join->group && order == join->group_list;
+ bool group= join && join->has_group_by && order == join->group_list;
ha_rows ref_key_quick_rows= HA_POS_ERROR;
/*
=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h 2010-12-06 15:21:08 +0000
+++ b/sql/sql_select.h 2010-12-09 12:31:15 +0000
@@ -1638,7 +1638,8 @@ public:
*/
bool sort_and_group;
bool first_record,full_join, no_field_update;
- bool group; /**< If query contains GROUP BY clause */
+ bool has_group_by; ///< If query contains GROUP BY clause.
+ bool had_group_by; ///< If query contained GROUP BY before optimizations.
bool do_send_rows;
table_map const_table_map,found_const_table_map;
/*
@@ -1978,7 +1979,7 @@ public:
JOIN_TAB *get_sort_by_join_tab()
{
return (!sort_by_table || skip_sort_order ||
- ((group || tmp_table_param.sum_func_count) && !group_list)) ?
+ ((has_group_by || tmp_table_param.sum_func_count) && !group_list)) ?
NULL : join_tab+const_tables;
}
private:
Attachment: [text/bzr-bundle] bzr/tor.didriksen@oracle.com-20101209123115-fduno7x6gpt9n2tp.bundle