3712 Tor Didriksen 2009-11-17
Bug#46550 Azalea returning duplicate results for some IN subqueries w/ semijoin=on
The optimizer was choosing an illegal plan for FirstMatch.
Add an explicit check that all inner tables are part of 'remaining_tables'
when we initiate the FirstMatch algorithm.
@ mysql-test/r/subselect_sj.result
Add test case.
@ mysql-test/r/subselect_sj_jcl6.result
Add test case.
@ mysql-test/t/subselect_sj.test
Add test case.
@ sql/sql_select.cc
Add an explicit check that all inner tables are part of 'remaining_tables'
when we initiate the FirstMatch algorithm.
Some cosmetics for readability and coding style compliance.
modified:
mysql-test/r/subselect_sj.result
mysql-test/r/subselect_sj_jcl6.result
mysql-test/t/subselect_sj.test
sql/sql_select.cc
3711 Alexey Botchkov 2009-11-15 [merge]
merging
modified:
mysql-test/r/mysqlbinlog_row.result
mysql-test/r/mysqlbinlog_row_innodb.result
mysql-test/r/mysqlbinlog_row_myisam.result
mysql-test/r/mysqlbinlog_row_trans.result
=== modified file 'mysql-test/r/subselect_sj.result'
--- a/mysql-test/r/subselect_sj.result 2009-09-01 14:44:35 +0000
+++ b/mysql-test/r/subselect_sj.result 2009-11-17 10:12:07 +0000
@@ -386,3 +386,80 @@ E4 Don
set optimizer_switch='default';
drop table STAFF,WORKS,PROJ;
# End of bug#45191
+#
+# Bug#46550 Azalea returning duplicate results for some IN subqueries
+# w/ semijoin=on
+#
+DROP TABLE IF EXISTS t0, t1, t2;
+CREATE TABLE t0 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t0 VALUES
+(1,'m','m'),
+(40,'h','h'),
+(1,'r','r'),
+(1,'h','h'),
+(9,'x','x'),
+(NULL,'q','q'),
+(NULL,'k','k'),
+(7,'l','l'),
+(182,'k','k'),
+(202,'a','a'),
+(7,'x','x'),
+(6,'j','j'),
+(119,'z','z'),
+(4,'d','d'),
+(5,'h','h'),
+(1,'u','u'),
+(3,'q','q'),
+(7,'a','a'),
+(3,'e','e'),
+(6,'l','l');
+CREATE TABLE t1 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t1 VALUES (7,NULL,NULL),(4,'x','x');
+CREATE TABLE t2 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t2 VALUES (123,NULL,NULL);
+SELECT int_key
+FROM t0
+WHERE varchar_nokey IN (
+SELECT t1 .varchar_key from t1
+);
+int_key
+9
+7
+SELECT t0.int_key
+FROM t0
+WHERE t0.varchar_nokey IN (
+SELECT t1_1 .varchar_key
+FROM t1 AS t1_1 JOIN t1 AS t1_2 ON t1_1 .int_key
+);
+int_key
+9
+7
+SELECT t0.int_key
+FROM t0, t2
+WHERE t0.varchar_nokey IN (
+SELECT t1_1 .varchar_key
+FROM t1 AS t1_1 JOIN t1 AS t1_2 ON t1_1 .int_key
+);
+int_key
+9
+7
+DROP TABLE t0, t1, t2;
+# End of bug#46550
=== modified file 'mysql-test/r/subselect_sj_jcl6.result'
--- a/mysql-test/r/subselect_sj_jcl6.result 2009-09-01 14:44:35 +0000
+++ b/mysql-test/r/subselect_sj_jcl6.result 2009-11-17 10:12:07 +0000
@@ -390,6 +390,83 @@ E4 Don
set optimizer_switch='default';
drop table STAFF,WORKS,PROJ;
# End of bug#45191
+#
+# Bug#46550 Azalea returning duplicate results for some IN subqueries
+# w/ semijoin=on
+#
+DROP TABLE IF EXISTS t0, t1, t2;
+CREATE TABLE t0 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t0 VALUES
+(1,'m','m'),
+(40,'h','h'),
+(1,'r','r'),
+(1,'h','h'),
+(9,'x','x'),
+(NULL,'q','q'),
+(NULL,'k','k'),
+(7,'l','l'),
+(182,'k','k'),
+(202,'a','a'),
+(7,'x','x'),
+(6,'j','j'),
+(119,'z','z'),
+(4,'d','d'),
+(5,'h','h'),
+(1,'u','u'),
+(3,'q','q'),
+(7,'a','a'),
+(3,'e','e'),
+(6,'l','l');
+CREATE TABLE t1 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t1 VALUES (7,NULL,NULL),(4,'x','x');
+CREATE TABLE t2 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t2 VALUES (123,NULL,NULL);
+SELECT int_key
+FROM t0
+WHERE varchar_nokey IN (
+SELECT t1 .varchar_key from t1
+);
+int_key
+9
+7
+SELECT t0.int_key
+FROM t0
+WHERE t0.varchar_nokey IN (
+SELECT t1_1 .varchar_key
+FROM t1 AS t1_1 JOIN t1 AS t1_2 ON t1_1 .int_key
+);
+int_key
+9
+7
+SELECT t0.int_key
+FROM t0, t2
+WHERE t0.varchar_nokey IN (
+SELECT t1_1 .varchar_key
+FROM t1 AS t1_1 JOIN t1 AS t1_2 ON t1_1 .int_key
+);
+int_key
+9
+7
+DROP TABLE t0, t1, t2;
+# End of bug#46550
set join_cache_level=default;
show variables like 'join_cache_level';
Variable_name Value
=== modified file 'mysql-test/t/subselect_sj.test'
--- a/mysql-test/t/subselect_sj.test 2009-09-01 14:44:35 +0000
+++ b/mysql-test/t/subselect_sj.test 2009-11-17 10:12:07 +0000
@@ -277,3 +277,83 @@ drop table STAFF,WORKS,PROJ;
--echo # End of bug#45191
+--echo #
+--echo # Bug#46550 Azalea returning duplicate results for some IN subqueries
+--echo # w/ semijoin=on
+--echo #
+
+--disable_warnings
+DROP TABLE IF EXISTS t0, t1, t2;
+--enable_warnings
+
+CREATE TABLE t0 (
+ int_key int(11) DEFAULT NULL,
+ varchar_key varchar(1) DEFAULT NULL,
+ varchar_nokey varchar(1) DEFAULT NULL,
+ KEY int_key (int_key),
+ KEY varchar_key (varchar_key,int_key)
+);
+
+INSERT INTO t0 VALUES
+(1,'m','m'),
+(40,'h','h'),
+(1,'r','r'),
+(1,'h','h'),
+(9,'x','x'),
+(NULL,'q','q'),
+(NULL,'k','k'),
+(7,'l','l'),
+(182,'k','k'),
+(202,'a','a'),
+(7,'x','x'),
+(6,'j','j'),
+(119,'z','z'),
+(4,'d','d'),
+(5,'h','h'),
+(1,'u','u'),
+(3,'q','q'),
+(7,'a','a'),
+(3,'e','e'),
+(6,'l','l');
+
+CREATE TABLE t1 (
+ int_key int(11) DEFAULT NULL,
+ varchar_key varchar(1) DEFAULT NULL,
+ varchar_nokey varchar(1) DEFAULT NULL,
+ KEY int_key (int_key),
+ KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t1 VALUES (7,NULL,NULL),(4,'x','x');
+
+CREATE TABLE t2 (
+ int_key int(11) DEFAULT NULL,
+ varchar_key varchar(1) DEFAULT NULL,
+ varchar_nokey varchar(1) DEFAULT NULL,
+ KEY int_key (int_key),
+ KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t2 VALUES (123,NULL,NULL);
+
+SELECT int_key
+FROM t0
+WHERE varchar_nokey IN (
+ SELECT t1 .varchar_key from t1
+);
+
+SELECT t0.int_key
+FROM t0
+WHERE t0.varchar_nokey IN (
+ SELECT t1_1 .varchar_key
+ FROM t1 AS t1_1 JOIN t1 AS t1_2 ON t1_1 .int_key
+);
+
+SELECT t0.int_key
+FROM t0, t2
+WHERE t0.varchar_nokey IN (
+ SELECT t1_1 .varchar_key
+ FROM t1 AS t1_1 JOIN t1 AS t1_2 ON t1_1 .int_key
+);
+
+DROP TABLE t0, t1, t2;
+
+--echo # End of bug#46550
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2009-11-12 12:22:31 +0000
+++ b/sql/sql_select.cc 2009-11-17 10:12:07 +0000
@@ -122,7 +122,7 @@ static uint build_bitmap_for_nested_join
static
void advance_sj_state(JOIN *join, const table_map remaining_tables,
- const JOIN_TAB *s, uint idx,
+ const JOIN_TAB *new_join_tab, uint idx,
double *current_record_count, double *current_read_time,
POSITION *loose_scan_pos);
@@ -3400,7 +3400,8 @@ bool convert_subq_to_sj(JOIN *parent_joi
/* 3. Remove the original subquery predicate from the WHERE/ON */
// The subqueries were replaced for Item_int(1) earlier
- subq_pred->exec_method= Item_in_subselect::SEMI_JOIN; // for subsequent executions
+ subq_pred->exec_method=
+ Item_in_subselect::SEMI_JOIN; // for subsequent executions
/*TODO: also reset the 'with_subselect' there. */
/* n. Adjust the parent_join->tables counter */
@@ -3412,7 +3413,9 @@ bool convert_subq_to_sj(JOIN *parent_joi
tl->table->map= ((table_map)1) << table_no;
SELECT_LEX *old_sl= tl->select_lex;
tl->select_lex= parent_join->select_lex;
- for(TABLE_LIST *emb= tl->embedding; emb && emb->select_lex == old_sl; emb= emb->embedding)
+ for (TABLE_LIST *emb= tl->embedding;
+ emb && emb->select_lex == old_sl;
+ emb= emb->embedding)
emb->select_lex= parent_join->select_lex;
}
parent_join->tables += subq_lex->join->tables;
@@ -13025,14 +13028,14 @@ void optimize_wo_join_buffering(JOIN *jo
advance_sj_state()
join The join we're optimizing
remaining_tables Tables not in the join prefix
- s Join tab we've just added to the join prefix
+ new_join_tab Join tab we've just added to the join prefix
idx Index of this join tab (i.e. number of tables
in the prefix minus one)
current_record_count INOUT Estimate of #records in join prefix's output
current_read_time INOUT Cost to execute the join prefix
loose_scan_pos IN A POSITION with LooseScan plan to access
- table s (produced by the last best_access_path
- call)
+ table new_join_tab
+ (produced by the last best_access_path call)
DESCRIPTION
Update semi-join optimization state after we've added another tab (table
@@ -13068,13 +13071,13 @@ void optimize_wo_join_buffering(JOIN *jo
static
void advance_sj_state(JOIN *join, table_map remaining_tables,
- const JOIN_TAB *s, uint idx,
+ const JOIN_TAB *new_join_tab, uint idx,
double *current_record_count, double *current_read_time,
POSITION *loose_scan_pos)
{
TABLE_LIST *emb_sj_nest;
POSITION *pos= join->positions + idx;
- remaining_tables &= ~s->table->map;
+ remaining_tables &= ~new_join_tab->table->map;
pos->prefix_cost.convert_from_cost(*current_read_time);
pos->prefix_record_count= *current_record_count;
@@ -13092,17 +13095,22 @@ void advance_sj_state(JOIN *join, table_
else
{
// FirstMatch
- pos->first_firstmatch_table= (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH)?
- MAX_TABLES : pos[-1].first_firstmatch_table;
+ pos->first_firstmatch_table=
+ (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ?
+ MAX_TABLES : pos[-1].first_firstmatch_table;
pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl;
pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables;
+
// LooseScan
- pos->first_loosescan_table= (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN)?
- MAX_TABLES: pos[-1].first_loosescan_table;
+ pos->first_loosescan_table=
+ (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ?
+ MAX_TABLES : pos[-1].first_loosescan_table;
pos->loosescan_need_tables= pos[-1].loosescan_need_tables;
+
// SJ-Materialization Scan
- pos->sjm_scan_need_tables= (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN)?
- 0: pos[-1].sjm_scan_need_tables;
+ pos->sjm_scan_need_tables=
+ (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ?
+ 0 : pos[-1].sjm_scan_need_tables;
pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner;
// Duplicate Weedout
@@ -13112,33 +13120,39 @@ void advance_sj_state(JOIN *join, table_
table_map handled_by_fm_or_ls= 0;
/* FirstMatch Strategy */
+ if (new_join_tab->emb_sj_nest &&
+ optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH))
{
+ const table_map outer_corr_tables=
+ new_join_tab->emb_sj_nest->nested_join->sj_corr_tables |
+ new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
+ const table_map sj_inner_tables=
+ new_join_tab->emb_sj_nest->sj_inner_tables;
+
/*
Enter condition:
1. The next join tab belongs to semi-join nest
+ (verified for the encompassing code block above).
2. We're not in a duplicate producer range yet
3. All outer tables that
- the subquery is correlated with, or
- referred to from the outer_expr
are in the join prefix
+ 4. All inner tables are still part of remaining_tables.
*/
- if (s->emb_sj_nest && // (1)
- !join->cur_sj_inner_tables && // (2)
- !(remaining_tables & // (3)
- (s->emb_sj_nest->nested_join->sj_corr_tables | // (3)
- s->emb_sj_nest->nested_join->sj_depends_on)) && // (3)
- optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH))
+ if (!join->cur_sj_inner_tables && // (2)
+ !(remaining_tables & outer_corr_tables) && // (3)
+ (sj_inner_tables == // (4)
+ ((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
{
/* Start tracking potential FirstMatch range */
pos->first_firstmatch_table= idx;
- pos->firstmatch_need_tables= s->emb_sj_nest->sj_inner_tables;
+ pos->firstmatch_need_tables= sj_inner_tables;
pos->first_firstmatch_rtbl= remaining_tables;
}
- if (pos->first_firstmatch_table != MAX_TABLES && s->emb_sj_nest)
+ if (pos->first_firstmatch_table != MAX_TABLES)
{
- table_map outer_corr_tables= s->emb_sj_nest->nested_join->sj_corr_tables |
- s->emb_sj_nest->nested_join->sj_depends_on;
if (outer_corr_tables & pos->first_firstmatch_rtbl)
{
/*
@@ -13150,7 +13164,7 @@ void advance_sj_state(JOIN *join, table_
else
{
/* Record that we need all of this semi-join's inner tables, too */
- pos->firstmatch_need_tables |= s->emb_sj_nest->sj_inner_tables;
+ pos->firstmatch_need_tables|= sj_inner_tables;
}
if (!(pos->firstmatch_need_tables & remaining_tables))
@@ -13195,7 +13209,7 @@ void advance_sj_state(JOIN *join, table_
*/
if ((pos->first_loosescan_table != MAX_TABLES) && // (1)
(first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
- s->emb_sj_nest != first->table->emb_sj_nest) //(2)
+ new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
{
pos->first_loosescan_table= MAX_TABLES;
}
@@ -13207,9 +13221,10 @@ void advance_sj_state(JOIN *join, table_
if (loose_scan_pos->read_time != DBL_MAX)
{
pos->first_loosescan_table= idx;
- pos->loosescan_need_tables= s->emb_sj_nest->sj_inner_tables |
- s->emb_sj_nest->nested_join->sj_depends_on |
- s->emb_sj_nest->nested_join->sj_corr_tables;
+ pos->loosescan_need_tables=
+ new_join_tab->emb_sj_nest->sj_inner_tables |
+ new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+ new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
}
if ((pos->first_loosescan_table != MAX_TABLES) &&
@@ -13253,13 +13268,14 @@ void advance_sj_state(JOIN *join, table_
Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
LooseScan detector in best_access_path)
*/
- if ((emb_sj_nest= s->emb_sj_nest))
+ if ((emb_sj_nest= new_join_tab->emb_sj_nest))
{
join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
/* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
- if (!(remaining_tables & emb_sj_nest->sj_inner_tables & ~s->table->map))
+ if (!(remaining_tables &
+ emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
}
join->cur_dups_producing_tables &= ~handled_by_fm_or_ls;
@@ -13267,7 +13283,8 @@ void advance_sj_state(JOIN *join, table_
/* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */
bool sjm_scan;
SJ_MATERIALIZATION_INFO *mat_info;
- if ((mat_info= at_sjmat_pos(join, remaining_tables, s, idx, &sjm_scan)))
+ if ((mat_info= at_sjmat_pos(join, remaining_tables,
+ new_join_tab, idx, &sjm_scan)))
{
if (sjm_scan)
{
@@ -13290,9 +13307,10 @@ void advance_sj_state(JOIN *join, table_
The simple way to model this is to remove SJM-SCAN(...) fanout once
we reach the point #2.
*/
- pos->sjm_scan_need_tables= s->emb_sj_nest->sj_inner_tables |
- s->emb_sj_nest->nested_join->sj_depends_on |
- s->emb_sj_nest->nested_join->sj_corr_tables;
+ pos->sjm_scan_need_tables=
+ new_join_tab->emb_sj_nest->sj_inner_tables |
+ new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+ new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
pos->sjm_scan_last_inner= idx;
}
else
@@ -13327,7 +13345,8 @@ void advance_sj_state(JOIN *join, table_
pos->sj_strategy= SJ_OPT_MATERIALIZE;
*current_read_time= mat_read_time;
*current_record_count= prefix_rec_count;
- join->cur_dups_producing_tables &= ~s->emb_sj_nest->sj_inner_tables;
+ join->cur_dups_producing_tables&=
+ ~new_join_tab->emb_sj_nest->sj_inner_tables;
}
}
}
@@ -13400,7 +13419,7 @@ void advance_sj_state(JOIN *join, table_
correlated
*/
TABLE_LIST *nest;
- if ((nest= s->emb_sj_nest))
+ if ((nest= new_join_tab->emb_sj_nest))
{
if (!pos->dupsweedout_tables)
pos->first_dupsweedout_table= idx;
@@ -13411,7 +13430,8 @@ void advance_sj_state(JOIN *join, table_
}
if (pos->dupsweedout_tables &&
- !((remaining_tables & ~s->table->map) & pos->dupsweedout_tables))
+ !(remaining_tables &
+ ~new_join_tab->table->map & pos->dupsweedout_tables))
{
/*
Ok, reached a state where we could put a dups weedout point.
Attachment: [text/bzr-bundle] bzr/tor.didriksen@sun.com-20091117101207-y3779rzw50cm2emc.bundle
| Thread |
|---|
| • bzr push into mysql-6.0-codebase-bugfixing branch (tor.didriksen:3711 to3712) Bug#46550 | Tor Didriksen | 17 Nov |