#At file:///home/mysql_src/bzrrepos/mysql-6.0-codebase-bugfixing-50361/ based on revid:tor.didriksen@stripped
3873 Guilhem Bichot 2010-03-05
Fix for BUG#50361 "Doublenested noncorrelated subquery with FirstMatch and join cache wrong result":
semijoin materialization was chosen, but the outer table was accidentally set to use firstmatch;
that is meaningless and made certain structures inconsistent, which led join buffering to be used for
this two-inner-table firstmatch semijoin, which isn't supported and gave wrong results.
See commit comment of sql_select.cc for more details.
@ mysql-test/r/subselect4.result
result. Without the bugfix, SELECTs would generate extra rows. If only the assertion is added, it fires.
@ mysql-test/t/subselect4.test
test for the bug. Multiple tables instead of one are used to make it easier to distinguish
tables in EXPLAIN.
@ sql/sql_select.cc
With the testcase of BUG#50361, tables do not have their first|last_sj_inner_tab set (which leads
to various wrong results in SELECT). That is because "tab" (first table in
the for() loop) is t1. This isn't normal, an outer table should not be seen in this loop, because, as
documented in setup_semijoin_dups_elimination(), a firstmatch duplicate-generating range starts with
an inner table (abbreviated "it").
Here is the path which leads to this.
- Semijoin transformation rewrites the query as "t1 SJ (t2, t3)"
- To prepare for materialization, optimize_semijoin_nests() is fed an initial Query Execution Plan (t2,t3,t1)
(semijoin inner tables first, see the first lines of choose_plan()), and ordered to create a partial Query
Execution Plan for t2, t3. It produces plan (t2,t3), with t3 said to use firstmatch. It sounds unnecessary for
advance_sj_state() to pick semijoin strategies at this stage, but it does not know that it is at this stage, so
that's how it is.
- Then choose_plan() runs for the top-level query: picks (t1, t2, t3) and decides to use materialization, marks t3
accordingly with SJ_OPT_MATERIALIZE
- Then fix_semijoin_strategies_for_picked_join_order() runs: for the first iteration, it is on t3, sees that it
should use materialization, so copies the partial QEP saved by optimize_semijoin_nests() onto the POSITION slots of
t2 and t3. This imports certain information from the partial QEP which has two effects:
* it sets sj_strategy to SJ_OPT_FIRSTMATCH for t3 instead of SJ_OPT_MATERIALIZE
* it sets first_firstmatch_table to "0" for t3; in the partial QEP, index "0" was t2, but here index "0" is t1
- Because the "if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)" in the function is not "else if", we enter
it, so set the table at index first_firstmatch_table (index 0 i.e. t1) to have SJ_OPT_FIRSTMATCH.
- This is how t1, an outer table, receives SJ_OPT_FIRSTMATCH and is the first of the firstmatch range.
- Then setup_semijoin_dups_elimination() runs, loops over t1,t2,t3, with "tab"==t1, which has emb_sj_nest==NULL,
this leads to t2 and t3 not having first|last_sj_inner_tab set
- This makes check_join_cache_usage() think that there are no semijoin nests. This is why join caching happens
even with optimizer_join_cache_level=1, though such level normally excludes semijoins. Even at higher join cache levels, where it is ok to handle semijoin with join caching, the unset first|last_sj_inner_tab make check_join_cache_usage() not recognize that it is a firstmatch range with two tables, a case for which join
caching is normally excluded (see BUG 45191) ; here the exclusion isn't enforced and a bad result ensues.
- The fix: change "if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)" to
"else if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)"; this way, once we have taken the branch of "if this is materialization", we don't risk taking the branch of "if this is firstmatch" based on the temporary wrong
pos->sj_strategy.
- Note: the usage of "tab->emb_sj_nest" in the for() loop of setup_semijoin_dups_elimination() is suspicious, but
unrelated to this bug report and reported as BUG 51457.
An assertion is added to make sure that first|last_sj_inner_tab is not left unset by
setup_semijoin_dups_elimination() for any semijoin inner table participating in a firstmatch range.
modified:
mysql-test/r/subselect4.result
mysql-test/t/subselect4.test
sql/sql_select.cc
=== modified file 'mysql-test/r/subselect4.result'
--- a/mysql-test/r/subselect4.result 2010-01-20 10:11:29 +0000
+++ b/mysql-test/r/subselect4.result 2010-03-05 15:50:28 +0000
@@ -339,3 +339,89 @@ pk
# Restore old value for Index condition pushdown
SET SESSION optimizer_switch=@old_optimizer_switch;
DROP TABLE t1,t2;
+#
+# BUG#50361 Doublenested noncorrelated subquery with FirstMatch and join cache wrong result
+#
+CREATE TABLE t1(
+id INTEGER
+);
+INSERT INTO t1 VALUES(10),(20);
+create table t2 select * from t1;
+create table t3 select * from t1;
+SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+FROM t2
+WHERE 1 IN(SELECT 1
+FROM t3));
+id
+10
+20
+explain extended SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+FROM t2
+WHERE 1 IN(SELECT 1
+FROM t3));
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 2 100.00
+1 PRIMARY t2 ALL NULL NULL NULL NULL 2 100.00 Start materialize
+1 PRIMARY t3 ALL NULL NULL NULL NULL 2 100.00 End materialize; Using join buffer
+Warnings:
+Note 1003 select `test`.`t1`.`id` AS `id` from `test`.`t1` semi join (`test`.`t3` join `test`.`t2`) where 1
+delete from t2;
+delete from t3;
+INSERT INTO t1 VALUES(30),(40),(50),(60),(70),(80),(90);
+insert into t2 select * from t1;
+insert into t3 select * from t1;
+create table t4 select * from t1;
+SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+FROM t2
+WHERE 1 IN(SELECT 1
+FROM t3
+WHERE 1 IN(SELECT 1
+FROM t4)));
+id
+10
+20
+30
+40
+50
+60
+70
+80
+90
+explain SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+FROM t2
+WHERE 1 IN(SELECT 1
+FROM t3
+WHERE 1 IN(SELECT 1
+FROM t4)));
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 9
+1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Start materialize
+1 PRIMARY t3 ALL NULL NULL NULL NULL 9 Using join buffer
+1 PRIMARY t4 ALL NULL NULL NULL NULL 9 End materialize; Using join buffer
+SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+FROM t1
+WHERE 1 IN(SELECT 1
+FROM t1
+WHERE 1 IN(SELECT 1
+FROM t1)));
+id
+10
+20
+30
+40
+50
+60
+70
+80
+90
+drop table t1,t2,t3,t4;
=== modified file 'mysql-test/t/subselect4.test'
--- a/mysql-test/t/subselect4.test 2010-01-20 10:11:29 +0000
+++ b/mysql-test/t/subselect4.test 2010-03-05 15:50:28 +0000
@@ -325,3 +325,65 @@ WHERE
SET SESSION optimizer_switch=@old_optimizer_switch;
DROP TABLE t1,t2;
+
+--echo #
+--echo # BUG#50361 Doublenested noncorrelated subquery with FirstMatch and join cache wrong result
+--echo #
+
+CREATE TABLE t1(
+ id INTEGER
+ );
+INSERT INTO t1 VALUES(10),(20);
+create table t2 select * from t1;
+create table t3 select * from t1;
+
+SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+ FROM t2
+ WHERE 1 IN(SELECT 1
+ FROM t3));
+
+explain extended SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+ FROM t2
+ WHERE 1 IN(SELECT 1
+ FROM t3));
+
+delete from t2;
+delete from t3;
+
+INSERT INTO t1 VALUES(30),(40),(50),(60),(70),(80),(90);
+insert into t2 select * from t1;
+insert into t3 select * from t1;
+create table t4 select * from t1;
+
+SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+ FROM t2
+ WHERE 1 IN(SELECT 1
+ FROM t3
+ WHERE 1 IN(SELECT 1
+ FROM t4)));
+
+explain SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+ FROM t2
+ WHERE 1 IN(SELECT 1
+ FROM t3
+ WHERE 1 IN(SELECT 1
+ FROM t4)));
+
+SELECT *
+FROM t1
+WHERE 1 IN(SELECT 1
+ FROM t1
+ WHERE 1 IN(SELECT 1
+ FROM t1
+ WHERE 1 IN(SELECT 1
+ FROM t1)));
+
+drop table t1,t2,t3,t4;
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-02-05 15:19:43 +0000
+++ b/sql/sql_select.cc 2010-03-05 15:50:28 +0000
@@ -1333,16 +1333,24 @@ int setup_semijoin_dups_elimination(JOIN
case SJ_OPT_FIRST_MATCH:
{
JOIN_TAB *j, *jump_to= tab-1;
+#ifndef DBUG_OFF
+ bool sj_inner_tab_bounds_set= false;
+#endif
for (j= tab; j != tab + pos->n_sj_tables; j++)
{
if (!tab->emb_sj_nest)
jump_to= tab;
else
{
+ /* inner table, remember the interval of them */
j->first_sj_inner_tab= tab;
j->last_sj_inner_tab= tab + pos->n_sj_tables - 1;
+#ifndef DBUG_OFF
+ sj_inner_tab_bounds_set= true;
+#endif
}
}
+ DBUG_ASSERT(sj_inner_tab_bounds_set);
j[-1].do_firstmatch= jump_to;
i += pos->n_sj_tables;
break;
@@ -7980,18 +7988,34 @@ static void fix_semijoin_strategies_for_
remaining_tables |= s->table->map;
continue;
}
-
+
if (pos->sj_strategy == SJ_OPT_MATERIALIZE)
{
SJ_MATERIALIZATION_INFO *sjm= s->emb_sj_nest->sj_mat_info;
sjm->is_used= TRUE;
sjm->is_sj_scan= FALSE;
+ /*
+ This memcpy() copies a partial QEP produced by
+ optimize_semijoin_nests() (source) into the final top-level QEP
+ (target), in order to re-use the source plan for to-be-materialized
+ inner tables. It is however possible that the source QEP had picked
+ some semijoin strategy (noted SJY), different from
+ materialization. The target QEP rules (it has seen more tables), but
+ this memcpy() is going to copy the source stale strategy SJY,
+ wrongly. Which is why sj_strategy of each table of the
+ duplicate-generating range then becomes temporarily unreliable. It is
+ fixed for the first table of that range right after the memcpy(), and
+ fixed for the rest of that range at the end of this iteration by
+ setting it to SJ_OPT_NONE). But until then, pos->sj_strategy should
+ not be read.
+ */
memcpy(pos - sjm->tables + 1, sjm->positions,
sizeof(POSITION) * sjm->tables);
first= tablenr - sjm->tables + 1;
join->best_positions[first].n_sj_tables= sjm->tables;
join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE;
}
+
else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
{
POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner;
@@ -7999,7 +8023,7 @@ static void fix_semijoin_strategies_for_
sjm->is_used= TRUE;
sjm->is_sj_scan= TRUE;
first= pos->sjm_scan_last_inner - sjm->tables + 1;
- memcpy(join->best_positions + first,
+ memcpy(join->best_positions + first, // stale semijoin strategy here too
sjm->positions, sizeof(POSITION) * sjm->tables);
join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
join->best_positions[first].n_sj_tables= sjm->tables;
@@ -8032,8 +8056,8 @@ static void fix_semijoin_strategies_for_
rem_tables &= ~join->best_positions[i].table->table->map;
}
}
-
- if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
+
+ else if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
{
first= pos->first_firstmatch_table;
join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH;
@@ -8066,7 +8090,7 @@ static void fix_semijoin_strategies_for_
}
}
- if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN)
+ else if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN)
{
first= pos->first_loosescan_table;
POSITION *first_pos= join->best_positions + first;
@@ -8101,7 +8125,7 @@ static void fix_semijoin_strategies_for_
first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
}
- if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT)
+ else if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT)
{
/*
Duplicate Weedout starting at pos->first_dupsweedout_table, ending at
@@ -8115,6 +8139,10 @@ static void fix_semijoin_strategies_for_
uint i_end= first + join->best_positions[first].n_sj_tables;
for (uint i= first; i < i_end; i++)
{
+ /*
+ Eliminate stale strategies. See comment in the SJ_OPT_MATERIALIZE case
+ above.
+ */
if (i != first)
join->best_positions[i].sj_strategy= SJ_OPT_NONE;
handled_tabs |= join->best_positions[i].table->table->map;
Attachment: [text/bzr-bundle] bzr/guilhem@mysql.com-20100305155028-7iyjpmblhhm9ewyr.bundle
| Thread |
|---|
| • bzr commit into mysql-6.0-codebase branch (guilhem:3873) Bug#50361 | Guilhem Bichot | 5 Mar |