God Dag Roy,
Roy Lyseng a écrit, Le 16.09.2010 13:16:
> #At file:///home/rl136806/mysql/repo/mysql-work5/ based on
> revid:jorgen.loland@stripped
>
> 3244 Roy Lyseng 2010-09-16
> Bug#55955: crash in MEMORY engine with IN(LEFT JOIN (JOIN))
>
> As indicated in the "Suggested fix", the problem is that not all
> tables in a semijoin nest has the emb_sj_nest pointer set.
> The pointer is only set if the table is contained immediately within
> the semijoin nest, but according to the documentation in sql_select.h,
> it should be set for all tables within a semijoin nest.
> This is also consistent, as a table can never be included in more than
> one semijoin nest (semijoin nests cannot be nested).
>
> I have also manually inspected all uses of emb_sj_nest, and it seems that
> all uses are still consistent (previously there was an implicit assert
> that emb_sj_nest == embedding, which no longer holds).
>
> mysql-test/include/subquery_sj.inc
> Added test case for bug#55955.
>
> mysql-test/r/subquery_sj_all.result
> Semijoin plan changed from DuplicateWeedout to Materialize.
Do you know what caused this?
> sql/sql_select.cc
> Moved setting of emb_sj_nest from pullout_semijoin_tables() to
> make_join_statistics(). pullout_semijoin_tables() only went through
> tables immediately contained in the semijoin nests, and the code
> strictly did not belong here. It is easier to loop over all join
> tabs and check whether they belong to a semijoin nest through
> some nesting.
>
> Also did a few minor cleanups, by using explicit initializers
> instead of assignment to a variable inside an expression,
> adding a set of parentheses to make syntax clearer, fixing some
> argument documentation mistakes, and removing a redundant reference
> to 'sj_corr_tables'.
> === modified file 'mysql-test/r/subquery_sj_all.result'
> @@ -2577,10 +2577,10 @@ explain select *
> from t0 where a in
> (select t2.a+t3.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a);
> id select_type table type possible_keys key key_len ref rows Extra
> -1 PRIMARY t0 ALL NULL NULL NULL NULL 10 Start temporary
> -1 PRIMARY t1 index NULL a 5 NULL 10 Using index; Using join buffer (BNL, regular
> buffers)
> +1 PRIMARY t0 ALL NULL NULL NULL NULL 10
> +1 PRIMARY t1 index NULL a 5 NULL 10 Using index; Start materialize
Do you know what caused this (see also further below) ?
> 1 PRIMARY t2 ref a a 5 test.t1.a 1 Using index
> -1 PRIMARY t3 ref a a 5 test.t1.a 1 Using where; Using index; End temporary
> +1 PRIMARY t3 ref a a 5 test.t1.a 1 Using where; Using index; End materialize
> drop table t0, t1,t2,t3;
>
> Test that MaterializeLookup strategy for semijoin,
> @@ -5124,4 +5124,28 @@ a
> 1
> 1
> drop table t1,t2,t3;
> +#
> +# Bug#55955: crash in MEMORY engine with IN(LEFT JOIN (JOIN))
> +#
> +CREATE TABLE t1 (a INT);
> +CREATE TABLE t2 (a INT);
> +CREATE TABLE t3 (a INT);
> +INSERT INTO t1 VALUES(1),(1);
> +INSERT INTO t2 VALUES(1),(1);
> +INSERT INTO t3 VALUES(2),(2);
> +explain SELECT * FROM t1
> +WHERE t1.a IN (SELECT t2.a
> +FROM t2 LEFT JOIN (t2 AS t2inner, t3) ON t2.a=t3.a);
> +id select_type table type possible_keys key key_len ref rows Extra
> +1 PRIMARY t1 ALL NULL NULL NULL NULL 2
> +1 PRIMARY t2 ALL NULL NULL NULL NULL 2 Start materialize
> +1 PRIMARY t2inner ALL NULL NULL NULL NULL 2
> +1 PRIMARY t3 ALL NULL NULL NULL NULL 2 Using where; End materialize
When I filed the bug, duplicate weedout was used (and it crashed when
inserting a malformed record into the temp table, and this record was
malformed because emb_sj_nest was not set). How come we have
materialization now? Is it that setting emb_sj_nest changes something in
allowable strategies?
> === modified file 'sql/sql_select.cc'
> --- a/sql/sql_select.cc 2010-09-15 08:53:08 +0000
> +++ b/sql/sql_select.cc 2010-09-16 11:16:22 +0000
> @@ -4219,9 +4219,6 @@ bool find_eq_ref_candidate(TABLE *table,
> - It is accessed via eq_ref(outer_tables)
>
> POSTCONDITIONS
> - * Tables that were pulled out have JOIN_TAB::emb_sj_nest == NULL
> - * Tables that were not pulled out have JOIN_TAB::emb_sj_nest pointing
> - to semi-join nest they are in.
> * Semi-join nests' TABLE_LIST::sj_inner_tables is updated accordingly
>
> This operation is (and should be) performed at each PS execution since
> @@ -4263,7 +4260,6 @@ int pull_out_semijoin_tables(JOIN *join)
> {
> if (tbl->table)
> {
> - tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
> if (tbl->table->map & join->const_table_map)
> {
> pulled_tables |= tbl->table->map;
> @@ -4325,15 +4321,8 @@ int pull_out_semijoin_tables(JOIN *join)
> {
> if (tbl->table)
> {
> - if (inner_tables & tbl->table->map)
> + if (!(inner_tables & tbl->table->map))
this if() and the upper if() can be merged in one with &&
> {
> - /* This table is not pulled out */
> - tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
one less use of tbl->table->reginfo.join_tab, good...
> - }
> - else
> - {
> - /* This table has been pulled out of the semi-join nest */
> - tbl->table->reginfo.join_tab->emb_sj_nest= NULL;
> /*
> Pull the table up in the same way as simplify_joins() does:
> update join_list and embedding pointers but keep next[_local]
> @@ -4920,6 +4909,29 @@ make_join_statistics(JOIN *join, TABLE_L
> if (pull_out_semijoin_tables(join))
> DBUG_RETURN(TRUE);
>
> + /*
> + Set pointer to embedding semijoin nest for all semijoined tables.
> + Note that this must be done for every table inside all semijoin nests,
> + even for tables within outer join nests embedded in semijoin nests.
> + A table can never be part of multiple semijoin nests, hence no
> + ambiguities can ever occur.
> + Note also that the pointer is not set for TABLE_LIST objects that
> + are outer join nests within semijoin nests.
> + */
> + for (s= stat; s < stat_end; s++)
> + {
> + TABLE_LIST *tables= s->table->pos_in_table_list;
> + while (tables->embedding)
> + {
> + if (tables->embedding->sj_on_expr)
> + {
> + s->emb_sj_nest= tables->embedding;
> + break;
> + }
> + tables= tables->embedding;
> + }
this while() could be, at your option:
for(TABLE_LIST *tables= s->table->pos_in_table_list ;
tables->embedding ;
tables= tables->embedding)
{ if() etc }
The entire block could be a function, at your option.
> + }
> +
> join->join_tab=stat;
> join->map2table=stat_ref;
> join->all_tables= table_vector;
By reading code I could not find a case which will break now that we set
emb_sj_nest for nested joins too.
Ok to push.