On Thu, Aug 12, 2010 at 2:15 PM, Roy Lyseng <roy.lyseng@stripped> wrote:
> #At file:///home/rl136806/mysql/repo/mysql-work5/ based on
> revid:guilhem.bichot@stripped
>
> 3228 Roy Lyseng 2010-08-12
> Bug#50489: another segfault in fix_semijoin_strategies...
>
> This particular problem occurs when we try to execute a query multiple
> times with different optimizer switch settings. On first execution,
> semijoin materialization strategy is a candidate and an
> SJ_MATERIALIZATION_INFO object is created. On second execution,
> materialization strategy is not a candidate, but there is a dangling
> pointer to the SJ_MATERIALIZATION_INFO object created in the prior
> execution. The pointer is followed, and we suffer a segfault.
>
> The solution chosen here is to refactor the SJ_MATERIALIZATION_INFO
> class so that optimization information is put into a new struct
> st_semijoin_mat which is embedded in struct st_nested_join.
>
> If semijoin materialization is an allowed strategy and materialization
> is possible for the semijoin in question, the st_semijoin_mat object
> is filled in and an st_position object is created and filled with
> information about the join strategy for the materialization.
>
> Later in the optimization, the materialization strategy is compared
> with other semijoin strategies, and, if selected, an
> SJ_MATERIALIZATION_INFO object is created and attached to the join
> nest
> representing the semijoin operation.
> The object is then filled with data relevant for semijoin execution,
> such as number of tables involved, whether this is a scan, information
> about temporary table for materialization, etc.
>
> NOTE. This bug no longer occurs on the current souce code base.
> It does however fix bug#46744, which is re-introduced by the
> fix for bug#43768, and it should also be useful because of the
> refactoring effect on the semijoin materialization strategy.
>
> mysql-test/r/optimizer_switch.result
> Added results for test case for bug#50489
>
> mysql-test/t/optimizer_switch.test
> Added test case for bug#50489
>
> sql/sql_class.h
> Optimization data are removed from class SJ_MATERIALIZATION_INFO.
>
> sql/sql_select.cc
> optimize_semijoin_nests() will no longer create
> SJ_MATERIALIZATION_INFO objects. Instead it populates the
> st_semijoin_mat struct inside the nested_join object with estimated
> cost for the materialized semijoin.
>
> fix_semijoin_strategies_for_picked_join_order() is given a boolean
> return value. The return value is necessary because the function may
> create SJ_MATERIALIZATION_INFO objects, hence it must be able to
> return error conditions. Notice that these objects are created and
> populated only if a semijoin materialization strategy is actually
> chosen. Reset of sjm.positions is performed in this function.
>
> advance_sj_state() has been slightly simplified: the "emb_sj_nest"
> is now assigned when the function is entered, in addition to changes
> that was necessitated by the splitting up of
> SJ_MATERIALIZATION_INFO.
>
> Reset of the pointer sj_nest->sj_mat_info is added to
> JOIN::destroy().
>
> sql/sql_test.cc
> New interface for function print_sjm, made necessary by the
> splitting
> of SJ_MATERIALIZATION_INFO.
>
> sql/table.h
> Defines struct st_semijoin_mat containing estimated cost for a
> materialized semijoin operation.
> The struct is added as a member in struct st_nested_join.
>
> modified:
> mysql-test/r/optimizer_switch.result
> mysql-test/t/optimizer_switch.test
> sql/sql_class.h
> sql/sql_select.cc
> sql/sql_test.cc
> sql/sql_test.h
> sql/table.h
> === modified file 'mysql-test/r/optimizer_switch.result'
> --- a/mysql-test/r/optimizer_switch.result 2010-06-18 08:45:53 +0000
> +++ b/mysql-test/r/optimizer_switch.result 2010-08-12 12:13:31 +0000
> @@ -138,3 +138,69 @@ drop view v1;
> drop procedure p1;
> set SESSION optimizer_switch='default';
> # End of bug#46744
> +#
> +# Bug#50489: another segfault in fix_semijoin_strategies...
> +#
> +CREATE TABLE it (
> +id INT NOT NULL,
> +expr_key INT NOT NULL,
> +expr_nokey INT NOT NULL,
> +expr_padder INT DEFAULT NULL,
> +KEY expr_key(expr_key)
> +);
> +INSERT INTO it VALUES (135,218264606,218264606,100);
> +INSERT INTO it VALUES (201,810783319,810783319,200);
> +CREATE TABLE ot (
> +id INT NOT NULL,
> +expr_key INT NOT NULL,
> +expr_nokey INT NOT NULL,
> +KEY expr_key(expr_key)
> +);
> +CREATE PROCEDURE run_n_times(x int)
> +BEGIN
> +DECLARE c int;
> +WHILE x DO
> +SET x = x-1;
> +SELECT COUNT(expr_key) INTO c FROM ot
> +WHERE expr_key IN (SELECT expr_nokey FROM it)
> +AND ot.expr_key<100000000;
> +END WHILE;
> +END|
> +SET optimizer_switch="default";
> +call run_n_times(1);
> +SET optimizer_switch="firstmatch=off,materialization=off";
> +call run_n_times(1);
> +SET optimizer_switch="default";
> +call run_n_times(1);
> +DROP PROCEDURE run_n_times;
> +CREATE PROCEDURE run_n_times(x int)
> +BEGIN
> +DECLARE c int;
> +WHILE x DO
> +SET x = x-1;
> +SELECT COUNT(expr_key) INTO c FROM ot
> +WHERE expr_key IN (SELECT expr_nokey FROM it)
> +AND ot.expr_key<100000000;
> +END WHILE;
> +END|
> +SET optimizer_switch="firstmatch=off,materialization=off";
> +call run_n_times(1);
> +SET optimizer_switch="default";
> +call run_n_times(1);
> +DROP PROCEDURE run_n_times;
> +CREATE PROCEDURE run_n_times(x int)
> +BEGIN
> +DECLARE c int;
> +WHILE x DO
> +SET x = x-1;
> +SELECT COUNT(expr_key) INTO c FROM ot
> +WHERE expr_key IN (SELECT expr_nokey FROM it)
> +AND ot.expr_key<100000000;
> +END WHILE;
> +END|
> +SET optimizer_switch="semijoin=off,materialization=off";
> +call run_n_times(1);
> +SET optimizer_switch="default";
> +call run_n_times(1);
> +DROP PROCEDURE run_n_times;
> +DROP TABLE it, ot;
>
> === modified file 'mysql-test/t/optimizer_switch.test'
> --- a/mysql-test/t/optimizer_switch.test 2010-06-18 08:45:53 +0000
> +++ b/mysql-test/t/optimizer_switch.test 2010-08-12 12:13:31 +0000
> @@ -148,3 +148,84 @@ drop procedure p1;
> set SESSION optimizer_switch='default';
>
> --echo # End of bug#46744
> +
> +--echo #
> +--echo # Bug#50489: another segfault in fix_semijoin_strategies...
> +--echo #
> +
> +CREATE TABLE it (
> + id INT NOT NULL,
> + expr_key INT NOT NULL,
> + expr_nokey INT NOT NULL,
> + expr_padder INT DEFAULT NULL,
> + KEY expr_key(expr_key)
> +);
> +INSERT INTO it VALUES (135,218264606,218264606,100);
> +INSERT INTO it VALUES (201,810783319,810783319,200);
> +CREATE TABLE ot (
> + id INT NOT NULL,
> + expr_key INT NOT NULL,
> + expr_nokey INT NOT NULL,
> + KEY expr_key(expr_key)
> +);
>
How about:
delimiter |;
let create_proc=
CREATE PROCEDURE run_n_times(x int)
BEGIN
DECLARE c int;
WHILE x DO
SET x = x-1;
SELECT COUNT(expr_key) INTO c FROM ot
WHERE expr_key IN (SELECT expr_nokey FROM it)
AND ot.expr_key<100000000;
END WHILE;
END|
delimiter ;|
eval $create_proc;
Then do 'eval $create_proc;' below as well, and you don't have to repeat the
same procedure body three times.
> +delimiter |;
> +CREATE PROCEDURE run_n_times(x int)
> +BEGIN
> +DECLARE c int;
> +WHILE x DO
> + SET x = x-1;
> + SELECT COUNT(expr_key) INTO c FROM ot
> + WHERE expr_key IN (SELECT expr_nokey FROM it)
> + AND ot.expr_key<100000000;
> +END WHILE;
> +END|
> +delimiter ;|
> +SET optimizer_switch="default";
> +call run_n_times(1);
> +SET optimizer_switch="firstmatch=off,materialization=off";
> +call run_n_times(1);
> +SET optimizer_switch="default";
> +call run_n_times(1);
> +DROP PROCEDURE run_n_times;
> +
> +# Re-create procedure to avoid caching effects
> +delimiter |;
> +CREATE PROCEDURE run_n_times(x int)
> +BEGIN
> +DECLARE c int;
> +WHILE x DO
> + SET x = x-1;
> + SELECT COUNT(expr_key) INTO c FROM ot
> + WHERE expr_key IN (SELECT expr_nokey FROM it)
> + AND ot.expr_key<100000000;
> +END WHILE;
> +END|
> +delimiter ;|
> +SET optimizer_switch="firstmatch=off,materialization=off";
> +call run_n_times(1);
> +SET optimizer_switch="default";
> +call run_n_times(1);
> +DROP PROCEDURE run_n_times;
> +
> +# Re-create procedure to avoid caching effects
> +delimiter |;
> +CREATE PROCEDURE run_n_times(x int)
> +BEGIN
> +DECLARE c int;
> +WHILE x DO
> + SET x = x-1;
> + SELECT COUNT(expr_key) INTO c FROM ot
> + WHERE expr_key IN (SELECT expr_nokey FROM it)
> + AND ot.expr_key<100000000;
> +END WHILE;
> +END|
> +delimiter ;|
> +SET optimizer_switch="semijoin=off,materialization=off";
> +call run_n_times(1);
> +SET optimizer_switch="default";
> +call run_n_times(1);
> +DROP PROCEDURE run_n_times;
> +
> +DROP TABLE it, ot;
> +
> +# End of Bug#50489
>
> === modified file 'sql/sql_class.h'
> --- a/sql/sql_class.h 2010-07-23 18:14:59 +0000
> +++ b/sql/sql_class.h 2010-08-12 12:13:31 +0000
> @@ -3359,63 +3359,34 @@ public:
> struct st_table_ref;
>
>
> -/*
> - Optimizer and executor structure for the materialized semi-join info.
> This
> - structure contains
> +/**
> + Executor structure for the materialized semi-join info, which contains
> - The sj-materialization temporary table
> - Members needed to make index lookup or a full scan of the temptable.
> */
> class SJ_MATERIALIZATION_INFO : public Sql_alloc
> {
> public:
> - /* Optimal join sub-order */
> - struct st_position *positions;
> -
> uint tables; /* Number of tables in the sj-nest */
>
> - /* Expected #rows in the materialized table */
> - double rows;
> -
> - /*
> - Cost to materialize - execute the sub-join and write rows into
> temp.table
> - */
> - COST_VECT materialization_cost;
> -
> - /* Cost to make one lookup in the temptable */
> - COST_VECT lookup_cost;
> -
> - /* Cost of scanning the materialized table */
> - COST_VECT scan_cost;
> -
> - /* --- Execution structures ---------- */
> -
> - /*
> - TRUE <=> This structure is used for execution. We don't necessarily
> pick
> - sj-materialization, so some of SJ_MATERIALIZATION_INFO structures are
> not
> - used by materialization
> - */
> - bool is_used;
> -
> - bool materialized; /* TRUE <=> materialization already performed */
> + bool materialized; /* TRUE <=> materialization has been performed */
> /*
> TRUE - the temptable is read with full scan
> FALSE - we use the temptable for index lookups
> */
> - bool is_sj_scan;
> + bool is_scan;
>
> /* The temptable and its related info */
> - TMP_TABLE_PARAM sjm_table_param;
> - List<Item> sjm_table_cols;
> + TMP_TABLE_PARAM table_param;
> + List<Item> table_cols;
> TABLE *table;
>
> /* Structure used to make index lookups */
> struct st_table_ref *tab_ref;
> Item *in_equality; /* See create_subquery_equalities() */
> - /* True if data types allow the MaterializeScan semijoin strategy */
> - bool sjm_scan_allowed;
>
> Item *join_cond; /* See comments in make_join_select() */
> - Copy_field *copy_field; /* Needed for SJ_Materialization scan */
> + Copy_field *copy_field; /* Needed for materialization scan */
> };
>
>
>
> === modified file 'sql/sql_select.cc'
> --- a/sql/sql_select.cc 2010-08-12 09:02:11 +0000
> +++ b/sql/sql_select.cc 2010-08-12 12:13:31 +0000
> @@ -272,8 +272,7 @@ join_read_record_no_init(JOIN_TAB *tab);
> static
> bool subquery_types_allow_materialization(Item_in_subselect *predicate);
> static
> -bool semijoin_types_allow_materialization(TABLE_LIST *sj_nest,
> - bool *sjm_scan_allowed);
> +bool semijoin_types_allow_materialization(TABLE_LIST *sj_nest);
> static
> bool types_allow_materialization(Item *outer, Item *inner);
> static
> @@ -994,10 +993,10 @@ bool subquery_types_allow_materializatio
>
> /**
> @brief Check if semijoin's compared types allow materialization.
> + Set scan_allowed to TRUE if MaterializeScan strategy allowed.
>
This comment applies to the next [inout] argument.
>
> @param sj_nest Semi-join nest containing information about correlated
> expressions.
> - @param[out] sjm_scan_allowed TRUE if MaterializeScan strategy
> allowed.
>
> @return TRUE if materialization is allowed, FALSE otherwise
>
> @@ -1040,8 +1039,7 @@ bool subquery_types_allow_materializatio
> */
>
> static
> -bool semijoin_types_allow_materialization(TABLE_LIST *sj_nest,
> - bool *sjm_scan_allowed)
> +bool semijoin_types_allow_materialization(TABLE_LIST *sj_nest)
> {
> DBUG_ENTER("semijoin_types_allow_materialization");
>
> @@ -1051,7 +1049,7 @@ bool semijoin_types_allow_materializatio
> List_iterator<Item> it1(sj_nest->nested_join->sj_outer_exprs);
> List_iterator<Item> it2(sj_nest->nested_join->sj_inner_exprs);
>
> - *sjm_scan_allowed= FALSE;
> + sj_nest->nested_join->sjm.scan_allowed= FALSE;
>
> bool all_are_fields= TRUE;
> Item *outer, *inner;
> @@ -1062,7 +1060,7 @@ bool semijoin_types_allow_materializatio
> if (!types_allow_materialization(outer, inner))
> DBUG_RETURN(FALSE);
> }
> - *sjm_scan_allowed= all_are_fields;
> + sj_nest->nested_join->sjm.scan_allowed= all_are_fields;
> DBUG_PRINT("info",("semijoin_types_allow_materialization: ok,
> allowed"));
DBUG_RETURN(TRUE);
> }
> @@ -3250,6 +3248,12 @@ JOIN::destroy()
> if (exec_tmp_table2)
> free_tmp_table(thd, exec_tmp_table2);
> destroy_sj_tmp_tables(this);
> + List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
> + TABLE_LIST *sj_nest;
> + while ((sj_nest= sj_list_it++))
> + {
> + sj_nest->sj_mat_info= NULL;
> + }
> delete_dynamic(&keyuse);
> delete procedure;
> DBUG_RETURN(error);
> @@ -4890,14 +4894,16 @@ static bool optimize_semijoin_nests(JOIN
> DBUG_ENTER("optimize_semijoin_nests");
> List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
> TABLE_LIST *sj_nest;
> - /*
> - The statement may have been executed with 'semijoin=on' earlier.
> - We need to verify that 'semijoin=on' still holds.
> - */
> - if (join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SEMIJOIN) &&
> - join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MATERIALIZATION))
> +
> + while ((sj_nest= sj_list_it++))
> {
> - while ((sj_nest= sj_list_it++))
> + /* As a precaution, reset pointers that were used in prior execution
> */
> + sj_nest->sj_mat_info= NULL;
> + sj_nest->nested_join->sjm.positions= NULL;
> +
> + /* Calculate the cost of materialization if materialization is
> allowed. */
> + if (join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SEMIJOIN)
> &&
> +
> join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MATERIALIZATION))
> {
> /* semi-join nests with only constant tables are not valid */
> DBUG_ASSERT(sj_nest->sj_inner_tables & ~join->const_table_map);
> @@ -4905,14 +4911,12 @@ static bool optimize_semijoin_nests(JOIN
> Try semijoin materialization if the semijoin is classified as
> non-trivially-correlated.
> */
> - sj_nest->sj_mat_info= NULL;
> if (sj_nest->nested_join->sj_corr_tables)
> continue;
> /*
> Check whether data types allow execution with materialization.
> */
> - bool sjm_scan_allowed;
> - if (semijoin_types_allow_materialization(sj_nest,
> &sjm_scan_allowed))
> + if (semijoin_types_allow_materialization(sj_nest))
> {
> join->emb_sjm_nest= sj_nest;
> if (choose_plan(join, all_table_map & ~join->const_table_map))
> @@ -4921,22 +4925,14 @@ static bool optimize_semijoin_nests(JOIN
> The best plan to run the subquery is now in join->best_positions,
> save it.
> */
> - uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
> - SJ_MATERIALIZATION_INFO* sjm;
> - if (!(sjm= new SJ_MATERIALIZATION_INFO) ||
> - !(sjm->positions=
> (POSITION*)join->thd->alloc(sizeof(POSITION)*
> - n_tables)))
> - DBUG_RETURN(TRUE); /* purecov: inspected */
> - sjm->tables= n_tables;
> - sjm->is_used= FALSE;
> - sjm->sjm_scan_allowed= sjm_scan_allowed;
> -
> + const uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
> double subjoin_out_rows, subjoin_read_time;
> get_partial_join_cost(join, n_tables,
> &subjoin_read_time, &subjoin_out_rows);
>
> - sjm->materialization_cost.convert_from_cost(subjoin_read_time);
> - sjm->rows= subjoin_out_rows;
> + sj_nest->nested_join->sjm.materialization_cost
> + .convert_from_cost(subjoin_read_time);
> + sj_nest->nested_join->sjm.rows= subjoin_out_rows;
>
> List<Item> &inner_expr_list=
> sj_nest->nested_join->sj_inner_exprs;
> /*
> @@ -4955,7 +4951,7 @@ static bool optimize_semijoin_nests(JOIN
> nest (the payoff is probably less here?)
> */
> {
> - for (uint i=0 ; i < join->const_tables + sjm->tables ; i++)
> + for (uint i=0 ; i < join->const_tables + n_tables ; i++)
> {
> JOIN_TAB *tab= join->best_positions[i].table;
> join->map2table[tab->table->tablenr]= tab;
> @@ -4971,10 +4967,16 @@ static bool optimize_semijoin_nests(JOIN
> double rows= 1.0;
> while ((tableno = tm_it.next_bit()) !=
> Table_map_iterator::BITMAP_END)
> rows *= join->map2table[tableno]->table->quick_condition_rows;
> - sjm->rows= min(sjm->rows, rows);
> + sj_nest->nested_join->sjm.rows=
> min(sj_nest->nested_join->sjm.rows,
> + rows);
> }
> - memcpy(sjm->positions, join->best_positions + join->const_tables,
> - sizeof(POSITION) * n_tables);
> + if (!(sj_nest->nested_join->sjm.positions=
> +
> (st_position*)join->thd->alloc(sizeof(st_position)*n_tables)))
> + DBUG_RETURN(TRUE);
> +
> + memcpy(sj_nest->nested_join->sjm.positions,
> + join->best_positions + join->const_tables,
> + sizeof(st_position) * n_tables);
>
> /*
> Calculate temporary table parameters and usage costs
> @@ -4990,19 +4992,19 @@ static bool optimize_semijoin_nests(JOIN
> Let materialization cost include the cost to write the data into
> the
> temporary table:
> */
> - sjm->materialization_cost.add_io(subjoin_out_rows, lookup_cost);
> + sj_nest->nested_join->sjm.materialization_cost
> + .add_io(subjoin_out_rows, lookup_cost);
>
> /*
> Set the cost to do a full scan of the temptable (will need this
> to
> consider doing sjm-scan):
> */
> - sjm->scan_cost.zero();
> - if (sjm->rows > 0.0)
> - sjm->scan_cost.add_io(sjm->rows, lookup_cost);
> -
> - sjm->lookup_cost.convert_from_cost(lookup_cost);
> - sj_nest->sj_mat_info= sjm;
> - DBUG_EXECUTE("opt", print_sjm(sjm););
> + sj_nest->nested_join->sjm.scan_cost.zero();
> + if (sj_nest->nested_join->sjm.rows > 0.0)
> + sj_nest->nested_join->sjm.scan_cost
> + .add_io(sj_nest->nested_join->sjm.rows, lookup_cost);
> +
> +
> sj_nest->nested_join->sjm.lookup_cost.convert_from_cost(lookup_cost);
> }
> }
> }
> @@ -7520,7 +7522,7 @@ semijoin_order_allows_materialization(co
> */
> const TABLE_LIST *emb_sj_nest= tab->emb_sj_nest;
> if (!emb_sj_nest ||
> - !emb_sj_nest->sj_mat_info ||
> + !emb_sj_nest->nested_join->sjm.positions ||
> (remaining_tables & emb_sj_nest->sj_inner_tables))
> return SJ_OPT_NONE;
>
> @@ -7541,7 +7543,7 @@ semijoin_order_allows_materialization(co
> */
> if (remaining_tables & emb_sj_nest->nested_join->sj_depends_on)
> {
> - if (emb_sj_nest->sj_mat_info->sjm_scan_allowed)
> + if (emb_sj_nest->nested_join->sjm.scan_allowed)
> return SJ_OPT_MATERIALIZE_SCAN;
> return SJ_OPT_NONE;
> }
> @@ -8235,16 +8237,20 @@ prev_record_reads(JOIN *join, uint idx,
> join and semi-join order from left to right.
> */
>
> -static void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
> +static bool fix_semijoin_strategies_for_picked_join_order(JOIN *join)
>
You should say something about @retval above.
> {
> uint table_count=join->tables;
> uint tablenr;
> table_map remaining_tables= 0;
> table_map handled_tabs= 0;
> +
> + DBUG_ENTER("fix_semijoin_strategies_for_picked_join_order");
> +
> for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1;
> tablenr--)
> {
> POSITION *pos= join->best_positions + tablenr;
> JOIN_TAB *s= pos->table;
> + TABLE_LIST *emb_sj_nest= s->emb_sj_nest;
> uint first;
> LINT_INIT(first); // Set by every branch except SJ_OPT_NONE which
> doesn't use it
>
> @@ -8256,9 +8262,13 @@ static void fix_semijoin_strategies_for_
>
> if (pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP)
> {
> - SJ_MATERIALIZATION_INFO *sjm= s->emb_sj_nest->sj_mat_info;
> - sjm->is_used= TRUE;
> - sjm->is_sj_scan= FALSE;
> + const uint tables= my_count_bits(emb_sj_nest->sj_inner_tables);
>
s/tables/num_tables/
Similarly below.
> + SJ_MATERIALIZATION_INFO* sjm_exec;
> + if (!(sjm_exec= new SJ_MATERIALIZATION_INFO))
>
if (!(sjm_exec= new (join->thd->mem_root) SJ_MATERIALIZATION_INFO))
so that you don't have to do pthread_getspecific() to find the memroot.
Similarly below.
> + DBUG_RETURN(TRUE);
> + s->emb_sj_nest->sj_mat_info= sjm_exec;
> + sjm_exec->tables= tables;
> + sjm_exec->is_scan= FALSE;
> /*
> This memcpy() copies a partial QEP produced by
> optimize_semijoin_nests() (source) into the final top-level QEP
> @@ -8274,23 +8284,31 @@ static void fix_semijoin_strategies_for_
> setting it to SJ_OPT_NONE). But until then, pos->sj_strategy should
> not be read.
> */
> - memcpy(pos - sjm->tables + 1, sjm->positions,
> - sizeof(POSITION) * sjm->tables);
> - first= tablenr - sjm->tables + 1;
> - join->best_positions[first].n_sj_tables= sjm->tables;
> + memcpy(pos - tables + 1, emb_sj_nest->nested_join->sjm.positions,
> + sizeof(POSITION) * tables);
> + first= tablenr - tables + 1;
> + join->best_positions[first].n_sj_tables= tables;
> join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_LOOKUP;
> +
> + DBUG_EXECUTE("opt", print_sjm(emb_sj_nest););
> }
> else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
> {
> POSITION *first_inner= join->best_positions +
> pos->sjm_scan_last_inner;
> - SJ_MATERIALIZATION_INFO *sjm=
> first_inner->table->emb_sj_nest->sj_mat_info;
> - sjm->is_used= TRUE;
> - sjm->is_sj_scan= TRUE;
> - first= pos->sjm_scan_last_inner - sjm->tables + 1;
> + TABLE_LIST *mat_nest= first_inner->table->emb_sj_nest;
> + const uint tables= my_count_bits(mat_nest->sj_inner_tables);
> + SJ_MATERIALIZATION_INFO* sjm_exec;
> + if (!(sjm_exec= new SJ_MATERIALIZATION_INFO))
> + DBUG_RETURN(TRUE);
> + mat_nest->sj_mat_info= sjm_exec;
> + sjm_exec->tables= tables;
> + sjm_exec->is_scan= TRUE;
> + first= pos->sjm_scan_last_inner - tables + 1;
> memcpy(join->best_positions + first, // stale semijoin strategy here
> too
> - sjm->positions, sizeof(POSITION) * sjm->tables);
> + mat_nest->nested_join->sjm.positions,
> + sizeof(POSITION) * tables);
> join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
> - join->best_positions[first].n_sj_tables= sjm->tables;
> + join->best_positions[first].n_sj_tables= tables;
> /*
> Do what advance_sj_state did: re-run best_access_path for every
> table
> in the [last_inner_table + 1; pos..) range
> @@ -8303,22 +8321,24 @@ static void fix_semijoin_strategies_for_
> prefix_rec_count=
> join->best_positions[first-1].prefix_record_count;
>
> /* Add materialization record count*/
> - prefix_rec_count *= sjm->rows;
> + prefix_rec_count *= mat_nest->nested_join->sjm.rows;
>
> uint i;
> table_map rem_tables= remaining_tables;
> - for (i= tablenr; i != (first + sjm->tables - 1); i--)
> + for (i= tablenr; i != (first + tables - 1); i--)
> rem_tables |= join->best_positions[i].table->table->map;
>
> POSITION dummy;
> join->cur_sj_inner_tables= 0;
> - for (i= first + sjm->tables; i <= tablenr; i++)
> + for (i= first + tables; i <= tablenr; i++)
> {
> best_access_path(join, join->best_positions[i].table, rem_tables,
> i, FALSE,
> prefix_rec_count, join->best_positions + i,
> &dummy);
> prefix_rec_count *= join->best_positions[i].records_read;
> rem_tables &= ~join->best_positions[i].table->table->map;
> }
> +
> + DBUG_EXECUTE("opt", print_sjm(emb_sj_nest););
> }
> else if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
> {
> @@ -8413,6 +8433,13 @@ static void fix_semijoin_strategies_for_
> pos->sj_strategy= SJ_OPT_NONE;
> remaining_tables |= s->table->map;
> }
> +
> + List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
> + TABLE_LIST *sj_nest;
> + while ((sj_nest= sj_list_it++))
> + sj_nest->nested_join->sjm.positions= NULL;
> +
> + DBUG_RETURN(FALSE);
> }
>
>
> @@ -8426,8 +8453,8 @@ static void fix_semijoin_strategies_for_
>
> DESCRIPTION
> Setup join structures according the picked join order
> - - finalize semi-join strategy choices (see
> - fix_semijoin_strategies_for_picked_join_order)
> + - finalize semi-join strategy choices
> + (see fix_semijoin_strategies_for_picked_join_order)
> - create join->join_tab array and put there the JOIN_TABs in the join
> order
> - create data structures describing ref access methods.
>
> @@ -8454,7 +8481,8 @@ static bool get_best_combination(JOIN *j
>
> used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read
>
> - fix_semijoin_strategies_for_picked_join_order(join);
> + if (fix_semijoin_strategies_for_picked_join_order(join))
> + DBUG_RETURN(TRUE);
>
> for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
> {
> @@ -9232,8 +9260,8 @@ static bool make_join_select(JOIN *join,
> - If we're looking at the first SJM table, reset used_tables
> to refer to only allowed tables
> */
> - if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info
> &&
> - tab->emb_sj_nest->sj_mat_info->is_used &&
> + if (tab->emb_sj_nest &&
> + tab->emb_sj_nest->sj_mat_info &&
> !(used_tables & tab->emb_sj_nest->sj_inner_tables))
> {
> save_used_tables= used_tables;
> @@ -10316,14 +10344,14 @@ end_sj_materialize(JOIN *join, JOIN_TAB
> {
> TABLE *table= sjm->table;
>
> - List_iterator<Item> it(sjm->sjm_table_cols);
> + List_iterator<Item> it(sjm->table_cols);
> Item *item;
> while ((item= it++))
> {
> if (item->is_null())
> DBUG_RETURN(NESTED_LOOP_OK);
> }
> - fill_record(thd, table->field, sjm->sjm_table_cols, 1);
> + fill_record(thd, table->field, sjm->table_cols, 1);
> if (thd->is_error())
> DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
> if ((error= table->file->ha_write_row(table->record[0])))
> @@ -10331,8 +10359,8 @@ end_sj_materialize(JOIN *join, JOIN_TAB
> /* create_myisam_from_heap will generate error if needed */
> if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
> create_myisam_from_heap(thd, table,
> - sjm->sjm_table_param.start_recinfo,
> - &sjm->sjm_table_param.recinfo, error,
> + sjm->table_param.start_recinfo,
> + &sjm->table_param.recinfo, error,
> TRUE, NULL))
> DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
> }
> @@ -10451,16 +10479,16 @@ bool setup_sj_materialization(JOIN_TAB *
> /*
> Set up the table to write to, do as select_union::create_result_table
> does
> */
> - sjm->sjm_table_param.init();
> - sjm->sjm_table_param.field_count= item_list.elements;
> - sjm->sjm_table_param.bit_fields_as_long= TRUE;
> + sjm->table_param.init();
> + sjm->table_param.field_count= item_list.elements;
> + sjm->table_param.bit_fields_as_long= TRUE;
> List_iterator<Item> it(item_list);
> Item *right_expr;
> while((right_expr= it++))
> - sjm->sjm_table_cols.push_back(right_expr);
> + sjm->table_cols.push_back(right_expr);
>
> - if (!(sjm->table= create_tmp_table(thd, &sjm->sjm_table_param,
> - sjm->sjm_table_cols, (ORDER*) 0,
> + if (!(sjm->table= create_tmp_table(thd, &sjm->table_param,
> + sjm->table_cols, (ORDER*) 0,
> TRUE /* distinct */,
> 1, /*save_sum_fields*/
> thd->variables.option_bits |
> TMP_TABLE_ALL_COLUMNS,
> @@ -10473,7 +10501,7 @@ bool setup_sj_materialization(JOIN_TAB *
> tab->join->sjm_info_list.push_back(sjm);
>
> sjm->materialized= FALSE;
> - if (!sjm->is_sj_scan)
> + if (!sjm->is_scan)
> {
> KEY *tmp_key; /* The only index on the temporary table. */
> uint tmp_key_parts; /* Number of keyparts in tmp_key. */
> @@ -10573,9 +10601,9 @@ bool setup_sj_materialization(JOIN_TAB *
> temptable record, we copy its columns to their corresponding columns
> in the record buffers for the source tables.
> */
> - sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements];
> + sjm->copy_field= new Copy_field[sjm->table_cols.elements];
> it.rewind();
> - for (uint i=0; i < sjm->sjm_table_cols.elements; i++)
> + for (uint i=0; i < sjm->table_cols.elements; i++)
> {
> bool dummy;
> Item_equal *item_eq;
> @@ -12364,8 +12392,7 @@ Item *eliminate_item_equal(Item *cond, C
> */
> TABLE_LIST *emb_nest=
> item_field->field->table->pos_in_table_list->embedding;
> - if (!item_const && emb_nest && emb_nest->sj_mat_info
> &&
> - emb_nest->sj_mat_info->is_used)
> + if (!item_const && emb_nest && emb_nest->sj_mat_info)
> {
> /*
> Find the first equal expression that refers to a table that is
> @@ -13454,7 +13481,7 @@ void advance_sj_state(JOIN *join, table_
> double *current_record_count, double
> *current_read_time,
> POSITION *loose_scan_pos)
> {
> - TABLE_LIST *emb_sj_nest;
> + TABLE_LIST *emb_sj_nest= new_join_tab->emb_sj_nest;
> POSITION *pos= join->positions + idx;
> remaining_tables &= ~new_join_tab->table->map;
>
> @@ -13504,13 +13531,11 @@ void advance_sj_state(JOIN *join, table_
>
> table_map handled_by_fm_or_ls= 0;
> /* FirstMatch Strategy */
> - if (new_join_tab->emb_sj_nest &&
> + if (emb_sj_nest &&
> join->thd->optimizer_switch_flag(OPTIMIZER_SWITCH_FIRSTMATCH))
> {
> - const table_map outer_corr_tables=
> - new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
> - const table_map sj_inner_tables=
> - new_join_tab->emb_sj_nest->sj_inner_tables;
> + const table_map outer_corr_tables=
> emb_sj_nest->nested_join->sj_depends_on;
> + const table_map sj_inner_tables= emb_sj_nest->sj_inner_tables;
> /*
> Enter condition:
> 1. The next join tab belongs to semi-join nest
> @@ -13595,7 +13620,7 @@ void advance_sj_state(JOIN *join, table_
> */
> if ((pos->first_loosescan_table != MAX_TABLES) && // (1)
> (first->table->emb_sj_nest->sj_inner_tables & remaining_tables)
> &&
> //(2)
> - new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
> + emb_sj_nest != first->table->emb_sj_nest) //(2)
> {
> pos->first_loosescan_table= MAX_TABLES;
> }
> @@ -13607,9 +13632,8 @@ void advance_sj_state(JOIN *join, table_
> if (loose_scan_pos->read_time != DBL_MAX)
> {
> pos->first_loosescan_table= idx;
> - pos->loosescan_need_tables=
> - new_join_tab->emb_sj_nest->sj_inner_tables |
> - new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
> + pos->loosescan_need_tables= emb_sj_nest->sj_inner_tables |
> +
> emb_sj_nest->nested_join->sj_depends_on;
> }
>
> if ((pos->first_loosescan_table != MAX_TABLES) &&
> @@ -13658,7 +13682,7 @@ void advance_sj_state(JOIN *join, table_
> Update join->cur_sj_inner_tables (Used by FirstMatch in this function
> and
> LooseScan detector in best_access_path)
> */
> - if ((emb_sj_nest= new_join_tab->emb_sj_nest))
> + if (emb_sj_nest)
> {
> join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
> pos->dups_producing_tables |= emb_sj_nest->sj_inner_tables;
> @@ -13695,16 +13719,15 @@ void advance_sj_state(JOIN *join, table_
> we reach the point #2.
> */
> pos->sjm_scan_need_tables=
> - new_join_tab->emb_sj_nest->sj_inner_tables |
> - new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
> - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
> + emb_sj_nest->sj_inner_tables |
> + emb_sj_nest->nested_join->sj_depends_on |
> + emb_sj_nest->nested_join->sj_corr_tables;
> pos->sjm_scan_last_inner= idx;
> }
> else if (sjm_strategy == SJ_OPT_MATERIALIZE_LOOKUP)
> {
> COST_VECT prefix_cost;
> - SJ_MATERIALIZATION_INFO *mat_info= emb_sj_nest->sj_mat_info;
> - signed int first_tab= (int)idx - mat_info->tables;
> + int first_tab= (int)idx - my_count_bits(emb_sj_nest->sj_inner_tables);
> double prefix_rec_count;
> if (first_tab < (int)join->const_tables)
> {
> @@ -13718,8 +13741,9 @@ void advance_sj_state(JOIN *join, table_
> }
>
> double mat_read_time= prefix_cost.total_cost();
> - mat_read_time += mat_info->materialization_cost.total_cost() +
> - prefix_rec_count *
> mat_info->lookup_cost.total_cost();
> + mat_read_time +=
> + emb_sj_nest->nested_join->sjm.materialization_cost.total_cost() +
> + prefix_rec_count *
> emb_sj_nest->nested_join->sjm.lookup_cost.total_cost();
>
> if (mat_read_time < *current_read_time || pos->dups_producing_tables)
> {
> @@ -13732,8 +13756,7 @@ void advance_sj_state(JOIN *join, table_
> pos->sj_strategy= SJ_OPT_MATERIALIZE_LOOKUP;
> *current_read_time= mat_read_time;
> *current_record_count= prefix_rec_count;
> - pos->dups_producing_tables &=
> - ~new_join_tab->emb_sj_nest->sj_inner_tables;
> + pos->dups_producing_tables &= ~emb_sj_nest->sj_inner_tables;
> }
> }
>
> @@ -13743,11 +13766,11 @@ void advance_sj_state(JOIN *join, table_
> {
> TABLE_LIST *mat_nest=
> join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest;
> - SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info;
> + const uint tables= my_count_bits(mat_nest->sj_inner_tables);
>
> double prefix_cost;
> double prefix_rec_count;
> - int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables;
> + int first_tab= pos->sjm_scan_last_inner + 1 - tables;
> /* Get the prefix cost */
> if (first_tab == (int)join->const_tables)
> {
> @@ -13761,18 +13784,19 @@ void advance_sj_state(JOIN *join, table_
> }
>
> /* Add materialization cost */
> - prefix_cost += mat_info->materialization_cost.total_cost() +
> - prefix_rec_count * mat_info->scan_cost.total_cost();
> - prefix_rec_count *= mat_info->rows;
> + prefix_cost+=
> + mat_nest->nested_join->sjm.materialization_cost.total_cost() +
> + prefix_rec_count *
> mat_nest->nested_join->sjm.scan_cost.total_cost();
> + prefix_rec_count*= mat_nest->nested_join->sjm.rows;
>
> uint i;
> table_map rem_tables= remaining_tables;
> - for (i= idx; i != (first_tab + mat_info->tables - 1); i--)
> + for (i= idx; i != (first_tab + tables - 1); i--)
> rem_tables |= join->positions[i].table->table->map;
>
> POSITION curpos, dummy;
> /* Need to re-run best-access-path as we prefix_rec_count has changed
> */
> - for (i= first_tab + mat_info->tables; i <= idx; i++)
> + for (i= first_tab + tables; i <= idx; i++)
> {
> best_access_path(join, join->positions[i].table, rem_tables, i,
> FALSE,
> prefix_rec_count, &curpos, &dummy);
> @@ -13804,14 +13828,13 @@ void advance_sj_state(JOIN *join, table_
> Duplicate weedout can be applied after all ON-correlated and
> correlated
> */
> - TABLE_LIST *nest;
> - if ((nest= new_join_tab->emb_sj_nest))
> + if (emb_sj_nest)
> {
> if (!pos->dupsweedout_tables)
> pos->first_dupsweedout_table= idx;
>
> - pos->dupsweedout_tables |= nest->sj_inner_tables |
> - nest->nested_join->sj_depends_on;
> + pos->dupsweedout_tables|= emb_sj_nest->sj_inner_tables |
> + emb_sj_nest->nested_join->sj_depends_on;
> }
>
> if (pos->dupsweedout_tables &&
> @@ -16742,7 +16765,7 @@ sub_select_sjm(JOIN *join, JOIN_TAB *joi
> */
> sjm->materialized= TRUE;
> join_tab->read_record.read_record= join_no_more_records;
> - if (sjm->is_sj_scan)
> + if (sjm->is_scan)
> {
> /* Initialize full scan */
> JOIN_TAB *last_tab= join_tab + (sjm->tables - 1);
> @@ -16753,12 +16776,12 @@ sub_select_sjm(JOIN *join, JOIN_TAB *joi
> last_tab->read_first_record= join_read_record_no_init;
> last_tab->read_record.copy_field= sjm->copy_field;
> last_tab->read_record.copy_field_end= sjm->copy_field +
> - sjm->sjm_table_cols.elements;
> + sjm->table_cols.elements;
> last_tab->read_record.read_record= rr_sequential_and_unpack;
> }
> }
>
> - if (sjm->is_sj_scan)
> + if (sjm->is_scan)
> {
> /* Do full scan of the materialized table */
> JOIN_TAB *last_tab= join_tab + (sjm->tables - 1);
>
> === modified file 'sql/sql_test.cc'
> --- a/sql/sql_test.cc 2010-07-13 17:29:44 +0000
> +++ b/sql/sql_test.cc 2010-08-12 12:13:31 +0000
> @@ -377,21 +377,22 @@ print_plan(JOIN* join, uint idx, double
> }
>
>
> -void print_sjm(SJ_MATERIALIZATION_INFO *sjm)
> +void print_sjm(TABLE_LIST *emb_sj_nest)
> {
> DBUG_LOCK_FILE;
> + SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info;
> fprintf(DBUG_FILE, "\nsemi-join nest{\n");
> fprintf(DBUG_FILE, " tables { \n");
> for (uint i= 0;i < sjm->tables; i++)
> {
> fprintf(DBUG_FILE, " %s%s\n",
> - sjm->positions[i].table->table->alias,
> +
> emb_sj_nest->nested_join->sjm.positions[i].table->table->alias,
> (i == sjm->tables -1)? "": ",");
> }
> fprintf(DBUG_FILE, " }\n");
> fprintf(DBUG_FILE, " materialize_cost= %g\n",
> - sjm->materialization_cost.total_cost());
> - fprintf(DBUG_FILE, " rows= %g\n", sjm->rows);
> +
> emb_sj_nest->nested_join->sjm.materialization_cost.total_cost());
> + fprintf(DBUG_FILE, " rows= %g\n", emb_sj_nest->nested_join->sjm.rows);
> fprintf(DBUG_FILE, "}\n");
> DBUG_UNLOCK_FILE;
> }
>
> === modified file 'sql/sql_test.h'
> --- a/sql/sql_test.h 2010-07-13 17:29:44 +0000
> +++ b/sql/sql_test.h 2010-08-12 12:13:31 +0000
> @@ -30,7 +30,7 @@ void TEST_join(JOIN *join);
> void print_plan(JOIN* join,uint idx, double record_count, double
> read_time,
> double current_read_time, const char *info);
> void dump_TABLE_LIST_graph(SELECT_LEX *select_lex, TABLE_LIST* tl);
> -void print_sjm(SJ_MATERIALIZATION_INFO *sjm);
> +void print_sjm(TABLE_LIST *emb_sj_nest);
> void print_keyuse_array(DYNAMIC_ARRAY *keyuse_array);
> #endif
> void mysql_print_status();
>
> === modified file 'sql/table.h'
> --- a/sql/table.h 2010-07-23 18:14:59 +0000
> +++ b/sql/table.h 2010-08-12 12:13:31 +0000
> @@ -1939,6 +1939,27 @@ public:
> Natural_join_column *get_natural_column_ref();
> };
>
> +/**
> + st_semijoin_mat collects data used when calculating the cost of
> + executing a semijoin operation using a materialization strategy.
> + It is used during optimization phase only.
> +*/
> +
> +typedef struct st_semijoin_mat
>
No typedef necessary (see new style guidelines)
How about 'class Semijoin_mat'
> +{
> + /* Optimal join order calculated for inner tables of this semijoin op.
> */
> + struct st_position *positions;
> + /* True if data types allow the MaterializeScan semijoin strategy */
> + bool scan_allowed;
> + /* Expected #rows in the materialized table */
> + double rows;
>
double num_expected_rows;
> + /* Materialization cost - execute sub-join and write rows to temp.table
> */
> + COST_VECT materialization_cost;
> + /* Cost to make one lookup in the temptable */
> + COST_VECT lookup_cost;
> + /* Cost of scanning the materialized table */
> + COST_VECT scan_cost;
> +} st_semijoin_mat;
>
> typedef struct st_nested_join
> {
> @@ -1971,6 +1992,7 @@ typedef struct st_nested_join
> of the semi-join, respectively.
> */
> List<Item> sj_outer_exprs, sj_inner_exprs;
> + st_semijoin_mat sjm;
> /**
> True if this join nest node is completely covered by the query
> execution
> plan. This means two things.
>
>
>
> --
> MySQL Code Commits Mailing List
> For list archives: http://lists.mysql.com/commits
> To unsubscribe:
> http://lists.mysql.com/commits?unsub=1
>