#At file:///home/rl136806/mysql/repo/mysql-work5/ based on revid:roy.lyseng@stripped
3348 Roy Lyseng 2011-03-15
Bug#11822517: Refactor call interface of choose_plan()
The main purposes of this patch is
1. Make the interface of choose_plan() and greedy_search() simpler.
2. Prepare for a simpler test for dependencies when doing
semi join materialization together with outer join (WL#5561).
The fix involves replacing the join_tables argument to choose_plan()
with a join nest pointer, which is NULL when the entire query will
be optimized. Thus, the set of relevant tables is instead calculated
inside this function.
We also clarify that we need to pass as remaining_tables to
best_access_path() not only the tables remaining to be optimized,
but also all tables outside of the current semi join nest.
sql/sql_select.cc
optimize_semijoin_nests() is now taking the map of all query tables
from the passed JOIN object instead of an argument.
best_access_path() - meaning of argument remaining_tables is changed so
that it actually is interpreted as all tables not added to the current
partial plan.
choose_plan() - argument join_tables is replaced with sjm_nest -
pointer to semi join nest to generate a plan for. The tables to include
in the plan is calculated inside the function instead.
greedy_search() - now calling best_access_path() with all tables not
included in the current partial plan.
Not calling advance_sj_state() when calculating plan for a materialized
semi join nest.
sql/sql_select.h
Added all_table_map (map of tables in query) to JOIN object.
Commented const_table_map, found_const_table_map and outer_join
modified:
sql/sql_select.cc
sql/sql_select.h
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2011-03-03 09:43:14 +0000
+++ b/sql/sql_select.cc 2011-03-15 13:13:54 +0000
@@ -66,7 +66,7 @@ struct st_sargable_param;
static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array);
static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, Item *conds,
DYNAMIC_ARRAY *keyuse);
-static bool optimize_semijoin_nests(JOIN *join, table_map all_table_map);
+static bool optimize_semijoin_nests(JOIN *join);
static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
JOIN_TAB *join_tab,
uint tables, Item *conds,
@@ -77,7 +77,7 @@ static int sort_keyuse(Key_use *a, Key_u
static void set_position(JOIN *join, uint index, JOIN_TAB *table, Key_use *key);
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, Key_use *org_keyuse,
table_map used_tables);
-static bool choose_plan(JOIN *join, table_map join_tables);
+static bool choose_plan(JOIN *join, TABLE_LIST *sjm_nest);
static void best_access_path(JOIN *join, JOIN_TAB *s,
table_map remaining_tables, uint idx,
bool disable_jbuf, double record_count,
@@ -4609,7 +4609,7 @@ make_join_statistics(JOIN *join, TABLE_L
TABLE *table;
TABLE_LIST *tables= tables_arg;
uint i,table_count,const_count,key;
- table_map found_const_table_map, all_table_map, found_ref, refs;
+ table_map found_const_table_map, found_ref, refs;
TABLE **table_vector;
JOIN_TAB *stat,*stat_end,*s,**stat_ref;
Key_use *keyuse, *start_keyuse;
@@ -4636,7 +4636,8 @@ make_join_statistics(JOIN *join, TABLE_L
join->best_ref=stat_vector;
stat_end=stat+table_count;
- found_const_table_map= all_table_map=0;
+ found_const_table_map=0;
+ join->all_table_map= 0;
const_count=0;
for (s= stat, i= 0;
@@ -4656,7 +4657,7 @@ make_join_statistics(JOIN *join, TABLE_L
table->reginfo.join_tab=s;
table->reginfo.not_exists_optimize=0;
bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys);
- all_table_map|= table->map;
+ join->all_table_map|= table->map;
s->join=join;
s->dependent= tables->dep_tables;
@@ -5082,13 +5083,13 @@ make_join_statistics(JOIN *join, TABLE_L
if (join->const_tables != join->tables)
optimize_keyuse(join, keyuse_array);
- if (optimize_semijoin_nests(join, all_table_map))
+ if (optimize_semijoin_nests(join))
DBUG_RETURN(TRUE); /* purecov: inspected */
/* Find an optimal join order of the non-constant tables. */
if (join->const_tables != join->tables)
{
- if (choose_plan(join, all_table_map & ~join->const_table_map))
+ if (choose_plan(join, NULL))
goto error;
}
else
@@ -5120,7 +5121,6 @@ error:
SYNOPSIS
optimize_semijoin_nests()
join The join to optimize semi-join nests for
- all_table_map Bitmap of all tables in the join
DESCRIPTION
Optimize each of the semi-join nests that can be run with
@@ -5139,7 +5139,7 @@ error:
TRUE Out of memory error
*/
-static bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
+static bool optimize_semijoin_nests(JOIN *join)
{
DBUG_ENTER("optimize_semijoin_nests");
List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
@@ -5168,8 +5168,7 @@ static bool optimize_semijoin_nests(JOIN
*/
if (semijoin_types_allow_materialization(sj_nest))
{
- join->emb_sjm_nest= sj_nest;
- if (choose_plan(join, all_table_map & ~join->const_table_map))
+ if (choose_plan(join, sj_nest))
DBUG_RETURN(TRUE); /* purecov: inspected */
/*
The best plan to run the subquery is now in join->best_positions,
@@ -5258,7 +5257,6 @@ static bool optimize_semijoin_nests(JOIN
}
}
}
- join->emb_sjm_nest= NULL;
DBUG_RETURN(FALSE);
}
@@ -6927,11 +6925,18 @@ public:
'join->positions' of length 'idx'. The chosen access method for 's' and its
cost are stored in 'join->positions[idx]'.
+ Notice also that when calculating a plan for a materialized semi join nest,
+ this function must not consider key references between tables inside the
+ semi join nest and those outside of it. The way to control this is to add
+ the set of outside tables to the @c remaining_tables argument.
+
@param join pointer to the structure providing all context info
for the query
@param s the table to be joined by the function
@param thd thread for the connection that submitted the query
- @param remaining_tables set of tables not included into the partial plan yet
+ @param remaining_tables set of tables not included into the partial plan yet.
+ also tables outside of the join nest when calculating
+ a materialized semi join nest.
@param idx the length of the partial plan
@param disable_jbuf TRUE<=> Don't use join buffering
@param record_count estimate for the number of records returned by the
@@ -7526,10 +7531,15 @@ best_access_path(JOIN *join,
it. Each specific optimization procedure stores the final optimal plan in
the array 'join->best_positions', and the cost of the plan in
'join->best_read'.
-
- @param join pointer to the structure providing all context info for
- the query
- @param join_tables set of the tables in the query
+ The function can be invoked to produce a plan for all tables in the query
+ (in this case, the const tables are usually filtered out), or it can be
+ invoked to produce a plan for a materialization of a semi join nest.
+ Call with non-NULL sjm_nest pointer when producing a plan for a semi join
+ nest to be materialized and a NULL pointer when producing a full query plan.
+
+ @param join pointer to structure providing all context info for query
+ @param sjm_nest semi join nest that we are producing plan for, or NULL
+ which means produce plan for full query.
@retval
FALSE ok
@@ -7537,24 +7547,31 @@ best_access_path(JOIN *join,
TRUE Fatal error
*/
-static bool
-choose_plan(JOIN *join, table_map join_tables)
+static bool choose_plan(JOIN *join, TABLE_LIST *sjm_nest)
{
uint search_depth= join->thd->variables.optimizer_search_depth;
uint prune_level= join->thd->variables.optimizer_prune_level;
bool straight_join= test(join->select_options & SELECT_STRAIGHT_JOIN);
+ table_map join_tables;
DBUG_ENTER("choose_plan");
+ join->emb_sjm_nest= sjm_nest;
join->cur_embedding_map= 0;
reset_nj_counters(join->join_list);
qsort2_cmp jtab_sort_func;
- if (join->emb_sjm_nest)
+ if (sjm_nest)
{
/* We're optimizing semi-join materialization nest, so put the
tables from this semi-join as first
*/
jtab_sort_func= join_tab_cmp_embedded_first;
+ join_tables= sjm_nest->sj_inner_tables;
+ /*
+ Inner tables of semi join nest are not allowed to be identified as
+ const tables.
+ */
+ DBUG_ASSERT(join_tables == (join_tables & ~join->const_table_map));
}
else
{
@@ -7567,6 +7584,7 @@ choose_plan(JOIN *join, table_map join_t
records accessed.
*/
jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
+ join_tables= join->all_table_map & ~join->const_table_map;
}
my_qsort2(join->best_ref + join->const_tables,
join->tables - join->const_tables, sizeof(JOIN_TAB*),
@@ -7594,6 +7612,9 @@ choose_plan(JOIN *join, table_map join_t
*/
if (join->thd->lex->is_single_level_stmt())
join->thd->status_var.last_query_cost= join->best_read;
+
+ join->emb_sjm_nest= NULL;
+
DBUG_RETURN(FALSE);
}
@@ -7986,16 +8007,14 @@ greedy_search(JOIN *join,
DBUG_ENTER("greedy_search");
/* number of tables that remain to be optimized */
- n_tables= size_remain= my_count_bits(remaining_tables &
- (join->emb_sjm_nest?
- join->emb_sjm_nest->sj_inner_tables :
- ~(table_map)0));
+ n_tables= size_remain= my_count_bits(remaining_tables);
do {
/* Find the extension of the current QEP with the lowest cost */
join->best_read= DBL_MAX;
- if (best_extension_by_limited_search(join, remaining_tables, idx, record_count,
- read_time, search_depth, prune_level))
+ if (best_extension_by_limited_search(join, remaining_tables, idx,
+ record_count, read_time,
+ search_depth, prune_level))
DBUG_RETURN(TRUE);
/*
'best_read < DBL_MAX' means that optimizer managed to find
@@ -8242,18 +8261,25 @@ best_extension_by_limited_search(JOIN
DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
"part_plan"););
+ /*
+ When calculating a plan for a materialized semi join nest,
+ best_access_plan() needs to know not only the remaining tables within the
+ join nest, but also all tables outside of this nest, because there may
+ be key references between the semi join nest and the outside tables
+ that should not be considered when materializing the semi join nest.
+ */
+ const table_map excluded_tables=
+ join->emb_sjm_nest ?
+ join->all_table_map & ~join->emb_sjm_nest->sj_inner_tables :
+ 0;
- table_map allowed_tables= ~(table_map)0;
- if (join->emb_sjm_nest)
- allowed_tables= join->emb_sjm_nest->sj_inner_tables;
-
- bool has_sj= !join->select_lex->sj_nests.is_empty();
+ const bool has_sj=
+ !(join->select_lex->sj_nests.is_empty() || join->emb_sjm_nest);
for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
{
table_map real_table_bit= s->table->map;
if ((remaining_tables & real_table_bit) &&
- (allowed_tables & real_table_bit) &&
!(remaining_tables & s->dependent) &&
(!idx || !check_interleaving_with_nj(s)))
{
@@ -8262,7 +8288,8 @@ best_extension_by_limited_search(JOIN
/* Find the best access method from 's' to the current partial plan */
POSITION loose_scan_pos;
- best_access_path(join, s, remaining_tables, idx, FALSE, record_count,
+ best_access_path(join, s, excluded_tables | remaining_tables,
+ idx, FALSE, record_count,
join->positions + idx, &loose_scan_pos);
/* Compute the cost of extending the plan with 's' */
@@ -8277,6 +8304,8 @@ best_extension_by_limited_search(JOIN
cost (takes 9% of time in a 20-table plan search), hence the if()
above, which is also more efficient than the same if() inside
advance_sj_state() would be.
+ Besides, never call advance_sj_state() when calculating the plan
+ for a materialized semi join nest.
*/
advance_sj_state(join, remaining_tables, s, idx,
¤t_record_count, ¤t_read_time,
@@ -8333,7 +8362,7 @@ best_extension_by_limited_search(JOIN
}
}
- if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
+ if (search_depth > 1 && (remaining_tables & ~real_table_bit))
{ /* Recursively expand the current partial plan */
swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
if (best_extension_by_limited_search(join,
@@ -13932,6 +13961,15 @@ void advance_sj_state(JOIN *join, table_
TABLE_LIST *emb_sj_nest= new_join_tab->emb_sj_nest;
POSITION *pos= join->positions + idx;
+ /*
+ Semi join nests cannot be nested, hence we never need to advance the
+ semi join state of a materialized semi join query.
+ In fact, doing this may cause undesirable effects because all tables
+ within a semi join nest have emb_sj_nest |= NULL, which triggers several
+ of the actions inside this function.
+ */
+ DBUG_ASSERT(join->emb_sjm_nest == NULL);
+
/* Add this table to the join prefix */
remaining_tables &= ~new_join_tab->table->map;
=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h 2011-03-01 14:57:53 +0000
+++ b/sql/sql_select.h 2011-03-15 13:13:54 +0000
@@ -1640,11 +1640,10 @@ public:
bool first_record,full_join, no_field_update;
bool group; /**< If query contains GROUP BY clause */
bool do_send_rows;
- table_map const_table_map,found_const_table_map;
- /*
- Bitmap of all inner tables from outer joins
- */
- table_map outer_join;
+ table_map all_table_map; /**< Set of tables contained in query */
+ table_map const_table_map; /**< Set of tables found to be const */
+ table_map found_const_table_map; /**< Tables that are const and non-empty */
+ table_map outer_join; /**< Bitmap of all inner tables from outer joins */
/* Number of records produced after join + group operation */
ha_rows send_records;
ha_rows found_records,examined_rows,row_limit;
Attachment: [text/bzr-bundle] bzr/roy.lyseng@oracle.com-20110315131354-nsw9y9qpc4t2y4dm.bundle