2715 Sergey Petrunia 2008-11-09
WL#3985: Subqueries: smart choice between semi-join and materialization
- More comments
- Code cleanup
modified:
sql/sql_select.cc
2714 Sergey Petrunia 2008-11-09
WL#3985: Subqueries: smart choice between semi-join and materialization
- More comments
- Code cleanup
modified:
sql/mysql_priv.h
sql/sql_select.cc
sql/sql_select.h
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2008-11-09 17:13:57 +0000
+++ b/sql/sql_select.cc 2008-11-09 19:39:26 +0000
@@ -1910,10 +1910,6 @@ JOIN::optimize()
(select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
(select_lex->ftfunc_list->elements ? SELECT_NO_JOIN_CACHE : 0);
- if (!select_lex->sj_nests.is_empty())
- setup_semijoin_dups_elimination(this, select_opts_for_readinfo,
- no_jbuf_after);
-
// No cache for MATCH == 'Don't use join buffering when we use MATCH'.
if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
DBUG_RETURN(1);
@@ -9619,6 +9615,10 @@ make_join_readinfo(JOIN *join, ulonglong
bool sorted= 1;
DBUG_ENTER("make_join_readinfo");
+ if (!join->select_lex->sj_nests.is_empty() &&
+ setup_semijoin_dups_elimination(join, options, no_jbuf_after))
+ DBUG_RETURN(TRUE);
+
for (i=join->const_tables ; i < join->tables ; i++)
{
JOIN_TAB *tab=join->join_tab+i;
@@ -12485,15 +12485,26 @@ void advance_sj_state(JOIN *join, table_
/* LooseScan Strategy */
{
POSITION *first=join->positions+pos->first_loosescan_table;
- /* Check that there's no interleaving w/ other tables */
- if ((pos->first_loosescan_table != MAX_TABLES) &&
- (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) &&
- s->emb_sj_nest != first->table->emb_sj_nest)
+ /*
+ LooseScan strategy can't handle interleaving between tables from the
+ semi-join that LooseScan is handling and any other tables.
+
+ If we were considering LooseScan for the join prefix (1)
+ and the table we're adding creates an interleaving (2)
+ then
+ stop considering loose scan
+ */
+ if ((pos->first_loosescan_table != MAX_TABLES) && // (1)
+ (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
+ s->emb_sj_nest != first->table->emb_sj_nest) //(2)
{
pos->first_loosescan_table= MAX_TABLES;
}
- /* Start loosescan nest if we're at the first LooseScan table */
+ /*
+ If we got an option to use LooseScan for the current table, start
+ considering LooseScan strateg
+ */
if (loose_scan_pos->read_time != DBL_MAX)
{
pos->first_loosescan_table= idx;
@@ -12502,19 +12513,18 @@ void advance_sj_state(JOIN *join, table_
s->emb_sj_nest->nested_join->sj_corr_tables;
}
- /*
- Ok have put all loose scan's inner and outer correlated tables into the
- prefix.
- */
if ((pos->first_loosescan_table != MAX_TABLES) &&
!(remaining_tables & pos->loosescan_need_tables))
{
- /* At the last LooseScan table */
+ /*
+ Ok we have LooseScan plan and also have all LooseScan sj-nest's
+ inner tables and outer correlated tables into the prefix.
+ */
+
first=join->positions + pos->first_loosescan_table;
uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables);
/* Got a complete LooseScan range. Calculate its cost */
double reopt_cost, reopt_rec_count, sj_inner_fanout;
-
/*
The same problem as with FirstMatch - we need to save POSITIONs
somewhere but reserving space for all cases would require too
@@ -12527,7 +12537,10 @@ void advance_sj_state(JOIN *join, table_
&reopt_rec_count,
&reopt_cost, &sj_inner_fanout);
/*
- We don't yet know what are the other strategies, so pick the
+ We don't yet have any other strategies that could handle this
+ semi-join nest (the other options are Duplicate Elimination or
+ Materialization, which need at least the same set of tables in
+ the join prefix to be considered) so unconditionally pick the
LooseScan.
*/
pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
@@ -12604,7 +12617,7 @@ void advance_sj_state(JOIN *join, table_
mat_read_time += mat_info->materialization_cost.total_cost() +
prefix_rec_count * mat_info->lookup_cost.total_cost();
- if (mat_read_time < *current_read_time)
+ if (mat_read_time < *current_read_time || join->cur_dups_producing_tables)
{
/*
NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
@@ -12663,6 +12676,14 @@ void advance_sj_state(JOIN *join, table_
prefix_cost += curpos.read_time;
}
+ /*
+ Use the strategy if
+ * it is cheaper then what we've had, or
+ * we haven't picked any other semi-join strategy yet
+ In the second case, we pick this strategy unconditionally because
+ comparing cost without semi-join duplicate removal with cost with
+ duplicate removal is not an apples-to-apples comparison.
+ */
if (prefix_cost < *current_read_time || join->cur_dups_producing_tables)
{
pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
@@ -12765,8 +12786,12 @@ void advance_sj_state(JOIN *join, table_
dups_cost += write_cost + full_lookup_cost;
/*
- Duplicate Weedout is the catch-all default, so we should pick it if
- there is unhandled sj-fanout.
+ Use the strategy if
+ * it is cheaper then what we've had, or
+ * we haven't picked any other semi-join strategy yet
+ The second part is necessary because this strategy is the last one
+ to consider (it needs "the most" tables in the prefix) and we can't
+ leave duplicate-producing tables unhandled by any strategy.
*/
if (dups_cost < *current_read_time || join->cur_dups_producing_tables)
{
| Thread |
|---|
| • bzr push into mysql-6.0-opt-subqueries branch (sergefp:2714 to 2715)WL#3985 | Sergey Petrunia | 9 Nov |