4109 Roy Lyseng 2012-08-20
Bug#14272788: Query with MaterializeScan and materialized subquery
returns too many rows
Patch #9 - Move fields fropm Semijoin_mat_exec to Semijoin_mat_optimize.
Some fields in the Semijoin_mat_exec class are actually optimizer related
and should be moved into struct Semijoin_mat_optimize.
The field mat_fields has been moved and the field emb_sj_nest has been
deleted.
sql/item_cmpfunc.cc
Item::equality_substitution_transformer() accepts an argument that is
a semi-join nest pointer instead of a Semijoin_mat_exec pointer.
sql/sql_class.h
Deleted fields mat_fields and emb_sj_nest from class Semijoin_mat_exec.
sql/sql_optimizer.cc
JOIN::update_equalities_for_sjm() must loop over select_lex->sj_nests
instead of JOIN::sjm_exec_list.
Local variable sjm is renamed to sjm_exec.
sql/sql_select.cc
In JOIN::get_best_combination(), assign sjm_exec pointer to embedding
semi-join nest.
Local variable sjm is renamed to sjm_exec.
In JOIN::setup_materialized_table(), populate Semijoin_mat_optimize::
mat_fields instead of Semijoin_mat_exec::mat_fields.
sql/table.h
Added field mat_fields to struct Semijoin_mat_optimize.
modified:
sql/item_cmpfunc.cc
sql/sql_class.h
sql/sql_optimizer.cc
sql/sql_select.cc
sql/table.h
4108 Roy Lyseng 2012-08-20
Bug#14272788: Query with MaterializeScan and materialized subquery
returns too many rows
Patch #8 - Fix problem with how subqueries are transformed to semi-join.
There was a problem in how JOIN::flatten_subqueries() determined whether
to transform a subquery to semi-join or not. When checking number of
tables in query block after a transformation, the tables already added
were not considered.
This is now changed so that the first loop decides whether a subquery
qualifies for transformation or not, and the second and third loops
reuse the decision from the first loop. In addition, if a join nest
with fewer tables follows one with more tables, which was not transformed,
the smaller nest is now considered. In addition, the condition
"< MAX_TABLES" was checked, but it should be "<= MAX_TABLES".
sql/item_subselect.h
Added the sj_chosen member variable that tells whether a subquery is
chosen for semi-join transformation.
sql/sql_optimizer.cc
Changes to JOIN::flatten_subqueries(), see above.
modified:
sql/item_subselect.h
sql/sql_optimizer.cc
=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc 2012-08-13 20:13:27 +0000
+++ b/sql/item_cmpfunc.cc 2012-08-20 10:33:40 +0000
@@ -6452,7 +6452,7 @@ Item_field* Item_equal::get_subst_item(c
Item* Item_equal::equality_substitution_transformer(uchar *arg)
{
- Semijoin_mat_exec *sjm= reinterpret_cast<Semijoin_mat_exec *>(arg);
+ TABLE_LIST *sj_nest= reinterpret_cast<TABLE_LIST *>(arg);
List_iterator<Item_field> it(fields);
List<Item_field> added_fields;
Item_field *item;
@@ -6465,13 +6465,13 @@ Item* Item_equal::equality_substitution_
continue;
// Iterate over the fields selected from the subquery
- List_iterator<Item> mit(*sjm->subq_exprs);
+ List_iterator<Item> mit(sj_nest->nested_join->sj_inner_exprs);
Item *existing;
uint fieldno= 0;
while ((existing= mit++))
{
if (existing->real_item()->eq(item, false))
- added_fields.push_back(sjm->mat_fields[fieldno]);
+ added_fields.push_back(sj_nest->nested_join->sjm.mat_fields[fieldno]);
fieldno++;
}
}
@@ -6493,17 +6493,18 @@ Item* Item_equal::equality_substitution_
*/
Item* Item_func_eq::equality_substitution_transformer(uchar *arg)
{
- Semijoin_mat_exec *sjm= reinterpret_cast<Semijoin_mat_exec *>(arg);
+ TABLE_LIST *sj_nest= reinterpret_cast<TABLE_LIST *>(arg);
// Iterate over the fields selected from the subquery
- List_iterator<Item> mit(*sjm->subq_exprs);
+ List_iterator<Item> mit(sj_nest->nested_join->sj_inner_exprs);
Item *existing;
uint fieldno= 0;
while ((existing= mit++))
{
if (existing->real_item()->eq(args[1], false) &&
- (args[0]->used_tables() & ~sjm->emb_sj_nest->sj_inner_tables))
- current_thd->change_item_tree(args+1, sjm->mat_fields[fieldno]);
+ (args[0]->used_tables() & ~sj_nest->sj_inner_tables))
+ current_thd->change_item_tree(args+1,
+ sj_nest->nested_join->sjm.mat_fields[fieldno]);
fieldno++;
}
return this;
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2012-08-13 20:13:27 +0000
+++ b/sql/sql_class.h 2012-08-20 10:33:40 +0000
@@ -4467,26 +4467,21 @@ struct st_table_ref;
class Semijoin_mat_exec : public Sql_alloc
{
public:
- Semijoin_mat_exec(bool is_scan, TABLE_LIST *const emb_sj_nest,
- uint table_count, uint mat_table_index,
- uint inner_table_index)
- :is_scan(is_scan), emb_sj_nest(emb_sj_nest),
- table_count(table_count),
- mat_table_index(mat_table_index), inner_table_index(inner_table_index),
- subq_exprs(&emb_sj_nest->nested_join->sj_inner_exprs),
- table_param(), table(NULL), mat_fields(NULL)
+ Semijoin_mat_exec(bool is_scan, uint table_count, uint mat_table_index,
+ uint inner_table_index, List<Item> *const subq_exprs)
+ :is_scan(is_scan), table_count(table_count),
+ mat_table_index(mat_table_index), inner_table_index(inner_table_index),
+ subq_exprs(subq_exprs), table_param(), table(NULL)
{}
~Semijoin_mat_exec()
{}
const bool is_scan; ///< TRUE if executing a scan, FALSE if lookup
- TABLE_LIST *const emb_sj_nest;///< Semi-join nest for operation
const uint table_count; ///< Number of tables in the sj-nest
const uint mat_table_index; ///< Index in join_tab for materialized table
const uint inner_table_index; ///< Index in join_tab for first inner table
List<Item> *const subq_exprs; ///< List of expressions describing temp. table
TMP_TABLE_PARAM table_param; ///< The temptable and its related info
TABLE *table; ///< Reference to temporary table
- Item_field **mat_fields; ///< Fields in materialized table
};
=== modified file 'sql/sql_optimizer.cc'
--- a/sql/sql_optimizer.cc 2012-08-20 07:13:10 +0000
+++ b/sql/sql_optimizer.cc 2012-08-20 10:33:40 +0000
@@ -2903,28 +2903,31 @@ static void update_depend_map(JOIN *join
bool JOIN::update_equalities_for_sjm()
{
- if (sjm_exec_list.is_empty())
+ if (select_lex->sj_nests.is_empty())
return false;
- List_iterator<Semijoin_mat_exec> it(sjm_exec_list);
- Semijoin_mat_exec *sjm;
- while ((sjm= it++))
+ List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
+ TABLE_LIST *sj_nest;
+ while ((sj_nest= sj_list_it++))
{
- TABLE_LIST *const emb_sj_nest= sjm->emb_sj_nest;
- DBUG_ASSERT(!emb_sj_nest->outer_join_nest());
+ if (sj_nest->sj_mat_exec == NULL)
+ continue;
+
+ DBUG_ASSERT(!sj_nest->outer_join_nest());
/*
The table cannot actually be an outer join inner table yet, this is
- just a preparatory step (ie emb_sj_nest->outer_join_nest() is NULL)
+ just a preparatory step (ie sj_nest->outer_join_nest() is NULL)
*/
- Item *cond= emb_sj_nest->outer_join_nest() ?
- emb_sj_nest->outer_join_nest()->join_cond() :
+ Item *cond= sj_nest->outer_join_nest() ?
+ sj_nest->outer_join_nest()->join_cond() :
conds;
if (!cond)
continue;
uchar *dummy= NULL;
cond= cond->compile(&Item::equality_substitution_analyzer, &dummy,
- &Item::equality_substitution_transformer, (uchar *)sjm);
+ &Item::equality_substitution_transformer,
+ (uchar *)sj_nest);
if (cond == NULL)
return true;
cond->update_used_tables();
@@ -2933,10 +2936,13 @@ bool JOIN::update_equalities_for_sjm()
for (uint i= const_tables; i < primary_tables; i++)
{
JOIN_TAB *const mat_tab= join_tab + i;
- Semijoin_mat_exec *const sjm= mat_tab->sj_mat_exec;
- if (sjm == NULL)
+ Semijoin_mat_exec *const sjm_exec= mat_tab->sj_mat_exec;
+ if (sjm_exec == NULL)
continue;
+ TABLE_LIST *const sj_nest=
+ (join_tab + sjm_exec->inner_table_index)->emb_sj_nest;
+
// Loop over all primary tables that follow the materialized table
for (uint j= i + 1; j < primary_tables; j++)
{
@@ -2946,7 +2952,7 @@ bool JOIN::update_equalities_for_sjm()
keyuse->key == tab->position->key->key;
keyuse++)
{
- List_iterator<Item> it(*sjm->subq_exprs);
+ List_iterator<Item> it(*sjm_exec->subq_exprs);
Item *old;
uint fieldno= 0;
while ((old= it++))
@@ -2957,7 +2963,7 @@ bool JOIN::update_equalities_for_sjm()
Replace the expression selected from the subquery with the
corresponding column of the materialized temporary table.
*/
- keyuse->val= sjm->mat_fields[fieldno];
+ keyuse->val= sj_nest->nested_join->sjm.mat_fields[fieldno];
keyuse->used_tables= keyuse->val->used_tables();
break;
}
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2012-08-15 08:13:09 +0000
+++ b/sql/sql_select.cc 2012-08-20 10:33:40 +0000
@@ -1369,25 +1369,28 @@ bool JOIN::get_best_combination()
DBUG_ASSERT(outer_target < inner_target);
POSITION *const pos_table= best_positions + tableno;
+ TABLE_LIST *const sj_nest= pos_table->table->emb_sj_nest;
+
// Handle this many inner tables of materialized semi-join
remaining_sjm_inner= pos_table->n_sj_tables;
- Semijoin_mat_exec *const sjm=
+ Semijoin_mat_exec *const sjm_exec=
new (thd->mem_root)
Semijoin_mat_exec((pos_table->sj_strategy == SJ_OPT_MATERIALIZE_SCAN),
- pos_table->table->emb_sj_nest, remaining_sjm_inner,
- outer_target, inner_target);
- if (!sjm)
+ remaining_sjm_inner, outer_target, inner_target,
+ &sj_nest->nested_join->sj_inner_exprs);
+ if (!sjm_exec)
DBUG_RETURN(true);
- (join_tab + outer_target)->sj_mat_exec= sjm;
+ (join_tab + outer_target)->sj_mat_exec= sjm_exec;
+ sj_nest->sj_mat_exec= sjm_exec;
if (setup_materialized_table(join_tab + outer_target, sjm_index,
pos_table, best_positions + sjm_index))
DBUG_RETURN(true);
- all_tables[outer_target]= sjm->table;
- map2table[sjm->table->tablenr]= join_tab + outer_target;
+ all_tables[outer_target]= sjm_exec->table;
+ map2table[sjm_exec->table->tablenr]= join_tab + outer_target;
outer_target++;
sjm_index++;
@@ -2565,8 +2568,9 @@ bool JOIN::setup_materialized_table(JOIN
{
DBUG_ENTER("JOIN::setup_materialized_table");
TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest;
- Semijoin_mat_exec *const sjm= tab->sj_mat_exec;
- const uint field_count= sjm->subq_exprs->elements;
+ Semijoin_mat_optimize *const sjm_opt= &emb_sj_nest->nested_join->sjm;
+ Semijoin_mat_exec *const sjm_exec= tab->sj_mat_exec;
+ const uint field_count= sjm_exec->subq_exprs->elements;
DBUG_ASSERT(inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP ||
inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);
@@ -2574,9 +2578,9 @@ bool JOIN::setup_materialized_table(JOIN
/*
Set up the table to write to, do as select_union::create_result_table does
*/
- sjm->table_param.init();
- sjm->table_param.field_count= field_count;
- sjm->table_param.bit_fields_as_long= true;
+ sjm_exec->table_param.init();
+ sjm_exec->table_param.field_count= field_count;
+ sjm_exec->table_param.bit_fields_as_long= true;
char buffer[NAME_LEN];
const size_t len= my_snprintf(buffer, sizeof(buffer) - 1, "<subquery%u>",
@@ -2588,8 +2592,8 @@ bool JOIN::setup_materialized_table(JOIN
memcpy(name, buffer, len);
name[len] = '\0';
TABLE *table;
- if (!(table= create_tmp_table(thd, &sjm->table_param,
- *sjm->subq_exprs, NULL,
+ if (!(table= create_tmp_table(thd, &sjm_exec->table_param,
+ *sjm_exec->subq_exprs, NULL,
true /* distinct */,
true /*save_sum_fields*/,
thd->variables.option_bits |
@@ -2597,23 +2601,23 @@ bool JOIN::setup_materialized_table(JOIN
HA_POS_ERROR /*rows_limit */,
name)))
DBUG_RETURN(true); /* purecov: inspected */
- sjm->table= table;
+ sjm_exec->table= table;
table->tablenr= tableno;
table->map= (table_map)1 << tableno;
table->file->extra(HA_EXTRA_WRITE_CACHE);
table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
table->reginfo.join_tab= tab;
sj_tmp_tables.push_back(table);
- sjm_exec_list.push_back(sjm);
+ sjm_exec_list.push_back(sjm_exec);
- if (!(sjm->mat_fields=
+ if (!(sjm_opt->mat_fields=
(Item_field **) alloc_root(thd->mem_root,
field_count * sizeof(Item_field **))))
DBUG_RETURN(true);
for (uint fieldno= 0; fieldno < field_count; fieldno++)
{
- if (!(sjm->mat_fields[fieldno]= new Item_field(table->field[fieldno])))
+ if (!(sjm_opt->mat_fields[fieldno]= new Item_field(table->field[fieldno])))
DBUG_RETURN(true);
}
@@ -2649,14 +2653,14 @@ bool JOIN::setup_materialized_table(JOIN
a proper ref access for this table.
*/
Key_use_array *keyuse=
- create_keyuse_for_table(thd, table, field_count, sjm->mat_fields,
+ create_keyuse_for_table(thd, table, field_count, sjm_opt->mat_fields,
emb_sj_nest->nested_join->sj_outer_exprs);
if (!keyuse)
DBUG_RETURN(true);
double fanout= (tab == join_tab + tab->join->const_tables) ?
1.0 : (tab-1)->position->prefix_record_count;
- if (!sjm->is_scan)
+ if (!sjm_exec->is_scan)
{
sjm_pos->key= keyuse->begin(); // MaterializeLookup will use the index
tab->keys.set_bit(0); // There is one index - use it always
=== modified file 'sql/table.h'
--- a/sql/table.h 2012-08-15 08:13:09 +0000
+++ b/sql/table.h 2012-08-20 10:33:40 +0000
@@ -2116,20 +2116,22 @@ public:
struct Semijoin_mat_optimize
{
- /* Optimal join order calculated for inner tables of this semijoin op. */
+ /// Optimal join order calculated for inner tables of this semijoin op.
struct st_position *positions;
- /** True if data types allow the MaterializeLookup semijoin strategy */
+ /// True if data types allow the MaterializeLookup semijoin strategy
bool lookup_allowed;
- /** True if data types allow the MaterializeScan semijoin strategy */
+ /// True if data types allow the MaterializeScan semijoin strategy
bool scan_allowed;
- /* Expected #rows in the materialized table */
+ /// Expected #rows in the materialized table
double expected_rowcount;
- /* Materialization cost - execute sub-join and write rows to temp.table */
+ /// Materialization cost - execute sub-join and write rows to temp.table
Cost_estimate materialization_cost;
- /* Cost to make one lookup in the temptable */
+ /// Cost to make one lookup in the temptable
Cost_estimate lookup_cost;
- /* Cost of scanning the materialized table */
+ /// Cost of scanning the materialized table
Cost_estimate scan_cost;
+ /// Array of pointers to fields in the materialized table.
+ Item_field **mat_fields;
};
/**
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.6 branch (roy.lyseng:4108 to 4109) Bug#14272788 | Roy Lyseng | 21 Aug |