From: Roy Lyseng Date: August 21 2012 7:13am Subject: bzr push into mysql-5.6 branch (roy.lyseng:4108 to 4109) Bug#14272788 List-Archive: http://lists.mysql.com/commits/144579 X-Bug: 14272788 Message-Id: <201208210713.q7L7Dexx011785@khepri07.no.oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 4109 Roy Lyseng 2012-08-20 Bug#14272788: Query with MaterializeScan and materialized subquery returns too many rows Patch #9 - Move fields fropm Semijoin_mat_exec to Semijoin_mat_optimize. Some fields in the Semijoin_mat_exec class are actually optimizer related and should be moved into struct Semijoin_mat_optimize. The field mat_fields has been moved and the field emb_sj_nest has been deleted. sql/item_cmpfunc.cc Item::equality_substitution_transformer() accepts an argument that is a semi-join nest pointer instead of a Semijoin_mat_exec pointer. sql/sql_class.h Deleted fields mat_fields and emb_sj_nest from class Semijoin_mat_exec. sql/sql_optimizer.cc JOIN::update_equalities_for_sjm() must loop over select_lex->sj_nests instead of JOIN::sjm_exec_list. Local variable sjm is renamed to sjm_exec. sql/sql_select.cc In JOIN::get_best_combination(), assign sjm_exec pointer to embedding semi-join nest. Local variable sjm is renamed to sjm_exec. In JOIN::setup_materialized_table(), populate Semijoin_mat_optimize:: mat_fields instead of Semijoin_mat_exec::mat_fields. sql/table.h Added field mat_fields to struct Semijoin_mat_optimize. modified: sql/item_cmpfunc.cc sql/sql_class.h sql/sql_optimizer.cc sql/sql_select.cc sql/table.h 4108 Roy Lyseng 2012-08-20 Bug#14272788: Query with MaterializeScan and materialized subquery returns too many rows Patch #8 - Fix problem with how subqueries are transformed to semi-join. There was a problem in how JOIN::flatten_subqueries() determined whether to transform a subquery to semi-join or not. When checking number of tables in query block after a transformation, the tables already added were not considered. This is now changed so that the first loop decides whether a subquery qualifies for transformation or not, and the second and third loops reuse the decision from the first loop. In addition, if a join nest with fewer tables follows one with more tables, which was not transformed, the smaller nest is now considered. In addition, the condition "< MAX_TABLES" was checked, but it should be "<= MAX_TABLES". sql/item_subselect.h Added the sj_chosen member variable that tells whether a subquery is chosen for semi-join transformation. sql/sql_optimizer.cc Changes to JOIN::flatten_subqueries(), see above. modified: sql/item_subselect.h sql/sql_optimizer.cc === modified file 'sql/item_cmpfunc.cc' --- a/sql/item_cmpfunc.cc 2012-08-13 20:13:27 +0000 +++ b/sql/item_cmpfunc.cc 2012-08-20 10:33:40 +0000 @@ -6452,7 +6452,7 @@ Item_field* Item_equal::get_subst_item(c Item* Item_equal::equality_substitution_transformer(uchar *arg) { - Semijoin_mat_exec *sjm= reinterpret_cast(arg); + TABLE_LIST *sj_nest= reinterpret_cast(arg); List_iterator it(fields); List added_fields; Item_field *item; @@ -6465,13 +6465,13 @@ Item* Item_equal::equality_substitution_ continue; // Iterate over the fields selected from the subquery - List_iterator mit(*sjm->subq_exprs); + List_iterator mit(sj_nest->nested_join->sj_inner_exprs); Item *existing; uint fieldno= 0; while ((existing= mit++)) { if (existing->real_item()->eq(item, false)) - added_fields.push_back(sjm->mat_fields[fieldno]); + added_fields.push_back(sj_nest->nested_join->sjm.mat_fields[fieldno]); fieldno++; } } @@ -6493,17 +6493,18 @@ Item* Item_equal::equality_substitution_ */ Item* Item_func_eq::equality_substitution_transformer(uchar *arg) { - Semijoin_mat_exec *sjm= reinterpret_cast(arg); + TABLE_LIST *sj_nest= reinterpret_cast(arg); // Iterate over the fields selected from the subquery - List_iterator mit(*sjm->subq_exprs); + List_iterator mit(sj_nest->nested_join->sj_inner_exprs); Item *existing; uint fieldno= 0; while ((existing= mit++)) { if (existing->real_item()->eq(args[1], false) && - (args[0]->used_tables() & ~sjm->emb_sj_nest->sj_inner_tables)) - current_thd->change_item_tree(args+1, sjm->mat_fields[fieldno]); + (args[0]->used_tables() & ~sj_nest->sj_inner_tables)) + current_thd->change_item_tree(args+1, + sj_nest->nested_join->sjm.mat_fields[fieldno]); fieldno++; } return this; === modified file 'sql/sql_class.h' --- a/sql/sql_class.h 2012-08-13 20:13:27 +0000 +++ b/sql/sql_class.h 2012-08-20 10:33:40 +0000 @@ -4467,26 +4467,21 @@ struct st_table_ref; class Semijoin_mat_exec : public Sql_alloc { public: - Semijoin_mat_exec(bool is_scan, TABLE_LIST *const emb_sj_nest, - uint table_count, uint mat_table_index, - uint inner_table_index) - :is_scan(is_scan), emb_sj_nest(emb_sj_nest), - table_count(table_count), - mat_table_index(mat_table_index), inner_table_index(inner_table_index), - subq_exprs(&emb_sj_nest->nested_join->sj_inner_exprs), - table_param(), table(NULL), mat_fields(NULL) + Semijoin_mat_exec(bool is_scan, uint table_count, uint mat_table_index, + uint inner_table_index, List *const subq_exprs) + :is_scan(is_scan), table_count(table_count), + mat_table_index(mat_table_index), inner_table_index(inner_table_index), + subq_exprs(subq_exprs), table_param(), table(NULL) {} ~Semijoin_mat_exec() {} const bool is_scan; ///< TRUE if executing a scan, FALSE if lookup - TABLE_LIST *const emb_sj_nest;///< Semi-join nest for operation const uint table_count; ///< Number of tables in the sj-nest const uint mat_table_index; ///< Index in join_tab for materialized table const uint inner_table_index; ///< Index in join_tab for first inner table List *const subq_exprs; ///< List of expressions describing temp. table TMP_TABLE_PARAM table_param; ///< The temptable and its related info TABLE *table; ///< Reference to temporary table - Item_field **mat_fields; ///< Fields in materialized table }; === modified file 'sql/sql_optimizer.cc' --- a/sql/sql_optimizer.cc 2012-08-20 07:13:10 +0000 +++ b/sql/sql_optimizer.cc 2012-08-20 10:33:40 +0000 @@ -2903,28 +2903,31 @@ static void update_depend_map(JOIN *join bool JOIN::update_equalities_for_sjm() { - if (sjm_exec_list.is_empty()) + if (select_lex->sj_nests.is_empty()) return false; - List_iterator it(sjm_exec_list); - Semijoin_mat_exec *sjm; - while ((sjm= it++)) + List_iterator sj_list_it(select_lex->sj_nests); + TABLE_LIST *sj_nest; + while ((sj_nest= sj_list_it++)) { - TABLE_LIST *const emb_sj_nest= sjm->emb_sj_nest; - DBUG_ASSERT(!emb_sj_nest->outer_join_nest()); + if (sj_nest->sj_mat_exec == NULL) + continue; + + DBUG_ASSERT(!sj_nest->outer_join_nest()); /* The table cannot actually be an outer join inner table yet, this is - just a preparatory step (ie emb_sj_nest->outer_join_nest() is NULL) + just a preparatory step (ie sj_nest->outer_join_nest() is NULL) */ - Item *cond= emb_sj_nest->outer_join_nest() ? - emb_sj_nest->outer_join_nest()->join_cond() : + Item *cond= sj_nest->outer_join_nest() ? + sj_nest->outer_join_nest()->join_cond() : conds; if (!cond) continue; uchar *dummy= NULL; cond= cond->compile(&Item::equality_substitution_analyzer, &dummy, - &Item::equality_substitution_transformer, (uchar *)sjm); + &Item::equality_substitution_transformer, + (uchar *)sj_nest); if (cond == NULL) return true; cond->update_used_tables(); @@ -2933,10 +2936,13 @@ bool JOIN::update_equalities_for_sjm() for (uint i= const_tables; i < primary_tables; i++) { JOIN_TAB *const mat_tab= join_tab + i; - Semijoin_mat_exec *const sjm= mat_tab->sj_mat_exec; - if (sjm == NULL) + Semijoin_mat_exec *const sjm_exec= mat_tab->sj_mat_exec; + if (sjm_exec == NULL) continue; + TABLE_LIST *const sj_nest= + (join_tab + sjm_exec->inner_table_index)->emb_sj_nest; + // Loop over all primary tables that follow the materialized table for (uint j= i + 1; j < primary_tables; j++) { @@ -2946,7 +2952,7 @@ bool JOIN::update_equalities_for_sjm() keyuse->key == tab->position->key->key; keyuse++) { - List_iterator it(*sjm->subq_exprs); + List_iterator it(*sjm_exec->subq_exprs); Item *old; uint fieldno= 0; while ((old= it++)) @@ -2957,7 +2963,7 @@ bool JOIN::update_equalities_for_sjm() Replace the expression selected from the subquery with the corresponding column of the materialized temporary table. */ - keyuse->val= sjm->mat_fields[fieldno]; + keyuse->val= sj_nest->nested_join->sjm.mat_fields[fieldno]; keyuse->used_tables= keyuse->val->used_tables(); break; } === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2012-08-15 08:13:09 +0000 +++ b/sql/sql_select.cc 2012-08-20 10:33:40 +0000 @@ -1369,25 +1369,28 @@ bool JOIN::get_best_combination() DBUG_ASSERT(outer_target < inner_target); POSITION *const pos_table= best_positions + tableno; + TABLE_LIST *const sj_nest= pos_table->table->emb_sj_nest; + // Handle this many inner tables of materialized semi-join remaining_sjm_inner= pos_table->n_sj_tables; - Semijoin_mat_exec *const sjm= + Semijoin_mat_exec *const sjm_exec= new (thd->mem_root) Semijoin_mat_exec((pos_table->sj_strategy == SJ_OPT_MATERIALIZE_SCAN), - pos_table->table->emb_sj_nest, remaining_sjm_inner, - outer_target, inner_target); - if (!sjm) + remaining_sjm_inner, outer_target, inner_target, + &sj_nest->nested_join->sj_inner_exprs); + if (!sjm_exec) DBUG_RETURN(true); - (join_tab + outer_target)->sj_mat_exec= sjm; + (join_tab + outer_target)->sj_mat_exec= sjm_exec; + sj_nest->sj_mat_exec= sjm_exec; if (setup_materialized_table(join_tab + outer_target, sjm_index, pos_table, best_positions + sjm_index)) DBUG_RETURN(true); - all_tables[outer_target]= sjm->table; - map2table[sjm->table->tablenr]= join_tab + outer_target; + all_tables[outer_target]= sjm_exec->table; + map2table[sjm_exec->table->tablenr]= join_tab + outer_target; outer_target++; sjm_index++; @@ -2565,8 +2568,9 @@ bool JOIN::setup_materialized_table(JOIN { DBUG_ENTER("JOIN::setup_materialized_table"); TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest; - Semijoin_mat_exec *const sjm= tab->sj_mat_exec; - const uint field_count= sjm->subq_exprs->elements; + Semijoin_mat_optimize *const sjm_opt= &emb_sj_nest->nested_join->sjm; + Semijoin_mat_exec *const sjm_exec= tab->sj_mat_exec; + const uint field_count= sjm_exec->subq_exprs->elements; DBUG_ASSERT(inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP || inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN); @@ -2574,9 +2578,9 @@ bool JOIN::setup_materialized_table(JOIN /* Set up the table to write to, do as select_union::create_result_table does */ - sjm->table_param.init(); - sjm->table_param.field_count= field_count; - sjm->table_param.bit_fields_as_long= true; + sjm_exec->table_param.init(); + sjm_exec->table_param.field_count= field_count; + sjm_exec->table_param.bit_fields_as_long= true; char buffer[NAME_LEN]; const size_t len= my_snprintf(buffer, sizeof(buffer) - 1, "", @@ -2588,8 +2592,8 @@ bool JOIN::setup_materialized_table(JOIN memcpy(name, buffer, len); name[len] = '\0'; TABLE *table; - if (!(table= create_tmp_table(thd, &sjm->table_param, - *sjm->subq_exprs, NULL, + if (!(table= create_tmp_table(thd, &sjm_exec->table_param, + *sjm_exec->subq_exprs, NULL, true /* distinct */, true /*save_sum_fields*/, thd->variables.option_bits | @@ -2597,23 +2601,23 @@ bool JOIN::setup_materialized_table(JOIN HA_POS_ERROR /*rows_limit */, name))) DBUG_RETURN(true); /* purecov: inspected */ - sjm->table= table; + sjm_exec->table= table; table->tablenr= tableno; table->map= (table_map)1 << tableno; table->file->extra(HA_EXTRA_WRITE_CACHE); table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); table->reginfo.join_tab= tab; sj_tmp_tables.push_back(table); - sjm_exec_list.push_back(sjm); + sjm_exec_list.push_back(sjm_exec); - if (!(sjm->mat_fields= + if (!(sjm_opt->mat_fields= (Item_field **) alloc_root(thd->mem_root, field_count * sizeof(Item_field **)))) DBUG_RETURN(true); for (uint fieldno= 0; fieldno < field_count; fieldno++) { - if (!(sjm->mat_fields[fieldno]= new Item_field(table->field[fieldno]))) + if (!(sjm_opt->mat_fields[fieldno]= new Item_field(table->field[fieldno]))) DBUG_RETURN(true); } @@ -2649,14 +2653,14 @@ bool JOIN::setup_materialized_table(JOIN a proper ref access for this table. */ Key_use_array *keyuse= - create_keyuse_for_table(thd, table, field_count, sjm->mat_fields, + create_keyuse_for_table(thd, table, field_count, sjm_opt->mat_fields, emb_sj_nest->nested_join->sj_outer_exprs); if (!keyuse) DBUG_RETURN(true); double fanout= (tab == join_tab + tab->join->const_tables) ? 1.0 : (tab-1)->position->prefix_record_count; - if (!sjm->is_scan) + if (!sjm_exec->is_scan) { sjm_pos->key= keyuse->begin(); // MaterializeLookup will use the index tab->keys.set_bit(0); // There is one index - use it always === modified file 'sql/table.h' --- a/sql/table.h 2012-08-15 08:13:09 +0000 +++ b/sql/table.h 2012-08-20 10:33:40 +0000 @@ -2116,20 +2116,22 @@ public: struct Semijoin_mat_optimize { - /* Optimal join order calculated for inner tables of this semijoin op. */ + /// Optimal join order calculated for inner tables of this semijoin op. struct st_position *positions; - /** True if data types allow the MaterializeLookup semijoin strategy */ + /// True if data types allow the MaterializeLookup semijoin strategy bool lookup_allowed; - /** True if data types allow the MaterializeScan semijoin strategy */ + /// True if data types allow the MaterializeScan semijoin strategy bool scan_allowed; - /* Expected #rows in the materialized table */ + /// Expected #rows in the materialized table double expected_rowcount; - /* Materialization cost - execute sub-join and write rows to temp.table */ + /// Materialization cost - execute sub-join and write rows to temp.table Cost_estimate materialization_cost; - /* Cost to make one lookup in the temptable */ + /// Cost to make one lookup in the temptable Cost_estimate lookup_cost; - /* Cost of scanning the materialized table */ + /// Cost of scanning the materialized table Cost_estimate scan_cost; + /// Array of pointers to fields in the materialized table. + Item_field **mat_fields; }; /** No bundle (reason: useless for push emails).