From: Roy Lyseng Date: August 24 2012 11:46am Subject: bzr push into mysql-5.6 branch (roy.lyseng:4113 to 4114) Bug#14272788 List-Archive: http://lists.mysql.com/commits/144626 X-Bug: 14272788 Message-Id: <201208241146.q7OBkpVw013544@khepri07.no.oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 4114 Roy Lyseng 2012-08-24 Bug#14272788: Query with MaterializeScan and materialized subquery returns too many rows Patch #14 - Answering Oystein's review comments. sql/item.cc sql/item_cmpfunc.cc sql/sql_lex.h sql/sql_optimizer.h sql/sql_select.cc sql/sql_select.h sql/table.h Changed comments. sql/sql_optimizer.cc Removed a loop in JOIN::update_equalities_for_sjm(), made a new way of accessing the materialized temporary table of a semi-join. Changed comments. modified: sql/item.cc sql/item_cmpfunc.cc sql/sql_lex.h sql/sql_optimizer.cc sql/sql_optimizer.h sql/sql_select.cc sql/sql_select.h sql/table.h 4113 Roy Lyseng 2012-08-22 Bug#14272788: Query with MaterializeScan and materialized subquery returns too many rows Patch #13 - Make JOIN_TAB::get_sj_strategy() truly inline. Make get_sj_strategy(), accomplished by moving definition of struct st_position above struct st_join_table in sql/sql_select.h. sql/sql_select.cc Deleted implementation of JOIN_TAB::get_sj_strategy(). sql/sql_select.h Moved definition of struct st_position above struct st_join_table, implemented function JOIN_TAB::get_sj_strategy() inline. modified: sql/sql_select.cc sql/sql_select.h === modified file 'sql/item.cc' --- a/sql/item.cc 2012-07-02 11:27:40 +0000 +++ b/sql/item.cc 2012-08-24 11:37:44 +0000 @@ -1029,6 +1029,7 @@ void Item_name_string::copy(const char * This function is called when: - Comparing items in the WHERE clause (when doing where optimization) - When trying to find an ORDER BY/GROUP BY item in the SELECT part + - When matching fields in multiple equality objects (Item_equal) */ bool Item::eq(const Item *item, bool binary_cmp) const === modified file 'sql/item_cmpfunc.cc' --- a/sql/item_cmpfunc.cc 2012-08-20 10:33:40 +0000 +++ b/sql/item_cmpfunc.cc 2012-08-24 11:37:44 +0000 @@ -6374,7 +6374,7 @@ Item_field* Item_equal::get_subst_item(c It's a field from a materialized semijoin. We can substitute it only with a field from the same semijoin. - Example: suppose we have a join order: + Example: suppose we have a join_tab order: ot1 ot2 ot3 SJM(it1 it2 it3) @@ -6414,14 +6414,14 @@ Item_field* Item_equal::get_subst_item(c The field is not in a materialized semijoin nest. We can return the first field in the multiple equality. - Example: suppose we have a join order with MaterializeLookup: + Example: suppose we have a join_tab order with MaterializeLookup: ot1 ot2 SJM(it1 it2) Here we should always pick the first field in the multiple equality, as this will be present before all other dependent fields. - Example: suppose we have a join order with MaterializeScan: + Example: suppose we have a join_tab order with MaterializeScan: ot1 ot2 SJM(it1 it2) === modified file 'sql/sql_lex.h' --- a/sql/sql_lex.h 2012-08-06 08:18:13 +0000 +++ b/sql/sql_lex.h 2012-08-24 11:37:44 +0000 @@ -710,6 +710,7 @@ public: List top_join_list; /* join list of the top level */ List *join_list; /* list for the currently parsed join */ TABLE_LIST *embedding; /* table embedding to the above list */ + /// List of semi-join nests generated for this query block List sj_nests; //Dynamic_array sj_nests; psergey-5: /* === modified file 'sql/sql_optimizer.cc' --- a/sql/sql_optimizer.cc 2012-08-22 09:13:48 +0000 +++ b/sql/sql_optimizer.cc 2012-08-24 11:37:44 +0000 @@ -968,9 +968,9 @@ JOIN::optimize() * information it need for later execution of pushed queries. * * Currently pushed joins are only implemented by NDB. - * It only make sense to try pushing if > 1 tables. + * It only make sense to try pushing if > 1 non-const tables. */ - if (primary_tables - const_tables > 1) + if (!plan_is_const() && !plan_is_single_table()) { const AQP::Join_plan plan(this); if (ha_make_pushed_joins(thd, &plan)) @@ -2015,8 +2015,8 @@ static Item *eliminate_item_equal(Item * /* item_field should be compared with the head of the multiple equality list. - item_field may refer to a table that is within a semijoin - materialization nest. In that case, the join order may look like: + item_field may refer to a table that is within a semijoin materialization + nest. In that case, the order of the join_tab entries may look like: ot1 ot2 ot5 SJM(it3 it4) @@ -2877,7 +2877,7 @@ static void update_depend_map(JOIN *join /** - Update keyuse references and equalities after semi-join materialization + Update equalities and keyuse references after semi-join materialization strategy is chosen. @details @@ -2889,34 +2889,42 @@ static void update_depend_map(JOIN *join multiple equality, replace the reference to the expression selected from the subquery with the corresponding column in the temporary table. - For the MaterializeScan semi-join strategy, all primary tables after the - materialized temporary table must be inspected for keyuse objects that point - to expressions from the subquery tables. These references must be replaced - with references to corresponding columns in the materialized temporary - table instead. + This is needed to properly reflect the equalities that involve injected + semi-join equalities when materialization strategy is chosen. + @see eliminate_item_equal() for how these equalities are used to generate + correct equality predicates. + + The MaterializeScan semi-join strategy requires some additional processing: + All primary tables after the materialized temporary table must be inspected + for keyuse objects that point to expressions from the subquery tables. + These references must be replaced with references to corresponding columns + in the materialized temporary table instead. Those primary tables using + ref access will thus be made to depend on the materialized temporary table + instead of the subquery tables. - This is needed to reflect the current join plan, and in particular to - generate correct equality predicates, @see eliminate_item_equal(). + Only the injected semi-join equalities need this treatment, other predicates + will be handled correctly by the regular item substitution process. @return False if success, true if error */ bool JOIN::update_equalities_for_sjm() { - if (select_lex->sj_nests.is_empty()) - return false; - List_iterator sj_list_it(select_lex->sj_nests); TABLE_LIST *sj_nest; while ((sj_nest= sj_list_it++)) { - if (sj_nest->sj_mat_exec == NULL) + Semijoin_mat_exec *sjm_exec= sj_nest->sj_mat_exec; + if (sjm_exec == NULL) continue; + // This is a semi-join nest with materialization strategy chosen. DBUG_ASSERT(!sj_nest->outer_join_nest()); /* - The table cannot actually be an outer join inner table yet, this is - just a preparatory step (ie sj_nest->outer_join_nest() is NULL) + A materialized semi-join nest cannot actually be an inner part of an + outer join yet, this is just a preparatory step, + ie sj_nest->outer_join_nest() is always NULL here. + @todo: Enable outer joining here later. */ Item *cond= sj_nest->outer_join_nest() ? sj_nest->outer_join_nest()->join_cond() : @@ -2930,21 +2938,11 @@ bool JOIN::update_equalities_for_sjm() (uchar *)sj_nest); if (cond == NULL) return true; - cond->update_used_tables(); - } - for (uint i= const_tables; i < primary_tables; i++) - { - JOIN_TAB *const mat_tab= join_tab + i; - Semijoin_mat_exec *const sjm_exec= mat_tab->sj_mat_exec; - if (sjm_exec == NULL) - continue; - - TABLE_LIST *const sj_nest= - (join_tab + sjm_exec->inner_table_index)->emb_sj_nest; + cond->update_used_tables(); // Loop over all primary tables that follow the materialized table - for (uint j= i + 1; j < primary_tables; j++) + for (uint j= sjm_exec->mat_table_index + 1; j < primary_tables; j++) { JOIN_TAB *const tab= join_tab + j; for (Key_use *keyuse= tab->position->key; === modified file 'sql/sql_optimizer.h' --- a/sql/sql_optimizer.h 2012-08-13 20:13:27 +0000 +++ b/sql/sql_optimizer.h 2012-08-24 11:37:44 +0000 @@ -72,7 +72,7 @@ public: - "tmp_tables" is 0, 1 or 2 and counts the maximum possible number of intermediate tables in post-processing (ie sorting and duplicate removal). Later, tmp_tables will be adjusted to the correct number of - intermediate tables. + intermediate tables, @see JOIN::make_tmp_tables_info. - The remaining tables (ie. tables - primary_tables - tmp_tables) are input tables to materialized semi-join operations. The tables are ordered as follows in the join_tab array: @@ -80,11 +80,12 @@ public: 2. non-const primary tables 3. intermediate sort/group tables 4. possible holes in array - 5. materialized semi-join tables + 5. semi-joined tables used with materialization strategy */ uint tables; ///< Total number of tables in query block uint primary_tables; ///< Number of primary input tables in query block uint const_tables; ///< Number of primary tables deemed constant + uint tmp_tables; ///< Number of temporary tables used by query uint send_group_parts; /** Indicates that grouping will be performed on the result set during @@ -391,11 +392,6 @@ public: List sjm_exec_list; /* end of allocation caching storage */ - /** - Number of tmp tables actually used by the query. - @see JOIN::make_tmp_tables_info - */ - uint8 tmp_tables; /** TRUE <=> ref_pointer_array is set to items3. */ bool set_group_rpa; /** Exec time only: TRUE <=> current group has been sent */ @@ -418,6 +414,7 @@ public: tables= 0; primary_tables= 0; const_tables= 0; + tmp_tables= 0; const_table_map= 0; join_list= 0; implicit_grouping= FALSE; @@ -468,7 +465,6 @@ public: /* can help debugging (makes smaller test cases): */ DBUG_EXECUTE_IF("no_const_tables",no_const_tables= TRUE;); first_select= sub_select; - tmp_tables= 0; set_group_rpa= false; group_sent= 0; } === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2012-08-22 10:24:34 +0000 +++ b/sql/sql_select.cc 2012-08-24 11:37:44 +0000 @@ -1299,6 +1299,7 @@ void calc_used_field_length(THD *thd, JO @details - create join->join_tab array and copy from existing JOIN_TABs in join order + - create helper structs for materialized semi-join handling - finalize semi-join strategy choices - Number of intermediate tables "tmp_tables" is calculated. - "tables" and "primary_tables" are recalculated. @@ -1337,9 +1338,10 @@ bool JOIN::get_best_combination() Rearrange queries with materialized semi-join nests so that the semi-join nest is replaced with a reference to a materialized temporary table and all materialized subquery tables are placed after the intermediate tables. - After the following loop, "outer_target" is the position of first - materialized temporary table (if there is one) and "inner_target" is - the position of the first subquery table (if any). + After the following loop, "inner_target" is the position of the first + subquery table (if any). "outer_target" is the position of first outer + table, and will later be used to track the position of any materialized + temporary tables. */ uint outer_target= 0; uint inner_target= primary_tables + tmp_tables; @@ -2545,13 +2547,13 @@ no_join_cache: /** - Setup the materialized table for one semi-join nest + Setup the materialized table for a semi-join nest @param tab join_tab for the materialized semi-join table @param tableno table number of materialized table @param inner_pos information about the first inner table of the subquery @param sjm_pos information about the materialized semi-join table, - to be filled in with data. + to be filled with data. @details Setup execution structures for one semi-join materialization nest: @@ -2568,7 +2570,7 @@ bool JOIN::setup_materialized_table(JOIN POSITION *sjm_pos) { DBUG_ENTER("JOIN::setup_materialized_table"); - TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest; + const TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest; Semijoin_mat_optimize *const sjm_opt= &emb_sj_nest->nested_join->sjm; Semijoin_mat_exec *const sjm_exec= tab->sj_mat_exec; const uint field_count= sjm_exec->subq_exprs->elements; @@ -2596,10 +2598,10 @@ bool JOIN::setup_materialized_table(JOIN if (!(table= create_tmp_table(thd, &sjm_exec->table_param, *sjm_exec->subq_exprs, NULL, true /* distinct */, - true /*save_sum_fields*/, + true /* save_sum_fields */, thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS, - HA_POS_ERROR /*rows_limit */, + HA_POS_ERROR /* rows_limit */, name))) DBUG_RETURN(true); /* purecov: inspected */ sjm_exec->table= table; === modified file 'sql/sql_select.h' --- a/sql/sql_select.h 2012-08-22 10:24:34 +0000 +++ b/sql/sql_select.h 2012-08-24 11:37:44 +0000 @@ -548,7 +548,8 @@ public: st_join_table *first_inner; /**< first inner table for including outerjoin*/ bool found; /**< true after all matches or null complement*/ bool not_null_compl;/**< true before null complement is added */ - bool materialized; /**< true if materialized from derived/SJ */ + /// For a materializable derived or SJ table: true if has been materialized + bool materialized; st_join_table *last_inner; /**< last table table for embedding outer join*/ st_join_table *first_upper; /**< first inner table for embedding outer join*/ st_join_table *first_unmatched; /**< used for optimization purposes only */ @@ -585,7 +586,11 @@ public: */ READ_RECORD::Setup_func save_read_first_record;/* to save read_first_record */ READ_RECORD::Read_func save_read_record;/* to save read_record.read_record */ - /// Non-NULL if table is temporary table materialized from subquery + /** + Struct needed for materialization of semi-join. Set for a materialized + temporary table, and NULL for all other join_tabs (except when + materialization is in progress, @see join_materialize_semijoin()). + */ Semijoin_mat_exec *sj_mat_exec; double worst_seeks; key_map const_keys; /**< Keys with constant part */ @@ -832,8 +837,10 @@ public: DBUG_ASSERT(first_sj_inner_tab->position->sj_strategy != SJ_OPT_NONE); return first_sj_inner_tab->position->sj_strategy; } - - /// @returns (sub)query block id for an inner table of materialized semi-join + /** + @returns query block id for an inner table of materialized semi-join, and + 0 for all other tables. + */ uint sjm_query_block_id() const; bool and_with_condition(Item *tmp_cond, uint line); === modified file 'sql/table.h' --- a/sql/table.h 2012-08-22 09:13:48 +0000 +++ b/sql/table.h 2012-08-24 11:37:44 +0000 @@ -2185,7 +2185,7 @@ typedef struct st_nested_join */ table_map sj_corr_tables; /** - Subquery block id if this struct is generated from a subquery transform. + Query block id if this struct is generated from a subquery transform. */ uint query_block_id; /* No bundle (reason: useless for push emails).