List:Commits« Previous MessageNext Message »
From:Roy Lyseng Date:August 24 2012 11:46am
Subject:bzr push into mysql-5.6 branch (roy.lyseng:4113 to 4114) Bug#14272788
View as plain text  
 4114 Roy Lyseng	2012-08-24
      Bug#14272788: Query with MaterializeScan and materialized subquery
                    returns too many rows
      
      Patch #14 - Answering Oystein's review comments.
      
      sql/item.cc
      sql/item_cmpfunc.cc
      sql/sql_lex.h
      sql/sql_optimizer.h
      sql/sql_select.cc
      sql/sql_select.h
      sql/table.h
        Changed comments.
      
      sql/sql_optimizer.cc
        Removed a loop in JOIN::update_equalities_for_sjm(), made a new
        way of accessing the materialized temporary table of a semi-join.
        Changed comments.

    modified:
      sql/item.cc
      sql/item_cmpfunc.cc
      sql/sql_lex.h
      sql/sql_optimizer.cc
      sql/sql_optimizer.h
      sql/sql_select.cc
      sql/sql_select.h
      sql/table.h
 4113 Roy Lyseng	2012-08-22
      Bug#14272788: Query with MaterializeScan and materialized subquery
                    returns too many rows
      
      Patch #13 - Make JOIN_TAB::get_sj_strategy() truly inline.
      
      Make get_sj_strategy(), accomplished by moving definition of struct
      st_position above struct st_join_table in sql/sql_select.h.
      
      sql/sql_select.cc
        Deleted implementation of JOIN_TAB::get_sj_strategy().
      
      sql/sql_select.h
        Moved definition of struct st_position above struct st_join_table,
        implemented function JOIN_TAB::get_sj_strategy() inline.

    modified:
      sql/sql_select.cc
      sql/sql_select.h
=== modified file 'sql/item.cc'
--- a/sql/item.cc	2012-07-02 11:27:40 +0000
+++ b/sql/item.cc	2012-08-24 11:37:44 +0000
@@ -1029,6 +1029,7 @@ void Item_name_string::copy(const char *
   This function is called when:
   - Comparing items in the WHERE clause (when doing where optimization)
   - When trying to find an ORDER BY/GROUP BY item in the SELECT part
+  - When matching fields in multiple equality objects (Item_equal)
 */
 
 bool Item::eq(const Item *item, bool binary_cmp) const

=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc	2012-08-20 10:33:40 +0000
+++ b/sql/item_cmpfunc.cc	2012-08-24 11:37:44 +0000
@@ -6374,7 +6374,7 @@ Item_field* Item_equal::get_subst_item(c
       It's a field from a materialized semijoin. We can substitute it only
       with a field from the same semijoin.
 
-      Example: suppose we have a join order:
+      Example: suppose we have a join_tab order:
 
        ot1 ot2 <subquery> ot3 SJM(it1  it2  it3)
 
@@ -6414,14 +6414,14 @@ Item_field* Item_equal::get_subst_item(c
       The field is not in a materialized semijoin nest. We can return
       the first field in the multiple equality.
 
-      Example: suppose we have a join order with MaterializeLookup:
+      Example: suppose we have a join_tab order with MaterializeLookup:
 
         ot1 ot2 <subquery> SJM(it1 it2)
 
       Here we should always pick the first field in the multiple equality,
       as this will be present before all other dependent fields.
 
-      Example: suppose we have a join order with MaterializeScan:
+      Example: suppose we have a join_tab order with MaterializeScan:
 
         <subquery> ot1 ot2 SJM(it1 it2)
 

=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h	2012-08-06 08:18:13 +0000
+++ b/sql/sql_lex.h	2012-08-24 11:37:44 +0000
@@ -710,6 +710,7 @@ public:
   List<TABLE_LIST> top_join_list; /* join list of the top level          */
   List<TABLE_LIST> *join_list;    /* list for the currently parsed join  */
   TABLE_LIST *embedding;          /* table embedding to the above list   */
+  /// List of semi-join nests generated for this query block
   List<TABLE_LIST> sj_nests;
   //Dynamic_array<TABLE_LIST*> sj_nests; psergey-5:
   /*

=== modified file 'sql/sql_optimizer.cc'
--- a/sql/sql_optimizer.cc	2012-08-22 09:13:48 +0000
+++ b/sql/sql_optimizer.cc	2012-08-24 11:37:44 +0000
@@ -968,9 +968,9 @@ JOIN::optimize()
    * information it need for later execution of pushed queries.
    *
    * Currently pushed joins are only implemented by NDB.
-   * It only make sense to try pushing if > 1 tables.
+   * It only make sense to try pushing if > 1 non-const tables.
    */
-  if (primary_tables - const_tables > 1)
+  if (!plan_is_const() && !plan_is_single_table())
   {
     const AQP::Join_plan plan(this);
     if (ha_make_pushed_joins(thd, &plan))
@@ -2015,8 +2015,8 @@ static Item *eliminate_item_equal(Item *
     /*
       item_field should be compared with the head of the multiple equality
       list.
-      item_field may refer to a table that is within a semijoin
-      materialization nest. In that case, the join order may look like:
+      item_field may refer to a table that is within a semijoin materialization
+      nest. In that case, the order of the join_tab entries may look like:
 
         ot1 ot2 <subquery> ot5 SJM(it3 it4)
 
@@ -2877,7 +2877,7 @@ static void update_depend_map(JOIN *join
 
 
 /**
-  Update keyuse references and equalities after semi-join materialization
+  Update equalities and keyuse references after semi-join materialization
   strategy is chosen.
 
   @details
@@ -2889,34 +2889,42 @@ static void update_depend_map(JOIN *join
     multiple equality, replace the reference to the expression selected
     from the subquery with the corresponding column in the temporary table.
 
-    For the MaterializeScan semi-join strategy, all primary tables after the
-    materialized temporary table must be inspected for keyuse objects that point
-    to expressions from the subquery tables. These references must be replaced
-    with references to corresponding columns in the materialized temporary
-    table instead.
+    This is needed to properly reflect the equalities that involve injected
+    semi-join equalities when materialization strategy is chosen.
+    @see eliminate_item_equal() for how these equalities are used to generate
+    correct equality predicates.
+
+    The MaterializeScan semi-join strategy requires some additional processing:
+    All primary tables after the materialized temporary table must be inspected
+    for keyuse objects that point to expressions from the subquery tables.
+    These references must be replaced with references to corresponding columns
+    in the materialized temporary table instead. Those primary tables using
+    ref access will thus be made to depend on the materialized temporary table
+    instead of the subquery tables.
 
-    This is needed to reflect the current join plan, and in particular to
-    generate correct equality predicates, @see eliminate_item_equal().
+    Only the injected semi-join equalities need this treatment, other predicates
+    will be handled correctly by the regular item substitution process.
 
   @return False if success, true if error
 */
 
 bool JOIN::update_equalities_for_sjm()
 {
-  if (select_lex->sj_nests.is_empty())
-    return false;
-
   List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
   TABLE_LIST *sj_nest;
   while ((sj_nest= sj_list_it++))
   {
-    if (sj_nest->sj_mat_exec == NULL)
+    Semijoin_mat_exec *sjm_exec= sj_nest->sj_mat_exec;
+    if (sjm_exec == NULL)
       continue;
 
+    // This is a semi-join nest with materialization strategy chosen.
     DBUG_ASSERT(!sj_nest->outer_join_nest());
     /*
-      The table cannot actually be an outer join inner table yet, this is
-      just a preparatory step (ie sj_nest->outer_join_nest() is NULL)
+      A materialized semi-join nest cannot actually be an inner part of an
+      outer join yet, this is just a preparatory step,
+      ie sj_nest->outer_join_nest() is always NULL here.
+      @todo: Enable outer joining here later.
     */
     Item *cond= sj_nest->outer_join_nest() ?
                   sj_nest->outer_join_nest()->join_cond() :
@@ -2930,21 +2938,11 @@ bool JOIN::update_equalities_for_sjm()
                         (uchar *)sj_nest);
     if (cond == NULL)
       return true;
-    cond->update_used_tables();
-  }
 
-  for (uint i= const_tables; i < primary_tables; i++)
-  {
-    JOIN_TAB *const mat_tab= join_tab + i;
-    Semijoin_mat_exec *const sjm_exec= mat_tab->sj_mat_exec;
-    if (sjm_exec == NULL)
-      continue;
-
-    TABLE_LIST *const sj_nest=
-      (join_tab + sjm_exec->inner_table_index)->emb_sj_nest;
+    cond->update_used_tables();
 
     // Loop over all primary tables that follow the materialized table
-    for (uint j= i + 1; j < primary_tables; j++)
+    for (uint j= sjm_exec->mat_table_index + 1; j < primary_tables; j++)
     {
       JOIN_TAB *const tab= join_tab + j;
       for (Key_use *keyuse= tab->position->key;

=== modified file 'sql/sql_optimizer.h'
--- a/sql/sql_optimizer.h	2012-08-13 20:13:27 +0000
+++ b/sql/sql_optimizer.h	2012-08-24 11:37:44 +0000
@@ -72,7 +72,7 @@ public:
     - "tmp_tables" is 0, 1 or 2 and counts the maximum possible number of
       intermediate tables in post-processing (ie sorting and duplicate removal).
       Later, tmp_tables will be adjusted to the correct number of
-      intermediate tables.
+      intermediate tables, @see JOIN::make_tmp_tables_info.
     - The remaining tables (ie. tables - primary_tables - tmp_tables) are
       input tables to materialized semi-join operations.
     The tables are ordered as follows in the join_tab array:
@@ -80,11 +80,12 @@ public:
      2. non-const primary tables
      3. intermediate sort/group tables
      4. possible holes in array
-     5. materialized semi-join tables
+     5. semi-joined tables used with materialization strategy
   */
   uint     tables;         ///< Total number of tables in query block
   uint     primary_tables; ///< Number of primary input tables in query block
   uint     const_tables;   ///< Number of primary tables deemed constant
+  uint     tmp_tables;     ///< Number of temporary tables used by query
   uint     send_group_parts;
   /**
     Indicates that grouping will be performed on the result set during
@@ -391,11 +392,6 @@ public:
   List<Semijoin_mat_exec> sjm_exec_list;
   /* end of allocation caching storage */
 
-  /**
-    Number of tmp tables actually used by the query.
-    @see JOIN::make_tmp_tables_info
-  */
-  uint8 tmp_tables;
   /** TRUE <=> ref_pointer_array is set to items3. */
   bool set_group_rpa;
   /** Exec time only: TRUE <=> current group has been sent */
@@ -418,6 +414,7 @@ public:
     tables= 0;
     primary_tables= 0;
     const_tables= 0;
+    tmp_tables= 0;
     const_table_map= 0;
     join_list= 0;
     implicit_grouping= FALSE;
@@ -468,7 +465,6 @@ public:
     /* can help debugging (makes smaller test cases): */
     DBUG_EXECUTE_IF("no_const_tables",no_const_tables= TRUE;);
     first_select= sub_select;
-    tmp_tables= 0;
     set_group_rpa= false;
     group_sent= 0;
   }

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2012-08-22 10:24:34 +0000
+++ b/sql/sql_select.cc	2012-08-24 11:37:44 +0000
@@ -1299,6 +1299,7 @@ void calc_used_field_length(THD *thd, JO
 
   @details
     - create join->join_tab array and copy from existing JOIN_TABs in join order
+    - create helper structs for materialized semi-join handling
     - finalize semi-join strategy choices
     - Number of intermediate tables "tmp_tables" is calculated.
     - "tables" and "primary_tables" are recalculated.
@@ -1337,9 +1338,10 @@ bool JOIN::get_best_combination()
     Rearrange queries with materialized semi-join nests so that the semi-join
     nest is replaced with a reference to a materialized temporary table and all
     materialized subquery tables are placed after the intermediate tables.
-    After the following loop, "outer_target" is the position of first
-    materialized temporary table (if there is one) and "inner_target" is
-    the position of the first subquery table (if any).
+    After the following loop, "inner_target" is the position of the first
+    subquery table (if any). "outer_target" is the position of first outer
+    table, and will later be used to track the position of any materialized
+    temporary tables. 
   */
   uint outer_target= 0;                   
   uint inner_target= primary_tables + tmp_tables;
@@ -2545,13 +2547,13 @@ no_join_cache:
 
 
 /**
-  Setup the materialized table for one semi-join nest
+  Setup the materialized table for a semi-join nest
 
   @param tab       join_tab for the materialized semi-join table
   @param tableno   table number of materialized table
   @param inner_pos information about the first inner table of the subquery
   @param sjm_pos   information about the materialized semi-join table,
-                   to be filled in with data.
+                   to be filled with data.
 
   @details
     Setup execution structures for one semi-join materialization nest:
@@ -2568,7 +2570,7 @@ bool JOIN::setup_materialized_table(JOIN
                                     POSITION *sjm_pos)
 {
   DBUG_ENTER("JOIN::setup_materialized_table");
-  TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest;
+  const TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest;
   Semijoin_mat_optimize *const sjm_opt= &emb_sj_nest->nested_join->sjm;
   Semijoin_mat_exec *const sjm_exec= tab->sj_mat_exec;
   const uint field_count= sjm_exec->subq_exprs->elements;
@@ -2596,10 +2598,10 @@ bool JOIN::setup_materialized_table(JOIN
   if (!(table= create_tmp_table(thd, &sjm_exec->table_param, 
                                 *sjm_exec->subq_exprs, NULL, 
                                 true /* distinct */, 
-                                true /*save_sum_fields*/,
+                                true /* save_sum_fields */,
                                 thd->variables.option_bits |
                                 TMP_TABLE_ALL_COLUMNS, 
-                                HA_POS_ERROR /*rows_limit */, 
+                                HA_POS_ERROR /* rows_limit */, 
                                 name)))
     DBUG_RETURN(true); /* purecov: inspected */
   sjm_exec->table= table;

=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h	2012-08-22 10:24:34 +0000
+++ b/sql/sql_select.h	2012-08-24 11:37:44 +0000
@@ -548,7 +548,8 @@ public:
   st_join_table *first_inner;   /**< first inner table for including outerjoin*/
   bool           found;         /**< true after all matches or null complement*/
   bool           not_null_compl;/**< true before null complement is added    */
-  bool           materialized;  /**< true if materialized from derived/SJ    */
+  /// For a materializable derived or SJ table: true if has been materialized
+  bool           materialized;
   st_join_table *last_inner;    /**< last table table for embedding outer join*/
   st_join_table *first_upper;  /**< first inner table for embedding outer join*/
   st_join_table *first_unmatched; /**< used for optimization purposes only   */
@@ -585,7 +586,11 @@ public:
   */  
   READ_RECORD::Setup_func save_read_first_record;/* to save read_first_record */
   READ_RECORD::Read_func save_read_record;/* to save read_record.read_record */
-  /// Non-NULL if table is temporary table materialized from subquery
+  /**
+    Struct needed for materialization of semi-join. Set for a materialized
+    temporary table, and NULL for all other join_tabs (except when
+    materialization is in progress, @see join_materialize_semijoin()).
+  */
   Semijoin_mat_exec *sj_mat_exec;          
   double	worst_seeks;
   key_map	const_keys;			/**< Keys with constant part */
@@ -832,8 +837,10 @@ public:
     DBUG_ASSERT(first_sj_inner_tab->position->sj_strategy != SJ_OPT_NONE);
     return first_sj_inner_tab->position->sj_strategy;
   }
-
-  /// @returns (sub)query block id for an inner table of materialized semi-join
+  /**
+     @returns query block id for an inner table of materialized semi-join, and
+              0 for all other tables.
+  */
   uint sjm_query_block_id() const;
 
   bool and_with_condition(Item *tmp_cond, uint line);

=== modified file 'sql/table.h'
--- a/sql/table.h	2012-08-22 09:13:48 +0000
+++ b/sql/table.h	2012-08-24 11:37:44 +0000
@@ -2185,7 +2185,7 @@ typedef struct st_nested_join
   */
   table_map         sj_corr_tables;
   /**
-    Subquery block id if this struct is generated from a subquery transform.
+    Query block id if this struct is generated from a subquery transform.
   */
   uint query_block_id;
   /*

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-5.6 branch (roy.lyseng:4113 to 4114) Bug#14272788Roy Lyseng27 Aug