List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:October 13 2008 10:27pm
Subject:bzr commit into mysql-6.0-opt-subqueries branch (sergefp:2698) WL#3985
View as plain text  
#At file:///home/spetrunia/dev/mysql-6.0-subq-r16-review-feedback/

 2698 Sergey Petrunia	2008-10-14
      WL#3985: Subqueries: smart choice between semi-join and materialization
      - Code cleanup, adding comments
modified:
  sql/sql_lex.h
  sql/sql_select.cc

per-file messages:
  sql/sql_lex.h
    WL#3985: Subqueries: smart choice between semi-join and materialization
    - Added st_select_lex::is_part_of_union().
  sql/sql_select.cc
    WL#3985: Subqueries: smart choice between semi-join and materialization
    - Code cleanup, adding comments
=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h	2008-08-18 18:12:46 +0000
+++ b/sql/sql_lex.h	2008-10-13 22:27:26 +0000
@@ -819,7 +819,7 @@ public:
   }
 
   void clear_index_hints(void) { index_hints= NULL; }
-
+  bool is_part_of_union() { return master_unit()->is_union(); }
 private:  
   /* current index hint kind. used in filling up index_hints */
   enum index_hint_type current_index_hint_type;

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2008-10-01 22:17:03 +0000
+++ b/sql/sql_select.cc	2008-10-13 22:27:26 +0000
@@ -552,143 +552,143 @@ JOIN::prepare(Item ***rref_pointer_array
       DBUG_RETURN(-1);				/* purecov: inspected */
     thd->lex->allow_sum_func= save_allow_sum_func;
   }
+  
+  /*
+    Perform early unconditional subquery tranformations:
+     - Convert subquery predicate into semi-join, or
+     - Mark the subquery for execution using materialization, or
+     - Perform IN->EXISTS tranformation, or
+     - Perform more/less ALL/ANY -> MIN/MAX rewrite
+     - Substitute trivial scalar-context subquery with its value
 
-  if (!thd->lex->view_prepare_mode)
-  {
-    Item_subselect *subselect;
-    Item_in_subselect *in_subs= NULL;
+    TODO: for PS, make the whole block execute only on the first execution
+  */
+  Item_subselect *subselect;
+  if (!thd->lex->view_prepare_mode && 
+      (subselect= select_lex->master_unit()->item) && 
+      subselect->substype() == Item_subselect::IN_SUBS)
+  {
+    Item_in_subselect *in_subs= (Item_in_subselect*)subselect;
+    bool do_semijoin= !test(thd->variables.optimizer_switch &
+                            OPTIMIZER_SWITCH_NO_SEMIJOIN);
+    DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
     /*
-      Are we in a subquery predicate?
-      TODO: the block below will be executed for every PS execution without need.
-    */
-    if ((subselect= select_lex->master_unit()->item))
-    {
-      bool do_semijoin= !test(thd->variables.optimizer_switch &
-                              OPTIMIZER_SWITCH_NO_SEMIJOIN);
-      if (subselect->substype() == Item_subselect::IN_SUBS)
-        in_subs= (Item_in_subselect*)subselect;
-
-      DBUG_PRINT("info", ("Checking if subq can be converted to semi-join"));
-      /*
-        Check if we're in subquery that is a candidate for flattening into a
-        semi-join (which is done done in flatten_subqueries()). The
-        requirements are:
-          1. Subquery predicate is an IN/=ANY subq predicate
-          2. Subquery is a single SELECT (not a UNION)
-          3. Subquery does not have GROUP BY or ORDER BY
-          4. Subquery does not use aggregate functions or HAVING
-          5. Subquery predicate is at the AND-top-level of ON/WHERE clause
-          6. No execution method was already chosen (by a prepared statement).
-
-          (*). We are not in a subquery of a single table UPDATE/DELETE that 
-               doesn't have a JOIN (TODO: We should handle this at some
-               point by switching to multi-table UPDATE/DELETE)
-
-          (**). We're not in a confluent table-less subquery, like
-                "SELECT 1". 
-      */
-      if (in_subs &&                                                    // 1
-          !select_lex->master_unit()->first_select()->next_select() &&  // 2
-          !select_lex->group_list.elements && !order &&                 // 3
-          !having && !select_lex->with_sum_func &&                      // 4
-          thd->thd_marker.emb_on_expr_nest &&                           // 5
-          select_lex->outer_select()->join &&                           // (*)
-          select_lex->master_unit()->first_select()->leaf_tables &&     // (**) 
-          do_semijoin &&
-          in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED)   // 6
-      {
-        DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
-        in_subs->types_allow_materialization= 
-          subquery_types_allow_materialization(thd, in_subs,
-                                               &in_subs->sjm_scan_allowed);
-
-        if (thd->stmt_arena->state != Query_arena::PREPARED)
-        {
-          SELECT_LEX *current= thd->lex->current_select;
-          thd->lex->current_select= current->return_after_parsing();
-          char const *save_where= thd->where;
-          thd->where= "IN/ALL/ANY subquery";
-          
-          bool failure= !in_subs->left_expr->fixed &&
-                         in_subs->left_expr->fix_fields(thd, 
-                                                        &in_subs->left_expr);
-          thd->lex->current_select= current;
-          thd->where= save_where;
-          in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
-          if (failure)
-            DBUG_RETURN(-1);
-          /*
-            Check that the right part of the subselect contains no more than one
-            column. E.g. in SELECT 1 IN (SELECT * ..) the right part is (SELECT * ...)
-          */
-          if (subselect->substype() == Item_subselect::IN_SUBS &&
-             (select_lex->item_list.elements != 
-              ((Item_in_subselect*)subselect)->left_expr->cols()))
-          {
-            my_error(ER_OPERAND_COLUMNS, MYF(0), ((Item_in_subselect*)subselect)->left_expr->cols());
-            DBUG_RETURN(-1);
-          }
-        }
-
-        /* Register the subquery for further processing */
-        select_lex->outer_select()->join->sj_subselects.append(thd->mem_root, in_subs);
-        in_subs->expr_join_nest= thd->thd_marker.emb_on_expr_nest;
-      }
-      else
-      {
-        DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
-        bool do_materialize= !test(thd->variables.optimizer_switch &
-                                   OPTIMIZER_SWITCH_NO_MATERIALIZATION);
+      Check if we're in subquery that is a candidate for flattening into a
+      semi-join (which is done done in flatten_subqueries()). The
+      requirements are:
+        1. Subquery predicate is an IN/=ANY subq predicate
+        2. Subquery is a single SELECT (not a UNION)
+        3. Subquery does not have GROUP BY or ORDER BY
+        4. Subquery does not use aggregate functions or HAVING
+        5. Subquery predicate is at the AND-top-level of ON/WHERE clause
+        6. We are not in a subquery of a single table UPDATE/DELETE that 
+             doesn't have a JOIN (TODO: We should handle this at some
+             point by switching to multi-table UPDATE/DELETE)
+        7. We're not in a confluent table-less subquery, like "SELECT 1".
+        8. No execution method was already chosen (by a prepared statement).
+    */
+    if (do_semijoin &&
+        in_subs &&                                                    // 1
+        !select_lex->is_part_of_union() &&                            // 2
+        !select_lex->group_list.elements && !order &&                 // 3
+        !having && !select_lex->with_sum_func &&                      // 4
+        thd->thd_marker.emb_on_expr_nest &&                           // 5
+        select_lex->outer_select()->join &&                           // 6
+        select_lex->master_unit()->first_select()->leaf_tables &&     // 7
+        in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED)   // 8
+    {
+      DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
+      in_subs->types_allow_materialization= 
+        subquery_types_allow_materialization(thd, in_subs,
+                                             &in_subs->sjm_scan_allowed);
+
+      if (thd->stmt_arena->state != Query_arena::PREPARED)
+      {
+        SELECT_LEX *current= thd->lex->current_select;
+        thd->lex->current_select= current->return_after_parsing();
+        char const *save_where= thd->where;
+        thd->where= "IN/ALL/ANY subquery";
+        
+        bool failure= !in_subs->left_expr->fixed &&
+                       in_subs->left_expr->fix_fields(thd, 
+                                                      &in_subs->left_expr);
+        thd->lex->current_select= current;
+        thd->where= save_where;
+        in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
+        if (failure)
+          DBUG_RETURN(-1);
         /*
-          Check if the subquery predicate can be executed via materialization.
-          The required conditions are:
-          1. Subquery predicate is an IN/=ANY subq predicate
-          2. Subquery is a single SELECT (not a UNION)
-          3. Subquery is not a table-less query. In this case there is no
-             point in materializing.
-          4. Subquery predicate is a top-level predicate
-             (this implies it is not negated)
-             TODO: this is a limitation that should be lifeted once we
-             implement correct NULL semantics (WL#3830)
-          5. Subquery is non-correlated
-             TODO:
-             This is an overly restrictive condition. It can be extended to:
-             (Subquery is non-correlated ||
-              Subquery is correlated to any query outer to IN predicate ||
-              (Subquery is correlated to the immediate outer query &&
-               Subquery !contains {GROUP BY, ORDER BY [LIMIT],
-               aggregate functions}) && subquery predicate is not under "NOT IN"))
-          6. No execution method was already chosen (by a prepared statement).
-
-          (*) The subquery must be part of a SELECT statement. The current
-               condition also excludes multi-table update statements.
-
-          We have to determine whether we will perform subquery materialization
-          before calling the IN=>EXISTS transformation, so that we know whether to
-          perform the whole transformation or only that part of it which wraps
-          Item_in_subselect in an Item_in_optimizer.
+          Check if the left and right expressions have the same # of
+          columns, i.e. we don't have a case like 
+            (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...)
+
+          TODO why do we have this duplicated in IN->EXISTS transformers?
         */
-        if (do_materialize && 
-            in_subs  &&                                                   // 1
-            !select_lex->master_unit()->first_select()->next_select() &&  // 2
-            select_lex->master_unit()->first_select()->leaf_tables &&     // 3
-            thd->lex->sql_command == SQLCOM_SELECT &&                     // *
-            subquery_types_allow_materialization(thd, in_subs, NULL))
-        {
-          if (in_subs->is_top_level_item() &&                             // 4
-              !in_subs->is_correlated &&                                  // 5
-              in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
-            in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
-        }
-
-        Item_subselect::trans_res trans_res;
-        if ((trans_res= subselect->select_transformer(this)) !=
-            Item_subselect::RES_OK)
+        if (select_lex->item_list.elements != in_subs->left_expr->cols())
         {
-          select_lex->fix_prepare_information(thd, &conds, &having);
-          DBUG_RETURN((trans_res == Item_subselect::RES_ERROR));
+          my_error(ER_OPERAND_COLUMNS, MYF(0), in_subs->left_expr->cols());
+          DBUG_RETURN(-1);
         }
       }
+
+      /* Register the subquery for further processing */
+      select_lex->outer_select()->join->sj_subselects.append(thd->mem_root, in_subs);
+      in_subs->expr_join_nest= thd->thd_marker.emb_on_expr_nest;
+    }
+    else
+    {
+      DBUG_PRINT("info", ("Subquery can't be converted to semi-join"));
+      bool do_materialize= !test(thd->variables.optimizer_switch &
+                                 OPTIMIZER_SWITCH_NO_MATERIALIZATION);
+      /*
+        Check if the subquery predicate can be executed via materialization.
+        The required conditions are:
+        1. Subquery predicate is an IN/=ANY subq predicate
+        2. Subquery is a single SELECT (not a UNION)
+        3. Subquery is not a table-less query. In this case there is no
+           point in materializing.
+        4. Subquery predicate is a top-level predicate
+           (this implies it is not negated)
+           TODO: this is a limitation that should be lifeted once we
+           implement correct NULL semantics (WL#3830)
+        5. Subquery is non-correlated
+           TODO:
+           This is an overly restrictive condition. It can be extended to:
+           (Subquery is non-correlated ||
+            Subquery is correlated to any query outer to IN predicate ||
+            (Subquery is correlated to the immediate outer query &&
+             Subquery !contains {GROUP BY, ORDER BY [LIMIT],
+             aggregate functions}) && subquery predicate is not under "NOT IN"))
+        6. No execution method was already chosen (by a prepared statement).
+
+        (*) The subquery must be part of a SELECT statement. The current
+             condition also excludes multi-table update statements.
+
+        We have to determine whether we will perform subquery materialization
+        before calling the IN=>EXISTS transformation, so that we know whether to
+        perform the whole transformation or only that part of it which wraps
+        Item_in_subselect in an Item_in_optimizer.
+      */
+      if (do_materialize && 
+          in_subs  &&                                                   // 1
+          !select_lex->is_part_of_union() &&                            // 2
+          select_lex->master_unit()->first_select()->leaf_tables &&     // 3
+          thd->lex->sql_command == SQLCOM_SELECT &&                     // *
+          subquery_types_allow_materialization(thd, in_subs, NULL))
+      {
+        if (in_subs->is_top_level_item() &&                             // 4
+            !in_subs->is_correlated &&                                  // 5
+            in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
+          in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
+      }
+
+      Item_subselect::trans_res trans_res;
+      if ((trans_res= subselect->select_transformer(this)) !=
+          Item_subselect::RES_OK)
+      {
+        select_lex->fix_prepare_information(thd, &conds, &having);
+        DBUG_RETURN((trans_res == Item_subselect::RES_ERROR));
+      }
     }
   }
 
@@ -852,6 +852,7 @@ err:
     TRUE   Yes
     FALSE  No, or this is an invalid subquery
 */
+
 static 
 bool subquery_types_allow_materialization(THD *thd,  
                                           Item_in_subselect *in_subs,
@@ -991,8 +992,6 @@ static void save_index_subquery_explain_
 }
 
 
-
-
 /*
   Check if the table's rowid is included in the temptable
 
@@ -3000,7 +2999,7 @@ mysql_select(THD *thd, Item ***rref_poin
     err= 1;
     goto err;
   }
-  /* dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables); */
+  /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
 
   if ((err= join->optimize()))
   {
@@ -5656,14 +5655,13 @@ public:
 
   uint best_max_loose_keypart;
 
-
   Loose_scan_opt():
     try_loosescan(FALSE),
     bound_sj_equalities(0),
     quick_uses_applicable_index(FALSE)
   {
-    LINT_INIT(quick_max_loose_keypart); // Protected by sj_insideout_quick_*
-    /* Protected by best_loose_scan_cost!= DBL_MAX */
+    LINT_INIT(quick_max_loose_keypart); /* Protected by sj_insideout_quick_* */
+    /* The following are protected by best_loose_scan_cost!= DBL_MAX */
     LINT_INIT(best_loose_scan_key);
     LINT_INIT(best_loose_scan_records);
     LINT_INIT(best_max_loose_keypart);
@@ -5751,12 +5749,12 @@ public:
            (handled_col|bound_col)* (other_col|bound_col)
 
     */
-    if (try_loosescan &&                                    // (1)
-        (handled_sj_equalities | bound_sj_equalities) ==      // (2)
-        PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) &&  // (2)
-        (PREV_BITS(key_part_map, max_loose_keypart+1) &           // (3)
-         (found_part | loose_scan_keyparts)) ==                     // (3)
-         (found_part | loose_scan_keyparts) &&                     // (3)
+    if (try_loosescan &&                                       // (1)
+        (handled_sj_equalities | bound_sj_equalities) ==         // (2)
+        PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) &&     // (2)
+        (PREV_BITS(key_part_map, max_loose_keypart+1) &        // (3)
+         (found_part | loose_scan_keyparts)) ==                // (3)
+         (found_part | loose_scan_keyparts) &&                 // (3)
         !key_uses_partial_cols(s->table, key))
     {
       /* Ok, can use the strategy */
@@ -7536,33 +7534,21 @@ prev_record_reads(JOIN *join, uint idx, 
 }
 
 
-/**
-  Set up join struct according to best position.
-*/
-
-static bool
-get_best_combination(JOIN *join)
-{
-  uint i,tablenr;
-  table_map used_tables;
-  JOIN_TAB *join_tab,*j;
-  KEYUSE *keyuse;
-  uint table_count;
-  THD *thd=join->thd;
-  DBUG_ENTER("get_best_combination");
-
-  table_count=join->tables;
-  if (!(join->join_tab=join_tab=
-	(JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*table_count)))
-    DBUG_RETURN(TRUE);
+/*
+  Fix semi-join strategies for the picked join order
 
-  join->full_join=0;
+  SYNOPSIS
+    fix_semijoin_strategies_for_picked_join_order()
+      join  The join with the picked join order
 
-  used_tables= OUTER_REF_TABLE_BIT;		// Outer row is already read
+  DESCRIPTION
+     psergey-todo: comments:   
+*/
 
-  /*
-    Prepare semi-join processing info for plan refimenent stage:
-  */
+static void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
+{
+  uint table_count=join->tables;
+  uint tablenr;
   table_map remaining_tables= 0;
   table_map handled_tabs= 0;
   for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1; tablenr--)
@@ -7713,6 +7699,49 @@ get_best_combination(JOIN *join)
       pos->sj_strategy= SJ_OPT_NONE;
     remaining_tables |= s->table->map;
   }
+}
+
+
+/*
+  Set up join struct according to best position.
+  
+  SYNOPSIS
+    get_best_combination()
+      join  The join to process
+
+  DESCRIPTION
+    Setup join structures accroding the picked join order:
+    - finalize semi-join strategy choices (see
+      fix_semijoin_strategies_for_picked_join_order)
+    - create join->join_tab array and put there the JOIN_TABs in the join order
+    - create ref access data structures
+
+  RETURN 
+    FALSE  OK
+    TRUE   Out of memory
+*/
+
+static bool
+get_best_combination(JOIN *join)
+{
+  uint i,tablenr;
+  table_map used_tables;
+  JOIN_TAB *join_tab,*j;
+  KEYUSE *keyuse;
+  uint table_count;
+  THD *thd=join->thd;
+  DBUG_ENTER("get_best_combination");
+
+  table_count=join->tables;
+  if (!(join->join_tab=join_tab=
+	(JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*table_count)))
+    DBUG_RETURN(TRUE);
+
+  join->full_join=0;
+
+  used_tables= OUTER_REF_TABLE_BIT;		// Outer row is already read
+
+  fix_semijoin_strategies_for_picked_join_order(join);
 
   for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
   {
@@ -9093,8 +9122,27 @@ static uint make_join_orderinfo(JOIN *jo
 
 
 /*
-  Next_select_func-compatible function that writes data to semi-join
-  temporary table. (different from the standard
+  end_select-compatible function that writes the record into a sjm temptable
+  
+  SYNOPSIS
+    end_sj_materialize()
+      join            The join 
+      join_tab        Last join table
+      end_of_records  FALSE <=> This call is made to pass the callee another
+                                record combination
+                      TRUE  <=> EOF (no action)
+
+  DESCRIPTION
+    This function is used by semi-join materialization to capture suquery's
+    resultset and write it into the temptable (that is, materialize it).
+
+  NOTE
+    This function is used only for semi-join materialization. Non-semijoin
+    materialization uses different mechanism.
+
+  RETURN 
+    NESTED_LOOP_OK
+    NESTED_LOOP_ERROR
 */
 
 static enum_nested_loop_state 
@@ -9112,11 +9160,11 @@ end_sj_materialize(JOIN *join, JOIN_TAB 
     Item *item;
     while ((item= it++))
     {
-      if (item->is_null()) //psergey-sjm-todo: check if this is the right call
+      if (item->is_null())
         DBUG_RETURN(NESTED_LOOP_OK);
     }
     fill_record(thd, table->field, sjm->sjm_table_cols, 1);
-    if (thd->is_error()) /* psergey-todo: do we need this? */
+    if (thd->is_error())
       DBUG_RETURN(NESTED_LOOP_ERROR);
     if ((error= table->file->ha_write_row(table->record[0])))
     {
@@ -9164,7 +9212,7 @@ void remove_sj_conds(Item **tree)
 
 
 /*
-
+  psergey-todo: comments.
 */
 
 Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, 
@@ -9197,12 +9245,15 @@ Item *create_subq_in_equalities(THD *thd
 
 
 /*
-  Setup semi-join materialization for one nest
+  Setup semi-join materialization strategy for one semi-join nest
   
+  SYNOPSIS
+
   setup_sj_materialization()
     tab  The first tab in the semi-join
 
-  //TODO: copy a part of this to join::reinit or whatever.
+  DESCRIPTION
+    //TODO: copy a part of this to join::reinit or whatever. (pserey-todo: still need this?)
 */
 
 bool setup_sj_materialization(JOIN_TAB *tab)
@@ -9216,8 +9267,7 @@ bool setup_sj_materialization(JOIN_TAB *
   List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list;
 
   /* 
-    Set up the table to write to, do as select_union::create_result_table
-    does
+    Set up the table to write to, do as select_union::create_result_table does
   */
   sjm->sjm_table_param.init();
   sjm->sjm_table_param.field_count= item_list.elements;
@@ -9325,7 +9375,7 @@ bool setup_sj_materialization(JOIN_TAB *
                               sjm->table->field[i], FALSE);
     }
     /*
-      Dont neeed this because we're using different approach here:
+      Dont need this because we're using different approach here:
       we copy materialized table columns to 
        Do not create/attach IN-equalities. They are already appropriately
        attached into JOIN_TABs.
@@ -9347,7 +9397,7 @@ bool setup_sj_materialization(JOIN_TAB *
 
 
 /*
-  Plan refinement stage: do various set ups for the executioner
+  Plan refinement stage: do various setup things for the executor
 
   SYNOPSIS
     make_join_readinfo()
@@ -9358,9 +9408,9 @@ bool setup_sj_materialization(JOIN_TAB *
 
   DESCRIPTION
     Plan refinement stage: do various set ups for the executioner
-      - set up use of join buffering
+      - setup join buffering use
       - push index conditions
-      - increment counters
+      - increment relevant counters
       - etc
 
   RETURN 
@@ -15110,11 +15160,19 @@ int rr_sequential_and_unpack(READ_RECORD
 
 
 /*
-  Semi-join materialization function
+  Semi-join materialization join function
+
+  SYNOPSIS
+
+  DESCRIPTION
+    This function is called for
+    (psergey-todo: comment)
+  RETURN
+    
 */
 
 enum_nested_loop_state
-sub_select_sjm(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
+sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
 {
   int res;
   enum_nested_loop_state rc;

Thread
bzr commit into mysql-6.0-opt-subqueries branch (sergefp:2698) WL#3985Sergey Petrunia14 Oct