List:Commits« Previous MessageNext Message »
From:Roy Lyseng Date:March 15 2011 1:14pm
Subject:bzr commit into mysql-trunk branch (roy.lyseng:3348) Bug#11822517
View as plain text  
#At file:///home/rl136806/mysql/repo/mysql-work5/ based on revid:roy.lyseng@stripped

 3348 Roy Lyseng	2011-03-15
      Bug#11822517: Refactor call interface of choose_plan()
      
        The main purposes of this patch is
         1. Make the interface of choose_plan() and greedy_search() simpler.
         2. Prepare for a simpler test for dependencies when doing
            semi join materialization together with outer join (WL#5561).
         The fix involves replacing the join_tables argument to choose_plan()
         with a join nest pointer, which is NULL when the entire query will
         be optimized. Thus, the set of relevant tables is instead calculated
         inside this function. 
         We also clarify that we need to pass as remaining_tables to
         best_access_path() not only the tables remaining to be optimized,
         but also all tables outside of the current semi join nest.
      
      sql/sql_select.cc
        optimize_semijoin_nests() is now taking the map of all query tables
        from the passed JOIN object instead of an argument.
      
        best_access_path() - meaning of argument remaining_tables is changed so
        that it actually is interpreted as all tables not added to the current
        partial plan.
      
        choose_plan() - argument join_tables is replaced with sjm_nest -
        pointer to semi join nest to generate a plan for. The tables to include
        in the plan is calculated inside the function instead.
      
        greedy_search() - now calling best_access_path() with all tables not
        included in the current partial plan.
        Not calling advance_sj_state() when calculating plan for a materialized
        semi join nest.
      
      sql/sql_select.h
        Added all_table_map (map of tables in query) to JOIN object.
        Commented const_table_map, found_const_table_map and outer_join

    modified:
      sql/sql_select.cc
      sql/sql_select.h
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2011-03-03 09:43:14 +0000
+++ b/sql/sql_select.cc	2011-03-15 13:13:54 +0000
@@ -66,7 +66,7 @@ struct st_sargable_param;
 static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array);
 static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, Item *conds,
 				 DYNAMIC_ARRAY *keyuse);
-static bool optimize_semijoin_nests(JOIN *join, table_map all_table_map);
+static bool optimize_semijoin_nests(JOIN *join);
 static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
                                 JOIN_TAB *join_tab,
                                 uint tables, Item *conds,
@@ -77,7 +77,7 @@ static int sort_keyuse(Key_use *a, Key_u
 static void set_position(JOIN *join, uint index, JOIN_TAB *table, Key_use *key);
 static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, Key_use *org_keyuse,
 			       table_map used_tables);
-static bool choose_plan(JOIN *join, table_map join_tables);
+static bool choose_plan(JOIN *join, TABLE_LIST *sjm_nest);
 static void best_access_path(JOIN  *join, JOIN_TAB *s, 
                              table_map remaining_tables, uint idx, 
                              bool disable_jbuf, double record_count,
@@ -4609,7 +4609,7 @@ make_join_statistics(JOIN *join, TABLE_L
   TABLE *table;
   TABLE_LIST *tables= tables_arg;
   uint i,table_count,const_count,key;
-  table_map found_const_table_map, all_table_map, found_ref, refs;
+  table_map found_const_table_map, found_ref, refs;
   TABLE **table_vector;
   JOIN_TAB *stat,*stat_end,*s,**stat_ref;
   Key_use *keyuse, *start_keyuse;
@@ -4636,7 +4636,8 @@ make_join_statistics(JOIN *join, TABLE_L
   join->best_ref=stat_vector;
 
   stat_end=stat+table_count;
-  found_const_table_map= all_table_map=0;
+  found_const_table_map=0;
+  join->all_table_map= 0;
   const_count=0;
 
   for (s= stat, i= 0;
@@ -4656,7 +4657,7 @@ make_join_statistics(JOIN *join, TABLE_L
     table->reginfo.join_tab=s;
     table->reginfo.not_exists_optimize=0;
     bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys);
-    all_table_map|= table->map;
+    join->all_table_map|= table->map;
     s->join=join;
 
     s->dependent= tables->dep_tables;
@@ -5082,13 +5083,13 @@ make_join_statistics(JOIN *join, TABLE_L
   if (join->const_tables != join->tables)
     optimize_keyuse(join, keyuse_array);
    
-  if (optimize_semijoin_nests(join, all_table_map))
+  if (optimize_semijoin_nests(join))
     DBUG_RETURN(TRUE); /* purecov: inspected */
 
   /* Find an optimal join order of the non-constant tables. */
   if (join->const_tables != join->tables)
   {
-    if (choose_plan(join, all_table_map & ~join->const_table_map))
+    if (choose_plan(join, NULL))
       goto error;
   }
   else
@@ -5120,7 +5121,6 @@ error:
   SYNOPSIS
     optimize_semijoin_nests()
       join           The join to optimize semi-join nests for
-      all_table_map  Bitmap of all tables in the join
 
   DESCRIPTION
     Optimize each of the semi-join nests that can be run with
@@ -5139,7 +5139,7 @@ error:
     TRUE   Out of memory error
 */
 
-static bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
+static bool optimize_semijoin_nests(JOIN *join)
 {
   DBUG_ENTER("optimize_semijoin_nests");
   List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
@@ -5168,8 +5168,7 @@ static bool optimize_semijoin_nests(JOIN
       */
       if (semijoin_types_allow_materialization(sj_nest))
       {
-        join->emb_sjm_nest= sj_nest;
-        if (choose_plan(join, all_table_map & ~join->const_table_map))
+        if (choose_plan(join, sj_nest))
           DBUG_RETURN(TRUE); /* purecov: inspected */
         /*
           The best plan to run the subquery is now in join->best_positions,
@@ -5258,7 +5257,6 @@ static bool optimize_semijoin_nests(JOIN
       }
     }
   }
-  join->emb_sjm_nest= NULL;
   DBUG_RETURN(FALSE);
 }
 
@@ -6927,11 +6925,18 @@ public:
   'join->positions' of length 'idx'. The chosen access method for 's' and its
   cost are stored in 'join->positions[idx]'.
 
+  Notice also that when calculating a plan for a materialized semi join nest,
+  this function must not consider key references between tables inside the
+  semi join nest and those outside of it. The way to control this is to add
+  the set of outside tables to the @c remaining_tables argument.
+
   @param join             pointer to the structure providing all context info
                           for the query
   @param s                the table to be joined by the function
   @param thd              thread for the connection that submitted the query
-  @param remaining_tables set of tables not included into the partial plan yet
+  @param remaining_tables set of tables not included into the partial plan yet.
+                          also tables outside of the join nest when calculating
+                          a materialized semi join nest.
   @param idx              the length of the partial plan
   @param disable_jbuf     TRUE<=> Don't use join buffering
   @param record_count     estimate for the number of records returned by the
@@ -7526,10 +7531,15 @@ best_access_path(JOIN      *join,
   it. Each specific optimization procedure stores the final optimal plan in
   the array 'join->best_positions', and the cost of the plan in
   'join->best_read'.
-
-  @param join         pointer to the structure providing all context info for
-                      the query
-  @param join_tables  set of the tables in the query
+  The function can be invoked to produce a plan for all tables in the query
+  (in this case, the const tables are usually filtered out), or it can be
+  invoked to produce a plan for a materialization of a semi join nest.
+  Call with non-NULL sjm_nest pointer when producing a plan for a semi join
+  nest to be materialized and a NULL pointer when producing a full query plan.
+
+  @param join      pointer to structure providing all context info for query
+  @param sjm_nest  semi join nest that we are producing plan for, or NULL
+                   which means produce plan for full query.
 
   @retval
     FALSE       ok
@@ -7537,24 +7547,31 @@ best_access_path(JOIN      *join,
     TRUE        Fatal error
 */
 
-static bool
-choose_plan(JOIN *join, table_map join_tables)
+static bool choose_plan(JOIN *join, TABLE_LIST *sjm_nest)
 {
   uint search_depth= join->thd->variables.optimizer_search_depth;
   uint prune_level=  join->thd->variables.optimizer_prune_level;
   bool straight_join= test(join->select_options & SELECT_STRAIGHT_JOIN);
+  table_map join_tables;
   DBUG_ENTER("choose_plan");
 
+  join->emb_sjm_nest= sjm_nest;
   join->cur_embedding_map= 0;
   reset_nj_counters(join->join_list);
   qsort2_cmp jtab_sort_func;
 
-  if (join->emb_sjm_nest)
+  if (sjm_nest)
   {
     /* We're optimizing semi-join materialization nest, so put the 
        tables from this semi-join as first
     */
     jtab_sort_func= join_tab_cmp_embedded_first;
+    join_tables= sjm_nest->sj_inner_tables;
+    /*
+      Inner tables of semi join nest are not allowed to be identified as
+      const tables.
+    */
+    DBUG_ASSERT(join_tables == (join_tables & ~join->const_table_map));
   }
   else
   {
@@ -7567,6 +7584,7 @@ choose_plan(JOIN *join, table_map join_t
         records accessed.
     */
     jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
+    join_tables= join->all_table_map & ~join->const_table_map;
   }
   my_qsort2(join->best_ref + join->const_tables,
             join->tables - join->const_tables, sizeof(JOIN_TAB*),
@@ -7594,6 +7612,9 @@ choose_plan(JOIN *join, table_map join_t
   */
   if (join->thd->lex->is_single_level_stmt())
     join->thd->status_var.last_query_cost= join->best_read;
+
+  join->emb_sjm_nest= NULL;
+
   DBUG_RETURN(FALSE);
 }
 
@@ -7986,16 +8007,14 @@ greedy_search(JOIN      *join,
   DBUG_ENTER("greedy_search");
 
   /* number of tables that remain to be optimized */
-  n_tables= size_remain= my_count_bits(remaining_tables &
-                                       (join->emb_sjm_nest? 
-                                         join->emb_sjm_nest->sj_inner_tables :
-                                         ~(table_map)0));
+  n_tables= size_remain= my_count_bits(remaining_tables);
 
   do {
     /* Find the extension of the current QEP with the lowest cost */
     join->best_read= DBL_MAX;
-    if (best_extension_by_limited_search(join, remaining_tables, idx, record_count,
-                                         read_time, search_depth, prune_level))
+    if (best_extension_by_limited_search(join, remaining_tables, idx,
+                                         record_count, read_time,
+                                         search_depth, prune_level))
       DBUG_RETURN(TRUE);
     /*
       'best_read < DBL_MAX' means that optimizer managed to find
@@ -8242,18 +8261,25 @@ best_extension_by_limited_search(JOIN   
 
   DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
                                 "part_plan"););
+  /*
+    When calculating a plan for a materialized semi join nest,
+    best_access_plan() needs to know not only the remaining tables within the
+    join nest, but also all tables outside of this nest, because there may
+    be key references between the semi join nest and the outside tables
+    that should not be considered when materializing the semi join nest.
+  */
+  const table_map excluded_tables=
+    join->emb_sjm_nest ?
+      join->all_table_map & ~join->emb_sjm_nest->sj_inner_tables :
+      0;
 
-  table_map allowed_tables= ~(table_map)0;
-  if (join->emb_sjm_nest)
-    allowed_tables= join->emb_sjm_nest->sj_inner_tables;
-
-  bool has_sj= !join->select_lex->sj_nests.is_empty();
+  const bool has_sj=
+    !(join->select_lex->sj_nests.is_empty() || join->emb_sjm_nest);
 
   for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
   {
     table_map real_table_bit= s->table->map;
     if ((remaining_tables & real_table_bit) && 
-        (allowed_tables & real_table_bit) &&
         !(remaining_tables & s->dependent) && 
         (!idx || !check_interleaving_with_nj(s)))
     {
@@ -8262,7 +8288,8 @@ best_extension_by_limited_search(JOIN   
 
       /* Find the best access method from 's' to the current partial plan */
       POSITION loose_scan_pos;
-      best_access_path(join, s, remaining_tables, idx, FALSE, record_count, 
+      best_access_path(join, s, excluded_tables | remaining_tables,
+                       idx, FALSE, record_count, 
                        join->positions + idx, &loose_scan_pos);
 
       /* Compute the cost of extending the plan with 's' */
@@ -8277,6 +8304,8 @@ best_extension_by_limited_search(JOIN   
           cost (takes 9% of time in a 20-table plan search), hence the if()
           above, which is also more efficient than the same if() inside
           advance_sj_state() would be.
+          Besides, never call advance_sj_state() when calculating the plan
+          for a materialized semi join nest.
         */
         advance_sj_state(join, remaining_tables, s, idx,
                          &current_record_count, &current_read_time,
@@ -8333,7 +8362,7 @@ best_extension_by_limited_search(JOIN   
         }
       }
 
-      if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
+      if (search_depth > 1 && (remaining_tables & ~real_table_bit))
       { /* Recursively expand the current partial plan */
         swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
         if (best_extension_by_limited_search(join,
@@ -13932,6 +13961,15 @@ void advance_sj_state(JOIN *join, table_
   TABLE_LIST *emb_sj_nest= new_join_tab->emb_sj_nest;
   POSITION *pos= join->positions + idx;
 
+  /*
+    Semi join nests cannot be nested, hence we never need to advance the
+    semi join state of a materialized semi join query.
+    In fact, doing this may cause undesirable effects because all tables
+    within a semi join nest have emb_sj_nest |= NULL, which triggers several
+    of the actions inside this function.
+  */
+  DBUG_ASSERT(join->emb_sjm_nest == NULL);
+
   /* Add this table to the join prefix */
   remaining_tables &= ~new_join_tab->table->map;
 

=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h	2011-03-01 14:57:53 +0000
+++ b/sql/sql_select.h	2011-03-15 13:13:54 +0000
@@ -1640,11 +1640,10 @@ public:
   bool     first_record,full_join, no_field_update;
   bool	   group;          /**< If query contains GROUP BY clause */
   bool	   do_send_rows;
-  table_map const_table_map,found_const_table_map;
-  /*
-     Bitmap of all inner tables from outer joins
-  */
-  table_map outer_join;
+  table_map all_table_map;   /**< Set of tables contained in query */
+  table_map const_table_map; /**< Set of tables found to be const */
+  table_map found_const_table_map; /**< Tables that are const and non-empty */
+  table_map outer_join;      /**< Bitmap of all inner tables from outer joins */
   /* Number of records produced after join + group operation */
   ha_rows  send_records;
   ha_rows found_records,examined_rows,row_limit;


Attachment: [text/bzr-bundle] bzr/roy.lyseng@oracle.com-20110315131354-nsw9y9qpc4t2y4dm.bundle
Thread
bzr commit into mysql-trunk branch (roy.lyseng:3348) Bug#11822517Roy Lyseng15 Mar
  • Re: bzr commit into mysql-trunk branch (roy.lyseng:3348) Bug#11822517Guilhem Bichot17 Mar
    • Re: bzr commit into mysql-trunk branch (roy.lyseng:3348) Bug#11822517Ole John Aske17 Mar
    • Re: bzr commit into mysql-trunk branch (roy.lyseng:3348) Bug#11822517Roy Lyseng21 Mar
      • Re: bzr commit into mysql-trunk branch (roy.lyseng:3348) Bug#11822517Guilhem Bichot21 Mar