List:Commits« Previous MessageNext Message »
From:Evgeny Potemkin Date:April 28 2011 12:53pm
Subject:Re: bzr commit into mysql-trunk branch (ole.john.aske:3553) Bug#41740
Bug#58225 Bug#59326
View as plain text  
Hi Ole John,

The idea to prune plans earlier is good, but the check itself is ineffective.

At the moment of the check sort_by_table points to a table to be sorted if:
1) there is only one of order by and group by lists or they are 'compatible' 
(i.e the same)
2) all order by/group by items are from the only one table
otherwise it's NULL.
And the check effectively tests if such table isn't a const one.
Obviously it's far from covering real order by usage.

I suggest to replace it for another one. It would consist of two steps:
1) build a table map of all tables involved into order by/group by
2) check if all needed tables are present in the partial plan and if so compare cost
Prune to taste.

This check should replace old one in best_extension..., eq_ref_extension..., 
plan_is_complete functions.
I attached a sample patch below.

Regards, Evgen.

----------------------
=== modified file 'sql/sql_select.cc'
*** sql/sql_select.cc    2011-04-28 12:44:53 +0000
--- sql/sql_select.cc    2011-04-28 12:45:02 +0000
*************** static bool best_extension_by_limited_se
*** 85,96 ****
                                                table_map remaining_tables,
                                                uint idx, double record_count,
                                                double read_time, uint depth,
!                                              uint prune_level);
   static table_map eq_ref_extension_by_limited_search(JOIN *join,
                                                  table_map remaining_tables,
                                                  uint idx, double record_count,
                                                  double read_time, uint depth,
!                                                uint prune_level);
   static uint determine_search_depth(JOIN* join);
   C_MODE_START
   static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
--- 85,98 ----
                                                table_map remaining_tables,
                                                uint idx, double record_count,
                                                double read_time, uint depth,
!                                              uint prune_level,
!                                              bool need_sort);
   static table_map eq_ref_extension_by_limited_search(JOIN *join,
                                                  table_map remaining_tables,
                                                  uint idx, double record_count,
                                                  double read_time, uint depth,
!                                                uint prune_level,
!                                                bool need_sort);
   static uint determine_search_depth(JOIN* join);
   C_MODE_START
   static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
*************** JOIN::optimize()
*** 1955,1960 ****
--- 1957,1974 ----
     }
     error= -1;                    // Error is sent to client
     sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
+   if (order || group_list)
+   {
+     for (ORDER *ord= order; ord; ord= ord->next)
+       order_map|= ord->depend_map;
+     for (ORDER *ord= group_list; ord; ord= ord->next)
+       order_map|= ord->depend_map;
+   }

     /* Calculate how to do the join */
     THD_STAGE_INFO(thd, stage_statistics);
*************** greedy_search(JOIN      *join,
*** 8035,8041 ****
       /* Find the extension of the current QEP with the lowest cost */
       join->best_read= DBL_MAX;
       if (best_extension_by_limited_search(join, remaining_tables, idx, 
record_count,
!                                          read_time, search_depth, prune_level))
         DBUG_RETURN(TRUE);
       /*
         'best_read < DBL_MAX' means that optimizer managed to find
--- 8049,8056 ----
       /* Find the extension of the current QEP with the lowest cost */
       join->best_read= DBL_MAX;
       if (best_extension_by_limited_search(join, remaining_tables, idx, 
record_count,
!                                          read_time, search_depth,
!                                          prune_level, join->order_map != 0))
         DBUG_RETURN(TRUE);
       /*
         'best_read < DBL_MAX' means that optimizer managed to find
*************** best_extension_by_limited_search(JOIN
*** 8313,8319 ****
                                    double    record_count,
                                    double    read_time,
                                    uint      search_depth,
!                                  uint      prune_level)
   {
     DBUG_ENTER("best_extension_by_limited_search");

--- 8328,8335 ----
                                    double    record_count,
                                    double    read_time,
                                    uint      search_depth,
!                                  uint      prune_level,
!                                  bool      need_sort)
   {
     DBUG_ENTER("best_extension_by_limited_search");

*************** best_extension_by_limited_search(JOIN
*** 8401,8407 ****
           backout_nj_sj_state(remaining_tables, s);
           continue;
         }
!
         /*
           Prune some less promising partial plans. This heuristic may miss
           the optimal QEPs, thus it results in a non-exhaustive search.
--- 8417,8445 ----
           backout_nj_sj_state(remaining_tables, s);
           continue;
         }
!       /*
!         Prune plan by sorting cost if:
!         1) there is ORDER or GROUP clause
!         2) all necessary tables are already joined
!         3) we need to sort (tables to be sorted aren't const or eq_ref'ed to
!            them)
!       */
!       if (need_sort &&                                // (1)
!           !(join->order_map & remaining_tables))      // (2)
!       {
!         if (current_record_count == 1)                // (3)
!           need_sort= false;
!         else if (current_read_time + current_record_count > join->best_read)
!         {
!           DBUG_EXECUTE("opt", print_plan(join, idx+1,
!                                          current_record_count,
!                                          read_time,
!                                          current_read_time,
!                                          "prune_by_sort_cost"););
!           backout_nj_sj_state(remaining_tables, s);
!           continue;
!         }
!       }
         /*
           Prune some less promising partial plans. This heuristic may miss
           the optimal QEPs, thus it results in a non-exhaustive search.
*************** best_extension_by_limited_search(JOIN
*** 8469,8475 ****
                                                  current_record_count,
                                                  current_read_time,
                                                  search_depth - 1,
!                                                prune_level);
                 if (eq_ref_extended == ~(table_map)0)
                   DBUG_RETURN(TRUE);      // Failed
                 if (eq_ref_extended == remaining_tables)
--- 8507,8513 ----
                                                  current_record_count,
                                                  current_read_time,
                                                  search_depth - 1,
!                                                prune_level, need_sort);
                 if (eq_ref_extended == ~(table_map)0)
                   DBUG_RETURN(TRUE);      // Failed
                 if (eq_ref_extended == remaining_tables)
*************** best_extension_by_limited_search(JOIN
*** 8494,8500 ****
                                                  current_record_count,
                                                  current_read_time,
                                                  search_depth - 1,
!                                                prune_level))
               DBUG_RETURN(TRUE);
           } while (false);
         }
--- 8532,8539 ----
                                                  current_record_count,
                                                  current_read_time,
                                                  search_depth - 1,
!                                                prune_level,
!                                                need_sort))
               DBUG_RETURN(TRUE);
           } while (false);
         }
*************** eq_ref_extension_by_limited_search(JOIN
*** 8631,8637 ****
                                      double    record_count,
                                      double    read_time,
                                      uint      search_depth,
!                                    uint      prune_level)
   {
     DBUG_ENTER("eq_ref_extension_by_limited_search");

--- 8670,8677 ----
                                      double    record_count,
                                      double    read_time,
                                      uint      search_depth,
!                                    uint      prune_level,
!                                    bool      need_sort)
   {
     DBUG_ENTER("eq_ref_extension_by_limited_search");

*************** eq_ref_extension_by_limited_search(JOIN
*** 8750,8756 ****
                                                      current_read_time,
                                                      (search_depth > 1)
                                                         ? search_depth - 1 : 0,
!                                                    prune_level);
             }
             else
             {
--- 8790,8796 ----
                                                      current_read_time,
                                                      (search_depth > 1)
                                                         ? search_depth - 1 : 0,
!                                                    prune_level, need_sort);
             }
             else
             {
*************** eq_ref_extension_by_limited_search(JOIN
*** 8778,8784 ****
                                            record_count,
                                            read_time,
                                            search_depth,
!                                          prune_level))
         DBUG_RETURN(~(table_map)0);

       DBUG_RETURN(eq_ref_ext);
--- 8818,8825 ----
                                            record_count,
                                            read_time,
                                            search_depth,
!                                          prune_level,
!                                          need_sort))
         DBUG_RETURN(~(table_map)0);

       DBUG_RETURN(eq_ref_ext);


----------------------

On 02/01/2011 01:27 PM, Ole John Aske wrote:
> #At file:///net/fimafeng09/export/home/tmp/oleja/mysql/mysql-trunk/ based on
> revid:ole.john.aske@stripped
>
>   3553 Ole John Aske	2011-02-01
>        Last addendum fix related to bug#59326, and particularly
>        its relatives/duplicates bug#41740 and bug#58225.
>
>        If a filesort is required by the final result, we can include
>        the cost for the filesort in the pruning of 'bad' plans, and
>        possible prune the plan at an earlier stage than today.
>
>        NOTE1: The 'sort_cost' is calculated with logic similar
>               to the one in plan_is_complete() which adds the sort_cost
>               to the best plan.
>
>        NOTE2: The 'sort_cost' is calculated as '= current_record_count'.
>               This is not entirely correct when used in the pruning logic
>               as we do not yet #rows in the *final* result which is what
>               we have to sort. However, it should be a safe assumption that
>               'final #rows'>= 'intermediat #rows'. So this prune-calculation
>               should always be on the safe side.
>
>        NOTE3: Calculating 'sort_cost= current_record_count' is a bad
>               estimate as most sort algorithms are O(n*log(n)).
>
>        This fix reduce the query compile time for the infamous bug#58225 problem
>        further from 50ms to ~20ms
>
>      modified:
>        sql/sql_select.cc
> === modified file 'sql/sql_select.cc'
> --- a/sql/sql_select.cc	2011-02-01 09:43:41 +0000
> +++ b/sql/sql_select.cc	2011-02-01 10:27:28 +0000
> @@ -8325,7 +8325,31 @@ best_extension_by_limited_search(JOIN
>             backout_nj_sj_state(remaining_tables, s);
>             continue;
>           }
> -      }
> +
> +        /*
> +          There might later apply an addition cost for filesorting
> +          if the result can't be ordered by using an ordered index.
> +          Can prune if 'cost sofar + later_sort' already exceeds 'best_read'.
> +          Note: Similar code in plan_is_complete().
> +        */
> +        if (idx>  join->const_tables&&   //
> '->positions[const_tables]' valid?
> +            join->sort_by_table&&
> +            join->sort_by_table !=
> +            join->positions[join->const_tables].table->table)
> +        {
> +          double sort_time = current_record_count;
> +          if (current_read_time + sort_time>= join->best_read)
> +          {
> +            DBUG_EXECUTE("opt", print_plan(join, idx+1,
> +                                           current_record_count,
> +                                           read_time,
> +                                           current_read_time,
> +                                           "prune_by_sortcost_heuristic"););
> +            backout_nj_sj_state(remaining_tables, s);
> +            continue;
> +          }
> +        }
> +      } // if (prune...)
>
>         if ( (search_depth>  1)&&  (remaining_tables& 
> ~real_table_bit)&  allowed_tables )
>         {
>
>
>
>

Thread
bzr commit into mysql-trunk branch (ole.john.aske:3553) Bug#41740 Bug#58225Bug#59326Ole John Aske1 Feb
  • Re: bzr commit into mysql-trunk branch (ole.john.aske:3553) Bug#41740Bug#58225 Bug#59326Olav Sandstaa19 Apr
  • Re: bzr commit into mysql-trunk branch (ole.john.aske:3553) Bug#41740Bug#58225 Bug#59326Evgeny Potemkin28 Apr