Hi Ole John,
The idea to prune plans earlier is good, but the check itself is ineffective.
At the moment of the check sort_by_table points to a table to be sorted if:
1) there is only one of order by and group by lists or they are 'compatible'
(i.e the same)
2) all order by/group by items are from the only one table
otherwise it's NULL.
And the check effectively tests if such table isn't a const one.
Obviously it's far from covering real order by usage.
I suggest to replace it for another one. It would consist of two steps:
1) build a table map of all tables involved into order by/group by
2) check if all needed tables are present in the partial plan and if so compare cost
Prune to taste.
This check should replace old one in best_extension..., eq_ref_extension...,
plan_is_complete functions.
I attached a sample patch below.
Regards, Evgen.
----------------------
=== modified file 'sql/sql_select.cc'
*** sql/sql_select.cc 2011-04-28 12:44:53 +0000
--- sql/sql_select.cc 2011-04-28 12:45:02 +0000
*************** static bool best_extension_by_limited_se
*** 85,96 ****
table_map remaining_tables,
uint idx, double record_count,
double read_time, uint depth,
! uint prune_level);
static table_map eq_ref_extension_by_limited_search(JOIN *join,
table_map remaining_tables,
uint idx, double record_count,
double read_time, uint depth,
! uint prune_level);
static uint determine_search_depth(JOIN* join);
C_MODE_START
static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
--- 85,98 ----
table_map remaining_tables,
uint idx, double record_count,
double read_time, uint depth,
! uint prune_level,
! bool need_sort);
static table_map eq_ref_extension_by_limited_search(JOIN *join,
table_map remaining_tables,
uint idx, double record_count,
double read_time, uint depth,
! uint prune_level,
! bool need_sort);
static uint determine_search_depth(JOIN* join);
C_MODE_START
static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
*************** JOIN::optimize()
*** 1955,1960 ****
--- 1957,1974 ----
}
error= -1; // Error is sent to client
sort_by_table= get_sort_by_table(order, group_list, select_lex->leaf_tables);
+ if (order || group_list)
+ {
+ for (ORDER *ord= order; ord; ord= ord->next)
+ order_map|= ord->depend_map;
+ for (ORDER *ord= group_list; ord; ord= ord->next)
+ order_map|= ord->depend_map;
+ }
/* Calculate how to do the join */
THD_STAGE_INFO(thd, stage_statistics);
*************** greedy_search(JOIN *join,
*** 8035,8041 ****
/* Find the extension of the current QEP with the lowest cost */
join->best_read= DBL_MAX;
if (best_extension_by_limited_search(join, remaining_tables, idx,
record_count,
! read_time, search_depth, prune_level))
DBUG_RETURN(TRUE);
/*
'best_read < DBL_MAX' means that optimizer managed to find
--- 8049,8056 ----
/* Find the extension of the current QEP with the lowest cost */
join->best_read= DBL_MAX;
if (best_extension_by_limited_search(join, remaining_tables, idx,
record_count,
! read_time, search_depth,
! prune_level, join->order_map != 0))
DBUG_RETURN(TRUE);
/*
'best_read < DBL_MAX' means that optimizer managed to find
*************** best_extension_by_limited_search(JOIN
*** 8313,8319 ****
double record_count,
double read_time,
uint search_depth,
! uint prune_level)
{
DBUG_ENTER("best_extension_by_limited_search");
--- 8328,8335 ----
double record_count,
double read_time,
uint search_depth,
! uint prune_level,
! bool need_sort)
{
DBUG_ENTER("best_extension_by_limited_search");
*************** best_extension_by_limited_search(JOIN
*** 8401,8407 ****
backout_nj_sj_state(remaining_tables, s);
continue;
}
!
/*
Prune some less promising partial plans. This heuristic may miss
the optimal QEPs, thus it results in a non-exhaustive search.
--- 8417,8445 ----
backout_nj_sj_state(remaining_tables, s);
continue;
}
! /*
! Prune plan by sorting cost if:
! 1) there is ORDER or GROUP clause
! 2) all necessary tables are already joined
! 3) we need to sort (tables to be sorted aren't const or eq_ref'ed to
! them)
! */
! if (need_sort && // (1)
! !(join->order_map & remaining_tables)) // (2)
! {
! if (current_record_count == 1) // (3)
! need_sort= false;
! else if (current_read_time + current_record_count > join->best_read)
! {
! DBUG_EXECUTE("opt", print_plan(join, idx+1,
! current_record_count,
! read_time,
! current_read_time,
! "prune_by_sort_cost"););
! backout_nj_sj_state(remaining_tables, s);
! continue;
! }
! }
/*
Prune some less promising partial plans. This heuristic may miss
the optimal QEPs, thus it results in a non-exhaustive search.
*************** best_extension_by_limited_search(JOIN
*** 8469,8475 ****
current_record_count,
current_read_time,
search_depth - 1,
! prune_level);
if (eq_ref_extended == ~(table_map)0)
DBUG_RETURN(TRUE); // Failed
if (eq_ref_extended == remaining_tables)
--- 8507,8513 ----
current_record_count,
current_read_time,
search_depth - 1,
! prune_level, need_sort);
if (eq_ref_extended == ~(table_map)0)
DBUG_RETURN(TRUE); // Failed
if (eq_ref_extended == remaining_tables)
*************** best_extension_by_limited_search(JOIN
*** 8494,8500 ****
current_record_count,
current_read_time,
search_depth - 1,
! prune_level))
DBUG_RETURN(TRUE);
} while (false);
}
--- 8532,8539 ----
current_record_count,
current_read_time,
search_depth - 1,
! prune_level,
! need_sort))
DBUG_RETURN(TRUE);
} while (false);
}
*************** eq_ref_extension_by_limited_search(JOIN
*** 8631,8637 ****
double record_count,
double read_time,
uint search_depth,
! uint prune_level)
{
DBUG_ENTER("eq_ref_extension_by_limited_search");
--- 8670,8677 ----
double record_count,
double read_time,
uint search_depth,
! uint prune_level,
! bool need_sort)
{
DBUG_ENTER("eq_ref_extension_by_limited_search");
*************** eq_ref_extension_by_limited_search(JOIN
*** 8750,8756 ****
current_read_time,
(search_depth > 1)
? search_depth - 1 : 0,
! prune_level);
}
else
{
--- 8790,8796 ----
current_read_time,
(search_depth > 1)
? search_depth - 1 : 0,
! prune_level, need_sort);
}
else
{
*************** eq_ref_extension_by_limited_search(JOIN
*** 8778,8784 ****
record_count,
read_time,
search_depth,
! prune_level))
DBUG_RETURN(~(table_map)0);
DBUG_RETURN(eq_ref_ext);
--- 8818,8825 ----
record_count,
read_time,
search_depth,
! prune_level,
! need_sort))
DBUG_RETURN(~(table_map)0);
DBUG_RETURN(eq_ref_ext);
----------------------
On 02/01/2011 01:27 PM, Ole John Aske wrote:
> #At file:///net/fimafeng09/export/home/tmp/oleja/mysql/mysql-trunk/ based on
> revid:ole.john.aske@stripped
>
> 3553 Ole John Aske 2011-02-01
> Last addendum fix related to bug#59326, and particularly
> its relatives/duplicates bug#41740 and bug#58225.
>
> If a filesort is required by the final result, we can include
> the cost for the filesort in the pruning of 'bad' plans, and
> possible prune the plan at an earlier stage than today.
>
> NOTE1: The 'sort_cost' is calculated with logic similar
> to the one in plan_is_complete() which adds the sort_cost
> to the best plan.
>
> NOTE2: The 'sort_cost' is calculated as '= current_record_count'.
> This is not entirely correct when used in the pruning logic
> as we do not yet #rows in the *final* result which is what
> we have to sort. However, it should be a safe assumption that
> 'final #rows'>= 'intermediat #rows'. So this prune-calculation
> should always be on the safe side.
>
> NOTE3: Calculating 'sort_cost= current_record_count' is a bad
> estimate as most sort algorithms are O(n*log(n)).
>
> This fix reduce the query compile time for the infamous bug#58225 problem
> further from 50ms to ~20ms
>
> modified:
> sql/sql_select.cc
> === modified file 'sql/sql_select.cc'
> --- a/sql/sql_select.cc 2011-02-01 09:43:41 +0000
> +++ b/sql/sql_select.cc 2011-02-01 10:27:28 +0000
> @@ -8325,7 +8325,31 @@ best_extension_by_limited_search(JOIN
> backout_nj_sj_state(remaining_tables, s);
> continue;
> }
> - }
> +
> + /*
> + There might later apply an addition cost for filesorting
> + if the result can't be ordered by using an ordered index.
> + Can prune if 'cost sofar + later_sort' already exceeds 'best_read'.
> + Note: Similar code in plan_is_complete().
> + */
> + if (idx> join->const_tables&& //
> '->positions[const_tables]' valid?
> + join->sort_by_table&&
> + join->sort_by_table !=
> + join->positions[join->const_tables].table->table)
> + {
> + double sort_time = current_record_count;
> + if (current_read_time + sort_time>= join->best_read)
> + {
> + DBUG_EXECUTE("opt", print_plan(join, idx+1,
> + current_record_count,
> + read_time,
> + current_read_time,
> + "prune_by_sortcost_heuristic"););
> + backout_nj_sj_state(remaining_tables, s);
> + continue;
> + }
> + }
> + } // if (prune...)
>
> if ( (search_depth> 1)&& (remaining_tables&
> ~real_table_bit)& allowed_tables )
> {
>
>
>
>