#At file:///home/rl136806/mysql/repo/mysql-work5/ based on revid:tor.didriksen@stripped
3367 Roy Lyseng 2011-05-02
Bug#12407753: Time to compare a row is missing in cost calculation of semi-join
Stage 1: Change cost factors to be multiplicative
Basically, this patch changes the constant values TIME_FOR_COMPARE
and TIME_FOR_COMPARE_ROWID to be the inverse of their previous values
and gives them names that are more similar to other cost factors.
As a consequence, all cost calculations involving these constants
are changed from divisions to multiplications.
sql/filesort.cc
Replaced cost divisions with multiplications.
sql/filesort_utils.cc
Replaced cost divisions with multiplications.
sql/ha_ndbcluster.cc
Replaced cost divisions with multiplications.
Removed redundant cast to (double).
sql/handler.cc
Replaced cost divisions with multiplications.
Removed redundant cast to (double).
sql/opt_range.cc
Replaced cost divisions with multiplications.
Removed redundant casts to (double).
sql/sql_const.h
Replaced definition of TIME_FOR_COMPARE with ROW_EVALUATE_COST.
Replaced definition of TIME_FOR_COMPARE_ROWID with ROWID_COMPARE_COST.
sql/sql_select.cc
Replaced cost divisions with multiplications.
Removed redundant casts to (double).
sql/uniques.cc
Replaced cost divisions with multiplications.
modified:
sql/filesort.cc
sql/filesort_utils.cc
sql/ha_ndbcluster.cc
sql/handler.cc
sql/opt_range.cc
sql/sql_const.h
sql/sql_select.cc
sql/uniques.cc
=== modified file 'sql/filesort.cc'
--- a/sql/filesort.cc 2011-04-04 08:47:25 +0000
+++ b/sql/filesort.cc 2011-05-02 13:22:25 +0000
@@ -1226,7 +1226,7 @@ bool check_if_pq_applicable(Sort_param *
row_length);
/*
PQ has cost:
- (insert + qsort) * log(queue size) / TIME_FOR_COMPARE_ROWID +
+ (insert + qsort) * log(queue size) * ROWID_COMPARE_COST +
cost of file lookup afterwards.
The lookup cost is a bit pessimistic: we take scan_time and assume
that on average we find the row after scanning half of the file.
@@ -1235,7 +1235,7 @@ bool check_if_pq_applicable(Sort_param *
*/
const double pq_cpu_cost=
(PQ_slowness * num_rows + param->max_keys_per_buffer) *
- log((double) param->max_keys_per_buffer) / TIME_FOR_COMPARE_ROWID;
+ log((double) param->max_keys_per_buffer) * ROWID_COMPARE_COST;
const double pq_io_cost=
param->max_rows * table->file->scan_time() / 2.0;
const double pq_cost= pq_cpu_cost + pq_io_cost;
=== modified file 'sql/filesort_utils.cc'
--- a/sql/filesort_utils.cc 2010-12-17 09:41:21 +0000
+++ b/sql/filesort_utils.cc 2011-05-02 13:22:25 +0000
@@ -26,8 +26,7 @@ double get_merge_cost(ha_rows num_elemen
{
return
2.0 * ((double) num_elements * elem_size) / IO_SIZE
- + (double) num_elements * log((double) num_buffers) /
- (TIME_FOR_COMPARE_ROWID * M_LN2);
+ + num_elements * log((double) num_buffers) * ROWID_COMPARE_COST / M_LN2;
}
}
@@ -49,8 +48,7 @@ double get_merge_many_buffs_cost_fast(ha
// Calculate CPU cost of sorting buffers.
total_cost=
( num_buffers * num_keys_per_buffer * log(1.0 + num_keys_per_buffer) +
- last_n_elems * log(1.0 + last_n_elems) )
- / TIME_FOR_COMPARE_ROWID;
+ last_n_elems * log(1.0 + last_n_elems) ) * ROWID_COMPARE_COST;
// Simulate behavior of merge_many_buff().
while (num_buffers >= MERGEBUFF2)
=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc 2011-04-19 03:29:06 +0000
+++ b/sql/ha_ndbcluster.cc 2011-05-02 13:22:25 +0000
@@ -9033,7 +9033,7 @@ ha_ndbcluster::multi_range_read_info_con
cost->io_count= index_only_read_time(keyno, total_rows);
else
cost->io_count= read_time(keyno, n_ranges, total_rows);
- cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+ cost->cpu_cost= total_rows * ROW_EVALUATE_COST + 0.01;
}
return total_rows;
}
=== modified file 'sql/handler.cc'
--- a/sql/handler.cc 2011-04-28 11:55:16 +0000
+++ b/sql/handler.cc 2011-05-02 13:22:25 +0000
@@ -4559,7 +4559,7 @@ handler::multi_range_read_info_const(uin
cost->io_count= index_only_read_time(keyno, total_rows);
else
cost->io_count= read_time(keyno, n_ranges, total_rows);
- cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+ cost->cpu_cost= total_rows * ROW_EVALUATE_COST + 0.01;
}
return total_rows;
}
@@ -5305,7 +5305,7 @@ void get_sort_and_sweep_cost(TABLE *tabl
{
get_sweep_read_cost(table, nrows, FALSE, cost);
/* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
- double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
+ double cmp_op= rows2double(nrows) * ROWID_COMPARE_COST;
if (cmp_op < 3)
cmp_op= 3;
cost->cpu_cost += cmp_op * log2(cmp_op);
=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc 2011-04-23 20:44:45 +0000
+++ b/sql/opt_range.cc 2011-05-02 13:22:25 +0000
@@ -2223,8 +2223,8 @@ int SQL_SELECT::test_quick_select(THD *t
records= head->file->stats.records;
if (!records)
records++; /* purecov: inspected */
- scan_time= (double) records / TIME_FOR_COMPARE + 1;
- read_time= (double) head->file->scan_time() + scan_time + 1.1;
+ scan_time= records * ROW_EVALUATE_COST + 1;
+ read_time= head->file->scan_time() + scan_time + 1.1;
if (head->force_index)
scan_time= read_time= DBL_MAX;
if (limit < records)
@@ -2324,7 +2324,7 @@ int SQL_SELECT::test_quick_select(THD *t
double key_read_time=
param.table->file->index_only_read_time(key_for_use,
rows2double(records)) +
- (double) records / TIME_FOR_COMPARE;
+ records * ROW_EVALUATE_COST;
DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, "
"read time %g", key_for_use, key_read_time));
if (key_read_time < read_time)
@@ -3877,7 +3877,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick
Add one ROWID comparison for each row retrieved on non-CPK scan. (it
is done in QUICK_RANGE_SELECT::row_in_ranges)
*/
- imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID;
+ imerge_cost += non_cpk_scan_records * ROWID_COMPARE_COST;
}
/* Calculate cost(rowid_to_row_scan) */
@@ -3966,7 +3966,7 @@ skip_to_ror_scan:
cost= param->table->file->
read_time(param->real_keynr[(*cur_child)->key_idx], 1,
(*cur_child)->records) +
- rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
+ rows2double((*cur_child)->records) * ROW_EVALUATE_COST;
}
else
cost= read_time;
@@ -4013,8 +4013,8 @@ skip_to_ror_scan:
get_sweep_read_cost(param->table, roru_total_records, is_interrupted,
&sweep_cost);
roru_total_cost= roru_index_costs +
- rows2double(roru_total_records)*log((double)n_child_scans) /
- (TIME_FOR_COMPARE_ROWID * M_LN2) +
+ rows2double(roru_total_records) *
+ log((double)n_child_scans) * ROWID_COMPARE_COST / M_LN2 +
sweep_cost.total_cost();
}
@@ -4469,11 +4469,11 @@ static bool ror_intersect_add(ROR_INTERS
{
/*
CPK scan is used to filter out rows. We apply filtering for
- each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID
+ each record of every scan. Assuming ROWID_COMPARE_COST
per check this gives us:
*/
- info->index_scan_costs += rows2double(info->index_records) /
- TIME_FOR_COMPARE_ROWID;
+ info->index_scan_costs += rows2double(info->index_records) *
+ ROWID_COMPARE_COST;
}
else
{
@@ -4856,9 +4856,9 @@ TRP_ROR_INTERSECT *get_best_covering_ror
tree->ror_scans, ror_scan_mark););
/* Add priority queue use cost. */
- total_cost += rows2double(records)*
- log((double)(ror_scan_mark - tree->ror_scans)) /
- (TIME_FOR_COMPARE_ROWID * M_LN2);
+ total_cost += rows2double(records) *
+ log((double)(ror_scan_mark - tree->ror_scans)) *
+ ROWID_COMPARE_COST / M_LN2;
DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost));
if (total_cost > read_time)
@@ -10529,7 +10529,7 @@ void cost_group_min_max(TABLE* table, KE
no CPU cost. We leave it here to make this cost comparable to that of index
scan as computed in SQL_SELECT::test_quick_select().
*/
- cpu_cost= (double) num_groups / TIME_FOR_COMPARE;
+ cpu_cost= num_groups * ROW_EVALUATE_COST;
*read_cost= io_cost + cpu_cost;
*records= num_groups;
=== modified file 'sql/sql_const.h'
--- a/sql/sql_const.h 2010-12-17 09:41:21 +0000
+++ b/sql/sql_const.h 2011-05-02 13:22:25 +0000
@@ -158,16 +158,15 @@
/**
The following is used to decide if MySQL should use table scanning
- instead of reading with keys. The number says how many evaluation of the
- WHERE clause is comparable to reading one extra row from a table.
+ instead of reading with keys. The number says how costly evaluation of the
+ filter condition for a row is compared to reading one extra row from a table.
*/
-#define TIME_FOR_COMPARE 5.0 // 5 compares == one read
+#define ROW_EVALUATE_COST 0.20
/**
- Number of comparisons of table rowids equivalent to reading one row from a
- table.
+ Cost of comparing a rowid compared to reading one row from a table.
*/
-#define TIME_FOR_COMPARE_ROWID (TIME_FOR_COMPARE*2.0)
+#define ROWID_COMPARE_COST 0.10 // Half the cost of a general row comparison
/*
For sequential disk seeks the cost formula is:
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2011-04-28 11:55:16 +0000
+++ b/sql/sql_select.cc 2011-05-02 13:22:25 +0000
@@ -7452,12 +7452,12 @@ best_access_path(JOIN *join,
loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
} /* not ft_key */
- if (tmp < best_time - records/(double) TIME_FOR_COMPARE ||
+ if (tmp < best_time - records * ROW_EVALUATE_COST ||
(quick_matches_more_parts &&
quick_records < best_quick_records))
{
best_quick_records = quick_records;
- best_time= tmp + records/(double) TIME_FOR_COMPARE;
+ best_time= tmp + records * ROW_EVALUATE_COST;
best= tmp;
best_records= records;
best_key= start_key;
@@ -7542,7 +7542,7 @@ best_access_path(JOIN *join,
*/
tmp= record_count *
(s->quick->read_time +
- (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
+ (s->found_records - rnd_records) * ROW_EVALUATE_COST);
loose_scan_opt.check_range_access(join, idx, s->quick);
}
@@ -7562,8 +7562,7 @@ best_access_path(JOIN *join,
- skip rows which does not satisfy join condition
*/
tmp= record_count *
- (tmp +
- (s->records - rnd_records)/(double) TIME_FOR_COMPARE);
+ (tmp + (s->records - rnd_records) * ROW_EVALUATE_COST);
}
else
{
@@ -7582,18 +7581,18 @@ best_access_path(JOIN *join,
we read the table (see flush_cached_records for details). Here we
take into account cost to read and skip these records.
*/
- tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
+ tmp+= (s->records - rnd_records) * ROW_EVALUATE_COST;
}
}
/*
We estimate the cost of evaluating WHERE clause for found records
- as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
+ as record_count * rnd_records * ROW_EVALUATE_COST. This cost plus
tmp give us total cost of using TABLE SCAN
*/
if (best == DBL_MAX ||
- (tmp + record_count/(double) TIME_FOR_COMPARE*rnd_records <
- best + record_count/(double) TIME_FOR_COMPARE*records))
+ (tmp + (record_count * ROW_EVALUATE_COST * rnd_records) <
+ best + (record_count * ROW_EVALUATE_COST * records)))
{
/*
If the table has a range (s->quick is set) make_join_select()
@@ -7914,7 +7913,7 @@ void Optimize_table_order::optimize_stra
/* compute the cost of the new plan extended with 's' */
record_count*= join->positions[idx].records_read;
read_time+= join->positions[idx].read_time
- + record_count / (double) TIME_FOR_COMPARE;
+ + record_count * ROW_EVALUATE_COST;
advance_sj_state(join_tables, s, idx, &record_count, &read_time,
&loose_scan_pos);
@@ -8168,7 +8167,7 @@ bool Optimize_table_order::greedy_search
/* compute the cost of the new plan extended with 'best_table' */
record_count*= join->positions[idx].records_read;
read_time+= join->positions[idx].read_time
- + record_count / (double) TIME_FOR_COMPARE;
+ + record_count * ROW_EVALUATE_COST;
remaining_tables&= ~(best_table->table->map);
--size_remain;
@@ -8214,7 +8213,7 @@ void get_partial_join_cost(JOIN *join, u
{
record_count *= join->best_positions[i].records_read;
read_time += join->best_positions[i].read_time
- + record_count / (double) TIME_FOR_COMPARE;
+ + record_count * ROW_EVALUATE_COST;
}
}
*read_time_arg= read_time;
@@ -8393,7 +8392,7 @@ bool Optimize_table_order::best_extensio
current_record_count= record_count * position->records_read;
current_read_time= read_time
+ position->read_time
- + current_record_count / (double) TIME_FOR_COMPARE;
+ + current_record_count * ROW_EVALUATE_COST;
if (has_sj)
{
=== modified file 'sql/uniques.cc'
--- a/sql/uniques.cc 2011-03-09 20:54:55 +0000
+++ b/sql/uniques.cc 2011-05-02 13:22:25 +0000
@@ -122,7 +122,7 @@ inline double log2_n_fact(double x)
the same length, so each of total_buf_size elements will be added to a sort
heap with (n_buffers-1) elements. This gives the comparison cost:
- total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID;
+ total_buf_elems * log2(n_buffers) * ROWID_COMPARE_COST;
*/
static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
@@ -137,7 +137,7 @@ static double get_merge_buffers_cost(uin
/* Using log2(n)=log(n)/log(2) formula */
return 2*((double)total_buf_elems*elem_size) / IO_SIZE +
- total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2);
+ total_buf_elems*log((double) n_buffers) * ROWID_COMPARE_COST / M_LN2;
}
@@ -267,7 +267,6 @@ double Unique::get_use_cost(uint *buffer
ulong max_elements_in_tree;
ulong last_tree_elems;
int n_full_trees; /* number of trees in unique - 1 */
- double result;
max_elements_in_tree= ((ulong) max_in_memory_size /
ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
@@ -276,10 +275,10 @@ double Unique::get_use_cost(uint *buffer
last_tree_elems= nkeys % max_elements_in_tree;
/* Calculate cost of creating trees */
- result= 2*log2_n_fact(last_tree_elems + 1.0);
+ double result= 2 * log2_n_fact(last_tree_elems + 1.0);
if (n_full_trees)
result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
- result /= TIME_FOR_COMPARE_ROWID;
+ result*= ROWID_COMPARE_COST;
DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys,
n_full_trees, n_full_trees?max_elements_in_tree:0,
Attachment: [text/bzr-bundle] bzr/roy.lyseng@oracle.com-20110502132225-p0aanleg1clco1zj.bundle
| Thread |
|---|
| • bzr commit into mysql-trunk branch (roy.lyseng:3367) Bug#12407753 | Roy Lyseng | 2 May |