From: Roy Lyseng Date: May 2 2011 1:22pm Subject: bzr push into mysql-trunk branch (roy.lyseng:3366 to 3367) Bug#12407753 List-Archive: http://lists.mysql.com/commits/136524 X-Bug: 12407753 Message-Id: <20110502132258.CBCCA1F4@tyr67.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3367 Roy Lyseng 2011-05-02 Bug#12407753: Time to compare a row is missing in cost calculation of semi-join Stage 1: Change cost factors to be multiplicative Basically, this patch changes the constant values TIME_FOR_COMPARE and TIME_FOR_COMPARE_ROWID to be the inverse of their previous values and gives them names that are more similar to other cost factors. As a consequence, all cost calculations involving these constants are changed from divisions to multiplications. sql/filesort.cc Replaced cost divisions with multiplications. sql/filesort_utils.cc Replaced cost divisions with multiplications. sql/ha_ndbcluster.cc Replaced cost divisions with multiplications. Removed redundant cast to (double). sql/handler.cc Replaced cost divisions with multiplications. Removed redundant cast to (double). sql/opt_range.cc Replaced cost divisions with multiplications. Removed redundant casts to (double). sql/sql_const.h Replaced definition of TIME_FOR_COMPARE with ROW_EVALUATE_COST. Replaced definition of TIME_FOR_COMPARE_ROWID with ROWID_COMPARE_COST. sql/sql_select.cc Replaced cost divisions with multiplications. Removed redundant casts to (double). sql/uniques.cc Replaced cost divisions with multiplications. modified: sql/filesort.cc sql/filesort_utils.cc sql/ha_ndbcluster.cc sql/handler.cc sql/opt_range.cc sql/sql_const.h sql/sql_select.cc sql/uniques.cc 3366 Tor Didriksen 2011-04-28 [merge] NULL merge trunk => opt-backporting === modified file 'sql/filesort.cc' --- a/sql/filesort.cc 2011-04-04 08:47:25 +0000 +++ b/sql/filesort.cc 2011-05-02 13:22:25 +0000 @@ -1226,7 +1226,7 @@ bool check_if_pq_applicable(Sort_param * row_length); /* PQ has cost: - (insert + qsort) * log(queue size) / TIME_FOR_COMPARE_ROWID + + (insert + qsort) * log(queue size) * ROWID_COMPARE_COST + cost of file lookup afterwards. The lookup cost is a bit pessimistic: we take scan_time and assume that on average we find the row after scanning half of the file. @@ -1235,7 +1235,7 @@ bool check_if_pq_applicable(Sort_param * */ const double pq_cpu_cost= (PQ_slowness * num_rows + param->max_keys_per_buffer) * - log((double) param->max_keys_per_buffer) / TIME_FOR_COMPARE_ROWID; + log((double) param->max_keys_per_buffer) * ROWID_COMPARE_COST; const double pq_io_cost= param->max_rows * table->file->scan_time() / 2.0; const double pq_cost= pq_cpu_cost + pq_io_cost; === modified file 'sql/filesort_utils.cc' --- a/sql/filesort_utils.cc 2010-12-17 09:41:21 +0000 +++ b/sql/filesort_utils.cc 2011-05-02 13:22:25 +0000 @@ -26,8 +26,7 @@ double get_merge_cost(ha_rows num_elemen { return 2.0 * ((double) num_elements * elem_size) / IO_SIZE - + (double) num_elements * log((double) num_buffers) / - (TIME_FOR_COMPARE_ROWID * M_LN2); + + num_elements * log((double) num_buffers) * ROWID_COMPARE_COST / M_LN2; } } @@ -49,8 +48,7 @@ double get_merge_many_buffs_cost_fast(ha // Calculate CPU cost of sorting buffers. total_cost= ( num_buffers * num_keys_per_buffer * log(1.0 + num_keys_per_buffer) + - last_n_elems * log(1.0 + last_n_elems) ) - / TIME_FOR_COMPARE_ROWID; + last_n_elems * log(1.0 + last_n_elems) ) * ROWID_COMPARE_COST; // Simulate behavior of merge_many_buff(). while (num_buffers >= MERGEBUFF2) === modified file 'sql/ha_ndbcluster.cc' --- a/sql/ha_ndbcluster.cc 2011-04-19 03:29:06 +0000 +++ b/sql/ha_ndbcluster.cc 2011-05-02 13:22:25 +0000 @@ -9033,7 +9033,7 @@ ha_ndbcluster::multi_range_read_info_con cost->io_count= index_only_read_time(keyno, total_rows); else cost->io_count= read_time(keyno, n_ranges, total_rows); - cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; + cost->cpu_cost= total_rows * ROW_EVALUATE_COST + 0.01; } return total_rows; } === modified file 'sql/handler.cc' --- a/sql/handler.cc 2011-04-28 11:55:16 +0000 +++ b/sql/handler.cc 2011-05-02 13:22:25 +0000 @@ -4559,7 +4559,7 @@ handler::multi_range_read_info_const(uin cost->io_count= index_only_read_time(keyno, total_rows); else cost->io_count= read_time(keyno, n_ranges, total_rows); - cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; + cost->cpu_cost= total_rows * ROW_EVALUATE_COST + 0.01; } return total_rows; } @@ -5305,7 +5305,7 @@ void get_sort_and_sweep_cost(TABLE *tabl { get_sweep_read_cost(table, nrows, FALSE, cost); /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */ - double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID); + double cmp_op= rows2double(nrows) * ROWID_COMPARE_COST; if (cmp_op < 3) cmp_op= 3; cost->cpu_cost += cmp_op * log2(cmp_op); === modified file 'sql/opt_range.cc' --- a/sql/opt_range.cc 2011-04-23 20:44:45 +0000 +++ b/sql/opt_range.cc 2011-05-02 13:22:25 +0000 @@ -2223,8 +2223,8 @@ int SQL_SELECT::test_quick_select(THD *t records= head->file->stats.records; if (!records) records++; /* purecov: inspected */ - scan_time= (double) records / TIME_FOR_COMPARE + 1; - read_time= (double) head->file->scan_time() + scan_time + 1.1; + scan_time= records * ROW_EVALUATE_COST + 1; + read_time= head->file->scan_time() + scan_time + 1.1; if (head->force_index) scan_time= read_time= DBL_MAX; if (limit < records) @@ -2324,7 +2324,7 @@ int SQL_SELECT::test_quick_select(THD *t double key_read_time= param.table->file->index_only_read_time(key_for_use, rows2double(records)) + - (double) records / TIME_FOR_COMPARE; + records * ROW_EVALUATE_COST; DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " "read time %g", key_for_use, key_read_time)); if (key_read_time < read_time) @@ -3877,7 +3877,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick Add one ROWID comparison for each row retrieved on non-CPK scan. (it is done in QUICK_RANGE_SELECT::row_in_ranges) */ - imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID; + imerge_cost += non_cpk_scan_records * ROWID_COMPARE_COST; } /* Calculate cost(rowid_to_row_scan) */ @@ -3966,7 +3966,7 @@ skip_to_ror_scan: cost= param->table->file-> read_time(param->real_keynr[(*cur_child)->key_idx], 1, (*cur_child)->records) + - rows2double((*cur_child)->records) / TIME_FOR_COMPARE; + rows2double((*cur_child)->records) * ROW_EVALUATE_COST; } else cost= read_time; @@ -4013,8 +4013,8 @@ skip_to_ror_scan: get_sweep_read_cost(param->table, roru_total_records, is_interrupted, &sweep_cost); roru_total_cost= roru_index_costs + - rows2double(roru_total_records)*log((double)n_child_scans) / - (TIME_FOR_COMPARE_ROWID * M_LN2) + + rows2double(roru_total_records) * + log((double)n_child_scans) * ROWID_COMPARE_COST / M_LN2 + sweep_cost.total_cost(); } @@ -4469,11 +4469,11 @@ static bool ror_intersect_add(ROR_INTERS { /* CPK scan is used to filter out rows. We apply filtering for - each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID + each record of every scan. Assuming ROWID_COMPARE_COST per check this gives us: */ - info->index_scan_costs += rows2double(info->index_records) / - TIME_FOR_COMPARE_ROWID; + info->index_scan_costs += rows2double(info->index_records) * + ROWID_COMPARE_COST; } else { @@ -4856,9 +4856,9 @@ TRP_ROR_INTERSECT *get_best_covering_ror tree->ror_scans, ror_scan_mark);); /* Add priority queue use cost. */ - total_cost += rows2double(records)* - log((double)(ror_scan_mark - tree->ror_scans)) / - (TIME_FOR_COMPARE_ROWID * M_LN2); + total_cost += rows2double(records) * + log((double)(ror_scan_mark - tree->ror_scans)) * + ROWID_COMPARE_COST / M_LN2; DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost)); if (total_cost > read_time) @@ -10529,7 +10529,7 @@ void cost_group_min_max(TABLE* table, KE no CPU cost. We leave it here to make this cost comparable to that of index scan as computed in SQL_SELECT::test_quick_select(). */ - cpu_cost= (double) num_groups / TIME_FOR_COMPARE; + cpu_cost= num_groups * ROW_EVALUATE_COST; *read_cost= io_cost + cpu_cost; *records= num_groups; === modified file 'sql/sql_const.h' --- a/sql/sql_const.h 2010-12-17 09:41:21 +0000 +++ b/sql/sql_const.h 2011-05-02 13:22:25 +0000 @@ -158,16 +158,15 @@ /** The following is used to decide if MySQL should use table scanning - instead of reading with keys. The number says how many evaluation of the - WHERE clause is comparable to reading one extra row from a table. + instead of reading with keys. The number says how costly evaluation of the + filter condition for a row is compared to reading one extra row from a table. */ -#define TIME_FOR_COMPARE 5.0 // 5 compares == one read +#define ROW_EVALUATE_COST 0.20 /** - Number of comparisons of table rowids equivalent to reading one row from a - table. + Cost of comparing a rowid compared to reading one row from a table. */ -#define TIME_FOR_COMPARE_ROWID (TIME_FOR_COMPARE*2.0) +#define ROWID_COMPARE_COST 0.10 // Half the cost of a general row comparison /* For sequential disk seeks the cost formula is: === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2011-04-28 11:55:16 +0000 +++ b/sql/sql_select.cc 2011-05-02 13:22:25 +0000 @@ -7452,12 +7452,12 @@ best_access_path(JOIN *join, loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); } /* not ft_key */ - if (tmp < best_time - records/(double) TIME_FOR_COMPARE || + if (tmp < best_time - records * ROW_EVALUATE_COST || (quick_matches_more_parts && quick_records < best_quick_records)) { best_quick_records = quick_records; - best_time= tmp + records/(double) TIME_FOR_COMPARE; + best_time= tmp + records * ROW_EVALUATE_COST; best= tmp; best_records= records; best_key= start_key; @@ -7542,7 +7542,7 @@ best_access_path(JOIN *join, */ tmp= record_count * (s->quick->read_time + - (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE); + (s->found_records - rnd_records) * ROW_EVALUATE_COST); loose_scan_opt.check_range_access(join, idx, s->quick); } @@ -7562,8 +7562,7 @@ best_access_path(JOIN *join, - skip rows which does not satisfy join condition */ tmp= record_count * - (tmp + - (s->records - rnd_records)/(double) TIME_FOR_COMPARE); + (tmp + (s->records - rnd_records) * ROW_EVALUATE_COST); } else { @@ -7582,18 +7581,18 @@ best_access_path(JOIN *join, we read the table (see flush_cached_records for details). Here we take into account cost to read and skip these records. */ - tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE; + tmp+= (s->records - rnd_records) * ROW_EVALUATE_COST; } } /* We estimate the cost of evaluating WHERE clause for found records - as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus + as record_count * rnd_records * ROW_EVALUATE_COST. This cost plus tmp give us total cost of using TABLE SCAN */ if (best == DBL_MAX || - (tmp + record_count/(double) TIME_FOR_COMPARE*rnd_records < - best + record_count/(double) TIME_FOR_COMPARE*records)) + (tmp + (record_count * ROW_EVALUATE_COST * rnd_records) < + best + (record_count * ROW_EVALUATE_COST * records))) { /* If the table has a range (s->quick is set) make_join_select() @@ -7914,7 +7913,7 @@ void Optimize_table_order::optimize_stra /* compute the cost of the new plan extended with 's' */ record_count*= join->positions[idx].records_read; read_time+= join->positions[idx].read_time - + record_count / (double) TIME_FOR_COMPARE; + + record_count * ROW_EVALUATE_COST; advance_sj_state(join_tables, s, idx, &record_count, &read_time, &loose_scan_pos); @@ -8168,7 +8167,7 @@ bool Optimize_table_order::greedy_search /* compute the cost of the new plan extended with 'best_table' */ record_count*= join->positions[idx].records_read; read_time+= join->positions[idx].read_time - + record_count / (double) TIME_FOR_COMPARE; + + record_count * ROW_EVALUATE_COST; remaining_tables&= ~(best_table->table->map); --size_remain; @@ -8214,7 +8213,7 @@ void get_partial_join_cost(JOIN *join, u { record_count *= join->best_positions[i].records_read; read_time += join->best_positions[i].read_time - + record_count / (double) TIME_FOR_COMPARE; + + record_count * ROW_EVALUATE_COST; } } *read_time_arg= read_time; @@ -8393,7 +8392,7 @@ bool Optimize_table_order::best_extensio current_record_count= record_count * position->records_read; current_read_time= read_time + position->read_time - + current_record_count / (double) TIME_FOR_COMPARE; + + current_record_count * ROW_EVALUATE_COST; if (has_sj) { === modified file 'sql/uniques.cc' --- a/sql/uniques.cc 2011-03-09 20:54:55 +0000 +++ b/sql/uniques.cc 2011-05-02 13:22:25 +0000 @@ -122,7 +122,7 @@ inline double log2_n_fact(double x) the same length, so each of total_buf_size elements will be added to a sort heap with (n_buffers-1) elements. This gives the comparison cost: - total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID; + total_buf_elems * log2(n_buffers) * ROWID_COMPARE_COST; */ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, @@ -137,7 +137,7 @@ static double get_merge_buffers_cost(uin /* Using log2(n)=log(n)/log(2) formula */ return 2*((double)total_buf_elems*elem_size) / IO_SIZE + - total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2); + total_buf_elems*log((double) n_buffers) * ROWID_COMPARE_COST / M_LN2; } @@ -267,7 +267,6 @@ double Unique::get_use_cost(uint *buffer ulong max_elements_in_tree; ulong last_tree_elems; int n_full_trees; /* number of trees in unique - 1 */ - double result; max_elements_in_tree= ((ulong) max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size)); @@ -276,10 +275,10 @@ double Unique::get_use_cost(uint *buffer last_tree_elems= nkeys % max_elements_in_tree; /* Calculate cost of creating trees */ - result= 2*log2_n_fact(last_tree_elems + 1.0); + double result= 2 * log2_n_fact(last_tree_elems + 1.0); if (n_full_trees) result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0); - result /= TIME_FOR_COMPARE_ROWID; + result*= ROWID_COMPARE_COST; DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys, n_full_trees, n_full_trees?max_elements_in_tree:0, No bundle (reason: useless for push emails).