List: Commits
From: Roy Lyseng
Date: May 2 2011 1:22pm
Subject: bzr push into mysql-trunk branch (roy.lyseng:3366 to 3367) Bug#12407753
 3367 Roy Lyseng	2011-05-02
      Bug#12407753: Time to compare a row is missing in cost calculation of semi-join
      
      Stage 1: Change cost factors to be multiplicative  
      
      This patch changes the constants TIME_FOR_COMPARE and
      TIME_FOR_COMPARE_ROWID to be the inverses of their previous values
      and gives them names consistent with the other cost factors.
      As a consequence, all cost calculations involving these constants
      change from divisions to multiplications (a standalone sketch of
      the equivalence follows the revision log below).
      
      sql/filesort.cc
        Replaced cost divisions with multiplications.
      
      sql/filesort_utils.cc
        Replaced cost divisions with multiplications.
      
      sql/ha_ndbcluster.cc
        Replaced cost divisions with multiplications.
        Removed redundant cast to (double).
      
      sql/handler.cc
        Replaced cost divisions with multiplications.
        Removed redundant cast to (double).
      
      sql/opt_range.cc
        Replaced cost divisions with multiplications.
        Removed redundant casts to (double).
      
      sql/sql_const.h
        Replaced definition of TIME_FOR_COMPARE with ROW_EVALUATE_COST.
        Replaced definition of TIME_FOR_COMPARE_ROWID with ROWID_COMPARE_COST.
      
      sql/sql_select.cc
        Replaced cost divisions with multiplications.
        Removed redundant casts to (double).
      
      sql/uniques.cc
        Replaced cost divisions with multiplications.

    modified:
      sql/filesort.cc
      sql/filesort_utils.cc
      sql/ha_ndbcluster.cc
      sql/handler.cc
      sql/opt_range.cc
      sql/sql_const.h
      sql/sql_select.cc
      sql/uniques.cc
 3366 Tor Didriksen	2011-04-28 [merge]
      NULL merge trunk => opt-backporting
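
For illustration only (not part of the patch): a minimal standalone C++
sketch showing that the renamed constants are the exact inverses of the
old divisors, so each rewritten expression computes the same cost as
before. The constant values are those in sql/sql_const.h before and
after this change.

#include <cassert>
#include <cmath>

int main()
{
  // Old divisors (sql_const.h before this patch):
  const double TIME_FOR_COMPARE= 5.0;        // 5 compares == one read
  const double TIME_FOR_COMPARE_ROWID= TIME_FOR_COMPARE * 2.0;

  // New multiplicative factors (sql_const.h after this patch):
  const double ROW_EVALUATE_COST= 0.20;      // == 1.0 / TIME_FOR_COMPARE
  const double ROWID_COMPARE_COST= 0.10;     // == 1.0 / TIME_FOR_COMPARE_ROWID

  const double rows= 1000.0;
  // Dividing by comparisons-per-read and multiplying by per-row cost
  // compute the same quantity (up to floating-point rounding):
  assert(std::fabs(rows / TIME_FOR_COMPARE -
                   rows * ROW_EVALUATE_COST) < 1e-9);
  assert(std::fabs(rows / TIME_FOR_COMPARE_ROWID -
                   rows * ROWID_COMPARE_COST) < 1e-9);
  return 0;
}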

=== modified file 'sql/filesort.cc'
--- a/sql/filesort.cc	2011-04-04 08:47:25 +0000
+++ b/sql/filesort.cc	2011-05-02 13:22:25 +0000
@@ -1226,7 +1226,7 @@ bool check_if_pq_applicable(Sort_param *
                                        row_length);
       /*
         PQ has cost:
-        (insert + qsort) * log(queue size) / TIME_FOR_COMPARE_ROWID +
+        (insert + qsort) * log(queue size) * ROWID_COMPARE_COST +
         cost of file lookup afterwards.
         The lookup cost is a bit pessimistic: we take scan_time and assume
         that on average we find the row after scanning half of the file.
@@ -1235,7 +1235,7 @@ bool check_if_pq_applicable(Sort_param *
       */
       const double pq_cpu_cost= 
         (PQ_slowness * num_rows + param->max_keys_per_buffer) *
-        log((double) param->max_keys_per_buffer) / TIME_FOR_COMPARE_ROWID;
+        log((double) param->max_keys_per_buffer) * ROWID_COMPARE_COST;
       const double pq_io_cost=
         param->max_rows * table->file->scan_time() / 2.0;
       const double pq_cost= pq_cpu_cost + pq_io_cost;

=== modified file 'sql/filesort_utils.cc'
--- a/sql/filesort_utils.cc	2010-12-17 09:41:21 +0000
+++ b/sql/filesort_utils.cc	2011-05-02 13:22:25 +0000
@@ -26,8 +26,7 @@ double get_merge_cost(ha_rows num_elemen
 {
   return 
     2.0 * ((double) num_elements * elem_size) / IO_SIZE
-    + (double) num_elements * log((double) num_buffers) /
-      (TIME_FOR_COMPARE_ROWID * M_LN2);
+    + num_elements * log((double) num_buffers) * ROWID_COMPARE_COST / M_LN2;
 }
 }
 
@@ -49,8 +48,7 @@ double get_merge_many_buffs_cost_fast(ha
   // Calculate CPU cost of sorting buffers.
   total_cost=
     ( num_buffers * num_keys_per_buffer * log(1.0 + num_keys_per_buffer) +
-      last_n_elems * log(1.0 + last_n_elems) )
-    / TIME_FOR_COMPARE_ROWID;
+      last_n_elems * log(1.0 + last_n_elems) ) * ROWID_COMPARE_COST;
   
   // Simulate behavior of merge_many_buff().
   while (num_buffers >= MERGEBUFF2)

=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc	2011-04-19 03:29:06 +0000
+++ b/sql/ha_ndbcluster.cc	2011-05-02 13:22:25 +0000
@@ -9033,7 +9033,7 @@ ha_ndbcluster::multi_range_read_info_con
       cost->io_count= index_only_read_time(keyno, total_rows);
     else
       cost->io_count= read_time(keyno, n_ranges, total_rows);
-    cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+    cost->cpu_cost= total_rows * ROW_EVALUATE_COST + 0.01;
   }
   return total_rows;
 }

=== modified file 'sql/handler.cc'
--- a/sql/handler.cc	2011-04-28 11:55:16 +0000
+++ b/sql/handler.cc	2011-05-02 13:22:25 +0000
@@ -4559,7 +4559,7 @@ handler::multi_range_read_info_const(uin
       cost->io_count= index_only_read_time(keyno, total_rows);
     else
       cost->io_count= read_time(keyno, n_ranges, total_rows);
-    cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+    cost->cpu_cost= total_rows * ROW_EVALUATE_COST + 0.01;
   }
   return total_rows;
 }
@@ -5305,7 +5305,7 @@ void get_sort_and_sweep_cost(TABLE *tabl
   {
     get_sweep_read_cost(table, nrows, FALSE, cost);
     /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
-    double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
+    double cmp_op= rows2double(nrows) * ROWID_COMPARE_COST;
     if (cmp_op < 3)
       cmp_op= 3;
     cost->cpu_cost += cmp_op * log2(cmp_op);

=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc	2011-04-23 20:44:45 +0000
+++ b/sql/opt_range.cc	2011-05-02 13:22:25 +0000
@@ -2223,8 +2223,8 @@ int SQL_SELECT::test_quick_select(THD *t
   records= head->file->stats.records;
   if (!records)
     records++;					/* purecov: inspected */
-  scan_time= (double) records / TIME_FOR_COMPARE + 1;
-  read_time= (double) head->file->scan_time() + scan_time + 1.1;
+  scan_time= records * ROW_EVALUATE_COST + 1;
+  read_time= head->file->scan_time() + scan_time + 1.1;
   if (head->force_index)
     scan_time= read_time= DBL_MAX;
   if (limit < records)
@@ -2324,7 +2324,7 @@ int SQL_SELECT::test_quick_select(THD *t
       double key_read_time= 
         param.table->file->index_only_read_time(key_for_use, 
                                                 rows2double(records)) +
-        (double) records / TIME_FOR_COMPARE;
+        records * ROW_EVALUATE_COST;
       DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
                            "read time %g", key_for_use, key_read_time));
       if (key_read_time < read_time)
@@ -3877,7 +3877,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick
       Add one ROWID comparison for each row retrieved on non-CPK scan.  (it
       is done in QUICK_RANGE_SELECT::row_in_ranges)
      */
-    imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID;
+    imerge_cost += non_cpk_scan_records * ROWID_COMPARE_COST;
   }
 
   /* Calculate cost(rowid_to_row_scan) */
@@ -3966,7 +3966,7 @@ skip_to_ror_scan:
       cost= param->table->file->
               read_time(param->real_keynr[(*cur_child)->key_idx], 1,
                         (*cur_child)->records) +
-              rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
+              rows2double((*cur_child)->records) * ROW_EVALUATE_COST;
     }
     else
       cost= read_time;
@@ -4013,8 +4013,8 @@ skip_to_ror_scan:
     get_sweep_read_cost(param->table, roru_total_records, is_interrupted,
                         &sweep_cost);
     roru_total_cost= roru_index_costs +
-                     rows2double(roru_total_records)*log((double)n_child_scans) /
-                     (TIME_FOR_COMPARE_ROWID * M_LN2) +
+                     rows2double(roru_total_records) *
+                     log((double)n_child_scans) * ROWID_COMPARE_COST / M_LN2 +
                      sweep_cost.total_cost();
   }
 
@@ -4469,11 +4469,11 @@ static bool ror_intersect_add(ROR_INTERS
   {
     /*
       CPK scan is used to filter out rows. We apply filtering for 
-      each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID
+      each record of every scan. Assuming ROWID_COMPARE_COST
       per check this gives us:
     */
-    info->index_scan_costs += rows2double(info->index_records) / 
-                              TIME_FOR_COMPARE_ROWID;
+    info->index_scan_costs += rows2double(info->index_records) * 
+                              ROWID_COMPARE_COST;
   }
   else
   {
@@ -4856,9 +4856,9 @@ TRP_ROR_INTERSECT *get_best_covering_ror
                                            tree->ror_scans, ror_scan_mark););
 
   /* Add priority queue use cost. */
-  total_cost += rows2double(records)*
-                log((double)(ror_scan_mark - tree->ror_scans)) /
-                (TIME_FOR_COMPARE_ROWID * M_LN2);
+  total_cost += rows2double(records) *
+                log((double)(ror_scan_mark - tree->ror_scans)) *
+                ROWID_COMPARE_COST / M_LN2;
   DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost));
 
   if (total_cost > read_time)
@@ -10529,7 +10529,7 @@ void cost_group_min_max(TABLE* table, KE
     no CPU cost. We leave it here to make this cost comparable to that of index
     scan as computed in SQL_SELECT::test_quick_select().
   */
-  cpu_cost= (double) num_groups / TIME_FOR_COMPARE;
+  cpu_cost= num_groups * ROW_EVALUATE_COST;
 
   *read_cost= io_cost + cpu_cost;
   *records= num_groups;

=== modified file 'sql/sql_const.h'
--- a/sql/sql_const.h	2010-12-17 09:41:21 +0000
+++ b/sql/sql_const.h	2011-05-02 13:22:25 +0000
@@ -158,16 +158,15 @@
 
 /**
   The following is used to decide if MySQL should use table scanning
-  instead of reading with keys.  The number says how many evaluation of the
-  WHERE clause is comparable to reading one extra row from a table.
+  instead of reading with keys.  The number says how costly evaluation of the
+  filter condition for a row is compared to reading one extra row from a table.
 */
-#define TIME_FOR_COMPARE   5.0	// 5 compares == one read
+#define ROW_EVALUATE_COST  0.20
 
 /**
-  Number of comparisons of table rowids equivalent to reading one row from a 
-  table.
+  Cost of comparing a rowid compared to reading one row from a table.
 */
-#define TIME_FOR_COMPARE_ROWID  (TIME_FOR_COMPARE*2.0)
+#define ROWID_COMPARE_COST 0.10  // Half the cost of a general row comparison
 
 /*
   For sequential disk seeks the cost formula is:

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2011-04-28 11:55:16 +0000
+++ b/sql/sql_select.cc	2011-05-02 13:22:25 +0000
@@ -7452,12 +7452,12 @@ best_access_path(JOIN      *join,
         loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp);
 
       } /* not ft_key */
-      if (tmp < best_time - records/(double) TIME_FOR_COMPARE ||
+      if (tmp < best_time - records * ROW_EVALUATE_COST ||
           (quick_matches_more_parts && 
            quick_records < best_quick_records))
       {
         best_quick_records = quick_records;
-        best_time= tmp + records/(double) TIME_FOR_COMPARE;
+        best_time= tmp + records * ROW_EVALUATE_COST;
         best= tmp;
         best_records= records;
         best_key= start_key;
@@ -7542,7 +7542,7 @@ best_access_path(JOIN      *join,
       */
       tmp= record_count *
         (s->quick->read_time +
-         (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
+         (s->found_records - rnd_records) * ROW_EVALUATE_COST);
 
       loose_scan_opt.check_range_access(join, idx, s->quick);
     }
@@ -7562,8 +7562,7 @@ best_access_path(JOIN      *join,
           - skip rows which does not satisfy join condition
         */
         tmp= record_count *
-          (tmp +
-           (s->records - rnd_records)/(double) TIME_FOR_COMPARE);
+             (tmp + (s->records - rnd_records) * ROW_EVALUATE_COST);
       }
       else
       {
@@ -7582,18 +7581,18 @@ best_access_path(JOIN      *join,
            we read the table (see flush_cached_records for details). Here we
            take into account cost to read and skip these records.
         */
-        tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
+        tmp+= (s->records - rnd_records) * ROW_EVALUATE_COST;
       }
     }
 
     /*
       We estimate the cost of evaluating WHERE clause for found records
-      as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
+      as record_count * rnd_records * ROW_EVALUATE_COST. This cost plus
       tmp give us total cost of using TABLE SCAN
     */
     if (best == DBL_MAX ||
-        (tmp  + record_count/(double) TIME_FOR_COMPARE*rnd_records <
-         best + record_count/(double) TIME_FOR_COMPARE*records))
+        (tmp  + (record_count * ROW_EVALUATE_COST * rnd_records) <
+         best + (record_count * ROW_EVALUATE_COST * records)))
     {
       /*
         If the table has a range (s->quick is set) make_join_select()
@@ -7914,7 +7913,7 @@ void Optimize_table_order::optimize_stra
     /* compute the cost of the new plan extended with 's' */
     record_count*= join->positions[idx].records_read;
     read_time+=    join->positions[idx].read_time
-                   + record_count / (double) TIME_FOR_COMPARE;
+                   + record_count * ROW_EVALUATE_COST;
     advance_sj_state(join_tables, s, idx, &record_count, &read_time,
                      &loose_scan_pos);
 
@@ -8168,7 +8167,7 @@ bool Optimize_table_order::greedy_search
     /* compute the cost of the new plan extended with 'best_table' */
     record_count*= join->positions[idx].records_read;
     read_time+=    join->positions[idx].read_time
-                   + record_count / (double) TIME_FOR_COMPARE;
+                   + record_count * ROW_EVALUATE_COST;
 
     remaining_tables&= ~(best_table->table->map);
     --size_remain;
@@ -8214,7 +8213,7 @@ void get_partial_join_cost(JOIN *join, u
     {
       record_count *= join->best_positions[i].records_read;
       read_time += join->best_positions[i].read_time
-                   + record_count / (double) TIME_FOR_COMPARE;
+                   + record_count * ROW_EVALUATE_COST;
     }
   }
   *read_time_arg= read_time;
@@ -8393,7 +8392,7 @@ bool Optimize_table_order::best_extensio
       current_record_count= record_count * position->records_read;
       current_read_time=    read_time
                             + position->read_time
-                            + current_record_count / (double) TIME_FOR_COMPARE;
+                            + current_record_count * ROW_EVALUATE_COST;
 
       if (has_sj)
       {

=== modified file 'sql/uniques.cc'
--- a/sql/uniques.cc	2011-03-09 20:54:55 +0000
+++ b/sql/uniques.cc	2011-05-02 13:22:25 +0000
@@ -122,7 +122,7 @@ inline double log2_n_fact(double x)
     the same length, so each of total_buf_size elements will be added to a sort
     heap with (n_buffers-1) elements. This gives the comparison cost:
 
-      total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID;
+      total_buf_elems * log2(n_buffers) * ROWID_COMPARE_COST;
 */
 
 static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
@@ -137,7 +137,7 @@ static double get_merge_buffers_cost(uin
 
   /* Using log2(n)=log(n)/log(2) formula */
   return 2*((double)total_buf_elems*elem_size) / IO_SIZE +
-     total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2);
+     total_buf_elems*log((double) n_buffers) * ROWID_COMPARE_COST / M_LN2;
 }
 
 
@@ -267,7 +267,6 @@ double Unique::get_use_cost(uint *buffer
   ulong max_elements_in_tree;
   ulong last_tree_elems;
   int   n_full_trees; /* number of trees in unique - 1 */
-  double result;
 
   max_elements_in_tree= ((ulong) max_in_memory_size /
                          ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
@@ -276,10 +275,10 @@ double Unique::get_use_cost(uint *buffer
   last_tree_elems= nkeys % max_elements_in_tree;
 
   /* Calculate cost of creating trees */
-  result= 2*log2_n_fact(last_tree_elems + 1.0);
+  double result= 2 * log2_n_fact(last_tree_elems + 1.0);
   if (n_full_trees)
     result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
-  result /= TIME_FOR_COMPARE_ROWID;
+  result*= ROWID_COMPARE_COST;
 
   DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys,
                      n_full_trees, n_full_trees?max_elements_in_tree:0,

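As a worked example of the new multiplicative form, here is a
self-contained rendering of the rewritten get_merge_cost() from
sql/filesort_utils.cc (a sketch only: the real function takes
ha_rows/uint arguments and uses the server's IO_SIZE and M_LN2).

#include <cmath>
#include <cstdio>

// Assumed for the sketch: IO_SIZE and the cost factor as plain doubles.
static const double IO_SIZE= 4096.0;
static const double ROWID_COMPARE_COST= 0.10;

// Cost of merging num_buffers sorted runs holding num_elements elements
// of elem_size bytes each: one write plus one read of all the data (the
// 2.0 factor, in IO_SIZE units), plus log2(num_buffers) rowid
// comparisons per element in the merge heap. std::log(n) / std::log(2.0)
// is the log2 conversion the server source spells as log(n) / M_LN2.
static double merge_cost(double num_elements, double num_buffers,
                         double elem_size)
{
  return 2.0 * (num_elements * elem_size) / IO_SIZE +
         num_elements * std::log(num_buffers) * ROWID_COMPARE_COST /
         std::log(2.0);
}

int main()
{
  // e.g. merging one million 16-byte sort keys spread over 8 runs
  std::printf("merge cost: %g\n", merge_cost(1e6, 8.0, 16.0));
  return 0;
}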
No bundle (reason: useless for push emails).