List:Commits« Previous MessageNext Message »
From:Mattias Jonsson Date:November 12 2009 11:24am
Subject:bzr commit into mysql-5.1-bugteam branch (mattias.jonsson:3161)
Bug#47261
View as plain text  
#At file:///Users/mattiasj/clones/bzrroot/b47261-51-bugteam/ based on revid:mattias.jonsson@stripped

 3161 Mattias Jonsson	2009-11-12
      bug#47261 bad performance in 5.1 for heavily partitioned tables
      
      The optimizer calls on a partitioned table was called
      for each and every partition. This patch makes the call
      only to a subset of the partitions instead, which improve
      the performance for tables with many partitions.
     @ sql/ha_partition.cc
        Bug#47261: Bad performance in 5.1 for heavily partitioned tables
        
        Only use up to 1/3 of the partitions (or max 10) for estimating
        the number of rows in range, scan time and rows upper bound.
        
        NOTE: If no rows or time is found, it will continue untill it finds any rows, since zero rows really means it does not exists
        any matching rows.
     @ sql/ha_partition.h
        Bug#47261: Bad performance in 5.1 for heavily partitioned tables
        
        Added helper function declaration to avoid code duplication.

    modified:
      sql/ha_partition.cc
      sql/ha_partition.h
=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc	2009-10-13 14:59:43 +0000
+++ b/sql/ha_partition.cc	2009-11-12 11:24:41 +0000
@@ -5770,6 +5770,23 @@ const key_map *ha_partition::keys_to_use
   DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
 }
 
+#define MAX_PARTS_FOR_OPTIMIZER_CALLS 10
+/*
+  Prepare start variables for estimating optimizer costs.
+
+  @param[out] num_used_parts  Number of partitions after pruning.
+  @param[out] check_min_num   Number of partitions to call.
+  @param[out] first           first used partition.
+*/
+void ha_partition::partitions_optimizer_call_preparations(uint *first,
+                                                          uint *num_used_parts,
+                                                          uint *check_min_num)
+{
+  *first= bitmap_get_first_set(&(m_part_info->used_partitions));
+  *num_used_parts= bitmap_bits_set(&(m_part_info->used_partitions));
+  *check_min_num= min(MAX_PARTS_FOR_OPTIMIZER_CALLS, *num_used_parts);
+}
+
 
 /*
   Return time for a scan of the table
@@ -5783,43 +5800,67 @@ const key_map *ha_partition::keys_to_use
 
 double ha_partition::scan_time()
 {
-  double scan_time= 0;
-  handler **file;
+  double scan_time= 0.0;
+  uint first, part_id, num_used_parts, check_min_num, partitions_called= 0;
   DBUG_ENTER("ha_partition::scan_time");
 
-  for (file= m_file; *file; file++)
-    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
-      scan_time+= (*file)->scan_time();
+  partitions_optimizer_call_preparations(&first, &num_used_parts, &check_min_num);
+  for (part_id= first; partitions_called < num_used_parts ; part_id++)
+  {
+    if (!bitmap_is_set(&(m_part_info->used_partitions), part_id))
+      continue;
+    scan_time+= m_file[part_id]->scan_time();
+    partitions_called++;
+    if (partitions_called >= check_min_num && scan_time != 0.0)
+    {
+      DBUG_RETURN(scan_time *
+                      (double) num_used_parts / (double) partitions_called);
+    }
+  }
   DBUG_RETURN(scan_time);
 }
 
 
 /*
-  Get time to read
-
-  SYNOPSIS
-    read_time()
-    index                Index number used
-    ranges               Number of ranges
-    rows                 Number of rows
+  Estimate rows for records_in_range or estimate_rows_upper_bound.
 
-  RETURN VALUE
-    time for read
+  @param is_records_in_range  call records_in_range instead of
+                              estimate_rows_upper_bound.
+  @param inx                  (only for records_in_range) index to use.
+  @param min_key              (only for records_in_range) start of range.
+  @param max_key              (only for records_in_range) end of range.
 
-  DESCRIPTION
-    This will be optimised later to include whether or not the index can
-    be used with partitioning. To achieve we need to add another parameter
-    that specifies how many of the index fields that are bound in the ranges.
-    Possibly added as a new call to handlers.
+  @return Number of rows or HA_POS_ERROR.
 */
-
-double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+ha_rows ha_partition::estimate_rows(bool is_records_in_range, uint inx,
+                                    key_range *min_key, key_range *max_key)
 {
-  DBUG_ENTER("ha_partition::read_time");
+  ha_rows rows, estimated_rows= 0;
+  uint first, part_id, num_used_parts, check_min_num, partitions_called= 0;
+  DBUG_ENTER("ha_partition::records_in_range");
 
-  DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
+  partitions_optimizer_call_preparations(&first, &num_used_parts, &check_min_num);
+  for (part_id= first; partitions_called < num_used_parts ; part_id++)
+  {
+    if (!bitmap_is_set(&(m_part_info->used_partitions), part_id))
+      continue;
+    if (is_records_in_range)
+      rows= m_file[part_id]->records_in_range(inx, min_key, max_key);
+    else
+      rows= m_file[part_id]->estimate_rows_upper_bound();
+    if (rows == HA_POS_ERROR)
+      DBUG_RETURN(HA_POS_ERROR);
+    estimated_rows+= rows;
+    partitions_called++;
+    if (partitions_called >= check_min_num && estimated_rows)
+    {
+      DBUG_RETURN(estimated_rows * num_used_parts / partitions_called);
+    }
+  }
+  DBUG_RETURN(estimated_rows);
 }
 
+
 /*
   Find number of records in a range
 
@@ -5847,22 +5888,9 @@ double ha_partition::read_time(uint inde
 ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
 				       key_range *max_key)
 {
-  handler **file;
-  ha_rows in_range= 0;
   DBUG_ENTER("ha_partition::records_in_range");
 
-  file= m_file;
-  do
-  {
-    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
-    {
-      ha_rows tmp_in_range= (*file)->records_in_range(inx, min_key, max_key);
-      if (tmp_in_range == HA_POS_ERROR)
-        DBUG_RETURN(tmp_in_range);
-      in_range+= tmp_in_range;
-    }
-  } while (*(++file));
-  DBUG_RETURN(in_range);
+  DBUG_RETURN(estimate_rows(TRUE, inx, min_key, max_key));
 }
 
 
@@ -5878,22 +5906,36 @@ ha_rows ha_partition::records_in_range(u
 
 ha_rows ha_partition::estimate_rows_upper_bound()
 {
-  ha_rows rows, tot_rows= 0;
-  handler **file;
   DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
 
-  file= m_file;
-  do
-  {
-    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
-    {
-      rows= (*file)->estimate_rows_upper_bound();
-      if (rows == HA_POS_ERROR)
-        DBUG_RETURN(HA_POS_ERROR);
-      tot_rows+= rows;
-    }
-  } while (*(++file));
-  DBUG_RETURN(tot_rows);
+  DBUG_RETURN(estimate_rows(FALSE, 0, NULL, NULL));
+}
+
+
+/*
+  Get time to read
+
+  SYNOPSIS
+    read_time()
+    index                Index number used
+    ranges               Number of ranges
+    rows                 Number of rows
+
+  RETURN VALUE
+    time for read
+
+  DESCRIPTION
+    This will be optimised later to include whether or not the index can
+    be used with partitioning. To achieve we need to add another parameter
+    that specifies how many of the index fields that are bound in the ranges.
+    Possibly added as a new call to handlers.
+*/
+
+double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+{
+  DBUG_ENTER("ha_partition::read_time");
+
+  DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
 }
 
 

=== modified file 'sql/ha_partition.h'
--- a/sql/ha_partition.h	2009-09-23 13:21:29 +0000
+++ b/sql/ha_partition.h	2009-11-12 11:24:41 +0000
@@ -547,6 +547,18 @@ public:
      -------------------------------------------------------------------------
   */
 
+private:
+  /*
+    Helper function to get the minimum number of partitions to use for
+    the optimizer hints/cost calls.
+  */
+  void partitions_optimizer_call_preparations(uint *num_used_parts,
+                                              uint *check_min_num,
+                                              uint *first);
+  ha_rows estimate_rows(bool is_records_in_range, uint inx,
+                        key_range *min_key, key_range *max_key);
+public:
+
   /*
     keys_to_use_for_scanning can probably be implemented as the
     intersection of all underlying handlers if mixed handlers are used.


Attachment: [text/bzr-bundle]
Thread
bzr commit into mysql-5.1-bugteam branch (mattias.jonsson:3161)Bug#47261Mattias Jonsson12 Nov