List:Commits« Previous MessageNext Message »
From:Mattias Jonsson Date:October 17 2008 6:30pm
Subject:bzr commit into mysql-6.0 branch (mattias.jonsson:2697) Bug#35845
View as plain text  
#At file:///Users/mattiasj/clones/bzrroot/b35845-60-bugteam/

 2697 Mattias Jonsson	2008-10-17
      Bug#35845: unnecessary call to ha_start_bulk_insert for unused partitions
      
      Problem is that when inserting (ha_start_bulk_insert) into a partitioned table,
      it will call ha_start_bulk_insert for every partition, used or not.
      
      Solution is to delay the call to the partitions ha_start_bulk_insert until
      the first row is to be inserted into that partition
modified:
  sql/ha_partition.cc
  sql/ha_partition.h

per-file messages:
  sql/ha_partition.cc
    Bug#35845: unnecessary call to ha_start_bulk_insert for unused partitions
    
    Using a bitmap to keep a record of the partitions for which
    ha_start_bulk_insert has been called, and checking against that bitmap to
    decide whether it must be called before continuing with the insert/update,
    or whether it has already been called.
    
    This way it will only call ha_start_bulk_insert for the used partitions.
    
    There is also a little prediction of how many rows will be inserted into
    the current partition: it guesses an equal distribution of the records
    across all partitions, except for the first used partition, for which it
    guesses 50% of the given estimate if the partitioning function is monotonic.
    
    A little clean up in the initialization of ha_partition.
  sql/ha_partition.h
    Bug#35845: unnecessary call to ha_start_bulk_insert for unused partitions
    
    Added helper variables and a function for delaying ha_start_bulk_insert
    until it has to be called.
    
    Fixed a comment.
=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc	2008-06-18 04:01:25 +0000
+++ b/sql/ha_partition.cc	2008-10-17 18:29:48 +0000
@@ -160,8 +160,7 @@ const uint ha_partition::NO_CURRENT_PART
 
 ha_partition::ha_partition(handlerton *hton, TABLE_SHARE *share)
   :handler(hton, share), m_part_info(NULL), m_create_handler(FALSE),
-   m_is_sub_partitioned(0), is_clone(FALSE), auto_increment_lock(FALSE),
-   auto_increment_safe_stmt_log_lock(FALSE)
+   m_is_sub_partitioned(0)
 {
   DBUG_ENTER("ha_partition::ha_partition(table)");
   init_handler_variables();
@@ -183,8 +182,7 @@ ha_partition::ha_partition(handlerton *h
 ha_partition::ha_partition(handlerton *hton, partition_info *part_info)
   :handler(hton, NULL), m_part_info(part_info),
    m_create_handler(TRUE),
-   m_is_sub_partitioned(m_part_info->is_sub_partitioned()), is_clone(FALSE),
-   auto_increment_lock(FALSE), auto_increment_safe_stmt_log_lock(FALSE)
+   m_is_sub_partitioned(m_part_info->is_sub_partitioned())
 {
   DBUG_ENTER("ha_partition::ha_partition(part_info)");
   init_handler_variables();
@@ -240,6 +238,10 @@ void ha_partition::init_handler_variable
   m_last_part= 0;
   m_rec0= 0;
   m_curr_key_info= 0;
+  m_part_func_monotonicity_info= NON_MONOTONIC;
+  is_clone= FALSE;
+  auto_increment_lock= FALSE;
+  auto_increment_safe_stmt_log_lock= FALSE;
   /*
     this allows blackhole to work properly
   */
@@ -2351,11 +2353,17 @@ int ha_partition::open(const char *name,
     }
   }
 
+  /* Initialise the bitmap we use to minimize ha_start_bulk_insert calls */
+  if (bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
+    DBUG_RETURN(1);
   /* Initialise the bitmap we use to determine what partitions are used */
   if (!is_clone)
   {
     if (bitmap_init(&(m_part_info->used_partitions), NULL, m_tot_parts, TRUE))
+    {
+      bitmap_free(&m_bulk_insert_started);
       DBUG_RETURN(1);
+    }
     bitmap_set_all(&(m_part_info->used_partitions));
   }
 
@@ -2423,12 +2431,18 @@ int ha_partition::open(const char *name,
     to ensure we have correct statistics we call info from open after
     calling open on all individual handlers.
   */
+  if (m_part_info->part_expr)
+    m_part_func_monotonicity_info=
+                            m_part_info->part_expr->get_monotonicity_info();
+  else if (m_part_info->list_of_part_fields)
+    m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
   info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
   DBUG_RETURN(0);
 
 err_handler:
   while (file-- != m_file)
     (*file)->close();
+  bitmap_free(&m_bulk_insert_started);
 
   DBUG_RETURN(error);
 }
@@ -2474,6 +2488,7 @@ int ha_partition::close(void)
 
   DBUG_ASSERT(table->s == table_share);
   delete_queue(&m_queue);
+  bitmap_free(&m_bulk_insert_started);
   if (!is_clone)
     bitmap_free(&(m_part_info->used_partitions));
   file= m_file;
@@ -2821,7 +2836,8 @@ int ha_partition::write_row(uchar * buf)
     goto exit;
   }
   m_last_part= part_id;
-  DBUG_PRINT("info", ("Insert in partition %d", part_id));
+  start_part_bulk_insert(part_id);
+
   tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
   error= m_file[part_id]->ha_write_row(buf);
   if (have_auto_increment && !table->s->next_number_keypart)
@@ -2892,6 +2908,7 @@ int ha_partition::update_row(const uchar
   }
 
   m_last_part= new_part_id;
+  start_part_bulk_insert(new_part_id);
   if (new_part_id == old_part_id)
   {
     DBUG_PRINT("info", ("Update in partition %d", new_part_id));
@@ -3044,23 +3061,66 @@ int ha_partition::delete_all_rows()
   DESCRIPTION
     rows == 0 means we will probably insert many rows
 */
-
 void ha_partition::start_bulk_insert(ha_rows rows)
 {
-  handler **file;
   DBUG_ENTER("ha_partition::start_bulk_insert");
 
-  rows= rows ? rows/m_tot_parts + 1 : 0;
-  file= m_file;
-  do
-  {
-    (*file)->ha_start_bulk_insert(rows);
-  } while (*(++file));
+  m_bulk_inserted_rows= 0;
+  bitmap_clear_all(&m_bulk_insert_started);
+  /* use the last bit for marking if bulk_insert_started was called */
+  bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
   DBUG_VOID_RETURN;
 }
 
 
 /*
+  Check if start_bulk_insert has been called for this partition,
+  if not, call it and mark it called
+*/
+void ha_partition::start_part_bulk_insert(uint part_id)
+{
+  if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
+      bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
+  {
+    m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
+    bitmap_set_bit(&m_bulk_insert_started, part_id);
+  }
+  m_bulk_inserted_rows++;
+}
+
+
+/*
+  Try to predict the number of inserts into this partition.
+
+  If less than 10 rows (including 0 which means Unknown)
+    just give that as a guess
+  If monotonic partitioning function was used
+    guess that 50 % of the inserts goes to the first partition
+  For all other cases, guess on equal distribution between the partitions
+*/ 
+ha_rows ha_partition::guess_bulk_insert_rows()
+{
+  DBUG_ENTER("guess_bulk_insert_rows");
+
+  if (estimation_rows_to_insert < 10)
+    DBUG_RETURN(estimation_rows_to_insert);
+
+  /* If first insert/partition and monotonic partition function, guess 50%.  */
+  if (!m_bulk_inserted_rows && 
+      m_part_func_monotonicity_info != NON_MONOTONIC &&
+      m_tot_parts > 1)
+    DBUG_RETURN(estimation_rows_to_insert / 2);
+
+  /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
+  if (m_bulk_inserted_rows < estimation_rows_to_insert)
+    DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
+                / m_tot_parts) + 1);
+  /* The estimation was wrong, must say 'Unknown' */
+  DBUG_RETURN(0);
+}
+
+
+/*
   Finish a large batch of insert rows
 
   SYNOPSIS
@@ -3074,16 +3134,18 @@ void ha_partition::start_bulk_insert(ha_
 int ha_partition::end_bulk_insert()
 {
   int error= 0;
-  handler **file;
+  uint i;
   DBUG_ENTER("ha_partition::end_bulk_insert");
 
-  file= m_file;
-  do
+  DBUG_ASSERT(bitmap_is_set(&m_bulk_insert_started, m_tot_parts));
+  for (i= 0; i < m_tot_parts; i++)
   {
     int tmp;
-    if ((tmp= (*file)->ha_end_bulk_insert()))
+    if (bitmap_is_set(&m_bulk_insert_started, i) &&
+        (tmp= m_file[i]->ha_end_bulk_insert()))
       error= tmp;
-  } while (*(++file));
+  }
+  bitmap_clear_all(&m_bulk_insert_started);
   DBUG_RETURN(error);
 }
 

=== modified file 'sql/ha_partition.h'
--- a/sql/ha_partition.h	2008-05-08 13:01:30 +0000
+++ b/sql/ha_partition.h	2008-10-17 18:29:48 +0000
@@ -155,6 +155,11 @@ private:
     This to ensure it will work with statement based replication.
   */
   bool auto_increment_safe_stmt_log_lock;
+  /** For optimizing ha_start_bulk_insert calls */
+  MY_BITMAP m_bulk_insert_started;
+  ha_rows   m_bulk_inserted_rows;
+  /* used for prediction of start_bulk_insert rows */
+  enum_monotonicity_info m_part_func_monotonicity_info;
 public:
   handler *clone(MEM_ROOT *mem_root);
   virtual void set_part_info(partition_info *part_info, bool early)
@@ -322,7 +327,6 @@ public:
     Bulk inserts are supported if all underlying handlers support it.
     start_bulk_insert and end_bulk_insert is called before and after a
     number of calls to write_row.
-    Not yet though.
   */
   virtual int write_row(uchar * buf);
   virtual int update_row(const uchar * old_data, uchar * new_data);
@@ -330,6 +334,10 @@ public:
   virtual int delete_all_rows(void);
   virtual void start_bulk_insert(ha_rows rows);
   virtual int end_bulk_insert();
+private:
+  ha_rows guess_bulk_insert_rows();
+  void start_part_bulk_insert(uint part_id);
+public:
 
   virtual bool is_fatal_error(int error, uint flags)
   {

Thread
bzr commit into mysql-6.0 branch (mattias.jonsson:2697) Bug#35845Mattias Jonsson17 Oct