List: Commits
From: Mattias Jonsson
Date: September 5, 2008 11:20am
Subject: bzr commit into mysql-6.0 branch (mattias.jonsson:2697) Bug#35845
#At file:///Users/mattiasj/clones/bzrroot/b35845-60-bugteam/

 2697 Mattias Jonsson	2008-09-05
      Bug#35845: unnecessary call to ha_start_bulk_insert for unused partitions
      
      The problem is that an insert into a partitioned table calls
      ha_start_bulk_insert for every partition, whether it is used or not.
      
      The solution is to delay the call to a partition's ha_start_bulk_insert
      until the first row is to be inserted into that partition.
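
For illustration, the pattern being introduced (lazy per-partition
start_bulk_insert, tracked in a bitmap) can be sketched outside the
handler API like this; the names are made up for the sketch and are
not the actual ha_partition members:

#include <cstdio>
#include <vector>

/* Stand-in for a per-partition storage engine handler. */
struct PartitionHandler
{
  void start_bulk_insert() { std::puts("bulk insert started"); }
  int  write_row(int /* row */) { return 0; }
};

/* Lazy-start pattern: start_bulk_insert() is only called on a
   partition the first time a row actually lands in it. */
class LazyBulkInserter
{
  std::vector<PartitionHandler> parts;
  std::vector<bool> started;    /* plays the role of MY_BITMAP */
  bool bulk_active;             /* plays the role of the extra last bit */
public:
  explicit LazyBulkInserter(size_t n)
    : parts(n), started(n, false), bulk_active(false) {}
  void start_bulk_insert()
  {
    started.assign(parts.size(), false);
    bulk_active= true;
  }
  int write_row(size_t part_id, int row)
  {
    if (bulk_active && !started[part_id])
    {
      parts[part_id].start_bulk_insert(); /* first row for this partition */
      started[part_id]= true;
    }
    return parts[part_id].write_row(row);
  }
  void end_bulk_insert()
  {
    /* only finish the partitions that were actually started */
    for (size_t i= 0; i < parts.size(); i++)
      if (started[i])
        std::printf("end bulk insert on partition %lu\n", (unsigned long) i);
    bulk_active= false;
  }
};

int main()
{
  LazyBulkInserter t(4);
  t.start_bulk_insert();
  t.write_row(2, 1);   /* starts bulk insert on partition 2 only */
  t.write_row(2, 2);   /* already started, no second call */
  t.end_bulk_insert();
  return 0;
}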
modified:
  sql/ha_partition.cc
  sql/ha_partition.h

per-file messages:
  sql/ha_partition.cc
    Bug#35845: unnecessary call to ha_start_bulk_insert for unused partitions
    
    A bitmap keeps track of the partitions for which ha_start_bulk_insert
    has been called; before each insert/update it is checked to see whether
    the call must still be made for the target partition or has already
    been done.
    
    This way ha_start_bulk_insert is only called for the partitions that
    are actually used.
    
    There is also a simple prediction of how many rows will be inserted
    into the current partition, depending on whether HASH partitioning is
    used and on how the previous rows were distributed (a small worked
    example follows the per-file notes).
  sql/ha_partition.h
    Bug#35845: unnecessary call to ha_start_bulk_insert for unused partitions
    
    Added helper variables and a function for delaying ha_start_bulk_insert
    until it has to be called.
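
As a rough worked example of the row-count guess implemented in
guess_bulk_insert_rows() in the diff below (a standalone restatement
with made-up numbers, not the actual member function):

#include <cstdio>

/* Standalone restatement of the guess: plain average for evenly
   distributed HASH inserts, otherwise a decreasing share of the
   remaining rows for each newly used partition. */
static unsigned long guess(unsigned long total_rows,
                           unsigned long inserted_rows,
                           unsigned tot_parts, unsigned call_num,
                           bool evenly_distributed_hash)
{
  if (total_rows == 0)
    return 0;                       /* 0 means "unknown", as in the patch */
  unsigned long avg= total_rows / tot_parts;
  if (!evenly_distributed_hash)     /* decreasing-share branch */
    avg= (total_rows - inserted_rows) / (call_num + 1);
  return avg ? avg : 1;
}

int main()
{
  /* 90 expected rows over 3 partitions, RANGE/LIST-style distribution:
     the first used partition is guessed at 90/2 = 45 (50%); if 60 rows
     actually went there, the second is guessed at (90-60)/3 = 10. */
  std::printf("%lu\n", guess(90, 0, 3, 1, false));   /* prints 45 */
  std::printf("%lu\n", guess(90, 60, 3, 2, false));  /* prints 10 */
  /* Evenly distributed HASH keeps the plain average 90/3 = 30. */
  std::printf("%lu\n", guess(90, 60, 3, 2, true));   /* prints 30 */
  return 0;
}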
=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc	2008-06-18 04:01:25 +0000
+++ b/sql/ha_partition.cc	2008-09-05 11:20:12 +0000
@@ -2351,11 +2351,17 @@ int ha_partition::open(const char *name,
     }
   }
 
+  /* Initialise the bitmap we use to minimize ha_start_bulk_insert calls */
+  if (bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
+    DBUG_RETURN(1);
   /* Initialise the bitmap we use to determine what partitions are used */
   if (!is_clone)
   {
     if (bitmap_init(&(m_part_info->used_partitions), NULL, m_tot_parts, TRUE))
+    {
+      bitmap_free(&m_bulk_insert_started);
       DBUG_RETURN(1);
+    }
     bitmap_set_all(&(m_part_info->used_partitions));
   }
 
@@ -2429,6 +2435,7 @@ int ha_partition::open(const char *name,
 err_handler:
   while (file-- != m_file)
     (*file)->close();
+  bitmap_free(&m_bulk_insert_started);
 
   DBUG_RETURN(error);
 }
@@ -2474,6 +2481,7 @@ int ha_partition::close(void)
 
   DBUG_ASSERT(table->s == table_share);
   delete_queue(&m_queue);
+  bitmap_free(&m_bulk_insert_started);
   if (!is_clone)
     bitmap_free(&(m_part_info->used_partitions));
   file= m_file;
@@ -2822,6 +2830,14 @@ int ha_partition::write_row(uchar * buf)
   }
   m_last_part= part_id;
   DBUG_PRINT("info", ("Insert in partition %d", part_id));
+  if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
+      bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
+  {
+    m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
+    bitmap_set_bit(&m_bulk_insert_started, part_id);
+  }
+  m_bulk_inserted_rows++;
+
   tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
   error= m_file[part_id]->ha_write_row(buf);
   if (have_auto_increment && !table->s->next_number_keypart)
@@ -2892,6 +2908,13 @@ int ha_partition::update_row(const uchar
   }
 
   m_last_part= new_part_id;
+  if (bitmap_is_set(&m_bulk_insert_started, m_tot_parts) &&
+      !bitmap_is_set(&m_bulk_insert_started, new_part_id))
+  {
+    m_file[new_part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
+    bitmap_set_bit(&m_bulk_insert_started, new_part_id);
+  }
+  m_bulk_inserted_rows++;
   if (new_part_id == old_part_id)
   {
     DBUG_PRINT("info", ("Update in partition %d", new_part_id));
@@ -3044,23 +3067,58 @@ int ha_partition::delete_all_rows()
   DESCRIPTION
     rows == 0 means we will probably insert many rows
 */
-
 void ha_partition::start_bulk_insert(ha_rows rows)
 {
-  handler **file;
   DBUG_ENTER("ha_partition::start_bulk_insert");
 
-  rows= rows ? rows/m_tot_parts + 1 : 0;
-  file= m_file;
-  do
-  {
-    (*file)->ha_start_bulk_insert(rows);
-  } while (*(++file));
+  m_bulk_total_rows= rows;
+  m_bulk_inserted_rows= 0;
+  bitmap_clear_all(&m_bulk_insert_started);
+  /* use the last bit to mark that start_bulk_insert() was called */
+  bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
   DBUG_VOID_RETURN;
 }
 
 
 /*
+  Try to predict the number of inserts into this partition.
+
+  If hash or key partitioning and the number of inserts so far is
+  less than twice the expected average number of rows per partition so far:
+    guess that the rows are evenly distributed over all partitions
+  otherwise
+    guess that the number of rows decreases for each newly used partition
+    (i.e. the guess is 50% for the first used partition, 33% of the
+     remaining inserts for the second used partition, 25% of the remainder
+     for the third, and so on.)
+*/ 
+ha_rows ha_partition::guess_bulk_insert_rows()
+{
+  uint call_num;
+  ha_rows average_rows_per_partition;
+  DBUG_ENTER("guess_bulk_insert_rows");
+  if (m_bulk_total_rows == 0)
+    DBUG_RETURN(0);
+  average_rows_per_partition= (m_bulk_total_rows / m_tot_parts);
+  if (!average_rows_per_partition)
+    average_rows_per_partition= 1;
+  call_num= bitmap_bits_set(&m_bulk_insert_started);
+  if (m_part_info->part_type != HASH_PARTITION ||
+      m_bulk_inserted_rows > (2 * call_num * average_rows_per_partition))
+  {
+    average_rows_per_partition= (m_bulk_total_rows - m_bulk_inserted_rows) /
+                                (call_num + 1);
+  }
+  if (!average_rows_per_partition)
+    average_rows_per_partition= 1;
+  DBUG_PRINT("info", ("tot %lu done %lu call_num %u guess %lu",
+                      m_bulk_total_rows, m_bulk_inserted_rows, call_num,
+                      average_rows_per_partition));
+  DBUG_RETURN(average_rows_per_partition);
+}
+
+
+/*
   Finish a large batch of insert rows
 
   SYNOPSIS
@@ -3074,16 +3132,18 @@ void ha_partition::start_bulk_insert(ha_
 int ha_partition::end_bulk_insert()
 {
   int error= 0;
-  handler **file;
+  uint i;
   DBUG_ENTER("ha_partition::end_bulk_insert");
 
-  file= m_file;
-  do
+  DBUG_ASSERT(bitmap_is_set(&m_bulk_insert_started, m_tot_parts));
+  for (i= 0; i < m_tot_parts; i++)
   {
     int tmp;
-    if ((tmp= (*file)->ha_end_bulk_insert()))
+    if (bitmap_is_set(&m_bulk_insert_started, i) &&
+        (tmp= m_file[i]->ha_end_bulk_insert()))
       error= tmp;
-  } while (*(++file));
+  }
+  bitmap_clear_all(&m_bulk_insert_started);
   DBUG_RETURN(error);
 }
 

=== modified file 'sql/ha_partition.h'
--- a/sql/ha_partition.h	2008-05-08 13:01:30 +0000
+++ b/sql/ha_partition.h	2008-09-05 11:20:12 +0000
@@ -155,6 +155,10 @@ private:
     This to ensure it will work with statement based replication.
   */
   bool auto_increment_safe_stmt_log_lock;
+  /** For optimizing ha_start_bulk_insert calls */
+  MY_BITMAP m_bulk_insert_started;
+  ha_rows   m_bulk_total_rows;
+  ha_rows   m_bulk_inserted_rows;
 public:
   handler *clone(MEM_ROOT *mem_root);
   virtual void set_part_info(partition_info *part_info, bool early)
@@ -322,7 +326,6 @@ public:
     Bulk inserts are supported if all underlying handlers support it.
     start_bulk_insert and end_bulk_insert is called before and after a
     number of calls to write_row.
-    Not yet though.
   */
   virtual int write_row(uchar * buf);
   virtual int update_row(const uchar * old_data, uchar * new_data);
@@ -330,6 +333,9 @@ public:
   virtual int delete_all_rows(void);
   virtual void start_bulk_insert(ha_rows rows);
   virtual int end_bulk_insert();
+private:
+  ha_rows guess_bulk_insert_rows();
+public:
 
   virtual bool is_fatal_error(int error, uint flags)
   {
