List:Commits« Previous MessageNext Message »
From:Mattias Jonsson Date:September 4 2009 1:02pm
Subject:bzr commit into mysql-5.1-bugteam branch (mattias.jonsson:3112)
Bug#35845
View as plain text  
#At file:///Users/mattiasj/clones/bzrroot/b35845-51-bugteam_backport/ based on revid:sergey.glukhov@stripped

 3112 Mattias Jonsson	2009-09-04
      Bug#35845: unnecessary call to ha_start_bulk_insert for not used partitions
      
      (Backport)
      
      The problem is that when inserting (ha_start_bulk_insert) into a
      partitioned table, ha_start_bulk_insert is called for every partition,
      whether it is used or not.
      
      The solution is to delay the call to each partition's
      ha_start_bulk_insert until the first row is about to be inserted into
      that partition.
     @ sql/ha_partition.cc
        Bug#35845: unnecessary call to ha_start_bulk_insert for not used partitions
        
        A bitmap is used to record the partitions for which
        ha_start_bulk_insert has already been called. Before continuing with
        an insert/update, the bitmap is checked to decide whether
        ha_start_bulk_insert must still be called for the target partition or
        has already been called.
        
        This way it will only call ha_start_bulk_insert for the used partitions.
        There is also a simple prediction of how many rows will be inserted
        into the current partition: it guesses an equal distribution of the
        records across all partitions, except for the first used partition,
        for which it guesses 50% of the given estimate if the partitioning
        function is monotonic.
     @ sql/ha_partition.h
        Bug#35845: unnecessary call to ha_start_bulk_insert for not used partitions
        
        Added helper variables and functions for delaying
        ha_start_bulk_insert until it has to be called.
        
        Fixed a comment.

    modified:
      sql/ha_partition.cc
      sql/ha_partition.h
=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc	2009-09-04 03:57:11 +0000
+++ b/sql/ha_partition.cc	2009-09-04 13:02:15 +0000
@@ -239,6 +239,7 @@ void ha_partition::init_handler_variable
   m_curr_key_info[0]= NULL;
   m_curr_key_info[1]= NULL;
   is_clone= FALSE,
+  m_part_func_monotonicity_info= NON_MONOTONIC;
   auto_increment_lock= FALSE;
   auto_increment_safe_stmt_log_lock= FALSE;
   /*
@@ -2464,11 +2465,18 @@ int ha_partition::open(const char *name,
     }
   }
 
+  /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
+  if (bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
+    DBUG_RETURN(1);
+  bitmap_clear_all(&m_bulk_insert_started);
   /* Initialize the bitmap we use to determine what partitions are used */
   if (!is_clone)
   {
     if (bitmap_init(&(m_part_info->used_partitions), NULL, m_tot_parts, TRUE))
+    {
+      bitmap_free(&m_bulk_insert_started);
       DBUG_RETURN(1);
+    }
     bitmap_set_all(&(m_part_info->used_partitions));
   }
 
@@ -2552,12 +2560,18 @@ int ha_partition::open(const char *name,
     calling open on all individual handlers.
   */
   m_handler_status= handler_opened;
+  if (m_part_info->part_expr)
+    m_part_func_monotonicity_info=
+                            m_part_info->part_expr->get_monotonicity_info();
+  else if (m_part_info->list_of_part_fields)
+    m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
   info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
   DBUG_RETURN(0);
 
 err_handler:
   while (file-- != m_file)
     (*file)->close();
+  bitmap_free(&m_bulk_insert_started);
   if (!is_clone)
     bitmap_free(&(m_part_info->used_partitions));
 
@@ -2605,6 +2619,7 @@ int ha_partition::close(void)
 
   DBUG_ASSERT(table->s == table_share);
   delete_queue(&m_queue);
+  bitmap_free(&m_bulk_insert_started);
   if (!is_clone)
     bitmap_free(&(m_part_info->used_partitions));
   file= m_file;
@@ -3021,6 +3036,8 @@ int ha_partition::write_row(uchar * buf)
   }
   m_last_part= part_id;
   DBUG_PRINT("info", ("Insert in partition %d", part_id));
+  start_part_bulk_insert(part_id);
+
   tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
   error= m_file[part_id]->ha_write_row(buf);
   if (have_auto_increment && !table->s->next_number_keypart)
@@ -3083,6 +3100,7 @@ int ha_partition::update_row(const uchar
   }
 
   m_last_part= new_part_id;
+  start_part_bulk_insert(new_part_id);
   if (new_part_id == old_part_id)
   {
     DBUG_PRINT("info", ("Update in partition %d", new_part_id));
@@ -3247,23 +3265,66 @@ int ha_partition::delete_all_rows()
   DESCRIPTION
     rows == 0 means we will probably insert many rows
 */
-
 void ha_partition::start_bulk_insert(ha_rows rows)
 {
-  handler **file;
   DBUG_ENTER("ha_partition::start_bulk_insert");
 
-  rows= rows ? rows/m_tot_parts + 1 : 0;
-  file= m_file;
-  do
-  {
-    (*file)->ha_start_bulk_insert(rows);
-  } while (*(++file));
+  m_bulk_inserted_rows= 0;
+  bitmap_clear_all(&m_bulk_insert_started);
+  /* use the last bit for marking if bulk_insert_started was called */
+  bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
   DBUG_VOID_RETURN;
 }
 
 
 /*
+  Check if start_bulk_insert has been called for this partition,
+  if not, call it and mark it called
+*/
+void ha_partition::start_part_bulk_insert(uint part_id)
+{
+  if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
+      bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
+  {
+    m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
+    bitmap_set_bit(&m_bulk_insert_started, part_id);
+  }
+  m_bulk_inserted_rows++;
+}
+
+
+/*
+  Try to predict the number of inserts into this partition.
+
+  If less than 10 rows (including 0 which means Unknown)
+    just give that as a guess
+  If monotonic partitioning function was used
+    guess that 50 % of the inserts goes to the first partition
+  For all other cases, guess on equal distribution between the partitions
+*/ 
+ha_rows ha_partition::guess_bulk_insert_rows()
+{
+  DBUG_ENTER("guess_bulk_insert_rows");
+
+  if (estimation_rows_to_insert < 10)
+    DBUG_RETURN(estimation_rows_to_insert);
+
+  /* If first insert/partition and monotonic partition function, guess 50%.  */
+  if (!m_bulk_inserted_rows && 
+      m_part_func_monotonicity_info != NON_MONOTONIC &&
+      m_tot_parts > 1)
+    DBUG_RETURN(estimation_rows_to_insert / 2);
+
+  /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
+  if (m_bulk_inserted_rows < estimation_rows_to_insert)
+    DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
+                / m_tot_parts) + 1);
+  /* The estimation was wrong, must say 'Unknown' */
+  DBUG_RETURN(0);
+}
+
+
+/*
   Finish a large batch of insert rows
 
   SYNOPSIS
@@ -3272,21 +3333,29 @@ void ha_partition::start_bulk_insert(ha_
   RETURN VALUE
     >0                      Error code
     0                       Success
+
+  Note: end_bulk_insert can be called without start_bulk_insert
+        being called, see bug#44108.
+
 */
 
 int ha_partition::end_bulk_insert()
 {
   int error= 0;
-  handler **file;
+  uint i;
   DBUG_ENTER("ha_partition::end_bulk_insert");
 
-  file= m_file;
-  do
+  if (!bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
+    DBUG_RETURN(error);
+
+  for (i= 0; i < m_tot_parts; i++)
   {
     int tmp;
-    if ((tmp= (*file)->ha_end_bulk_insert()))
+    if (bitmap_is_set(&m_bulk_insert_started, i) &&
+        (tmp= m_file[i]->ha_end_bulk_insert()))
       error= tmp;
-  } while (*(++file));
+  }
+  bitmap_clear_all(&m_bulk_insert_started);
   DBUG_RETURN(error);
 }
 

=== modified file 'sql/ha_partition.h'
--- a/sql/ha_partition.h	2009-09-04 03:57:11 +0000
+++ b/sql/ha_partition.h	2009-09-04 13:02:15 +0000
@@ -176,6 +176,11 @@ private:
     This to ensure it will work with statement based replication.
   */
   bool auto_increment_safe_stmt_log_lock;
+  /** For optimizing ha_start_bulk_insert calls */
+  MY_BITMAP m_bulk_insert_started;
+  ha_rows   m_bulk_inserted_rows;
+  /** used for prediction of start_bulk_insert rows */
+  enum_monotonicity_info m_part_func_monotonicity_info;
 public:
   handler *clone(MEM_ROOT *mem_root);
   virtual void set_part_info(partition_info *part_info)
@@ -353,7 +358,6 @@ public:
     Bulk inserts are supported if all underlying handlers support it.
     start_bulk_insert and end_bulk_insert is called before and after a
     number of calls to write_row.
-    Not yet though.
   */
   virtual int write_row(uchar * buf);
   virtual int update_row(const uchar * old_data, uchar * new_data);
@@ -361,6 +365,10 @@ public:
   virtual int delete_all_rows(void);
   virtual void start_bulk_insert(ha_rows rows);
   virtual int end_bulk_insert();
+private:
+  ha_rows guess_bulk_insert_rows();
+  void start_part_bulk_insert(uint part_id);
+public:
 
   virtual bool is_fatal_error(int error, uint flags)
   {

Attachment: [text/bzr-bundle]
Thread
bzr commit into mysql-5.1-bugteam branch (mattias.jonsson:3112)Bug#35845Mattias Jonsson4 Sep