List: Commits
From: Anurag Shekhar  Date: September 10 2009 7:48am
Subject: bzr commit into mysql-5.1-bugteam branch (anurag.shekhar:3111) Bug#45840
#At file:///home/anurag/mysqlsrc/mysql-5.1-bugteam-45840/ based on revid:joro@stripped

 3111 Anurag Shekhar	2009-09-10
      Bug #45840 read_buffer_size allocated for each partition when 
                   "insert into.. select * from"
        
      When inserting into a partitioned table using 'insert into <target> 
      select * from <src>', read_buffer_size bytes of memory are allocated 
      for each partition in the target table.
      
      This resulted in large memory consumption when the number of partitions 
      is high.
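      
      As an illustration (table and column names here are hypothetical), a 
      statement of the following form against a table with 1024 partitions 
      would allocate 1024 read buffers at once:
      
        CREATE TABLE t1 (a INT)
          PARTITION BY HASH (a) PARTITIONS 1024;
        INSERT INTO t1 SELECT a FROM src;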
      
      This patch introduces a new method that estimates the buffer size 
      required for each partition and limits the total buffer size used 
      across all partitions to at most 10 * read_buffer_size.
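      
      For example, assuming the default read_buffer_size of 128K and a table 
      with 256 partitions, each partition gets roughly 128K * 10 / 256 = 5K, 
      so the insert uses about 1.25M of read buffers in total instead of 32M.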
      
      Additionally, the fix for bug#35845 is back ported; it restricts bulk 
      insert initialization to the partitions actually expected to be 
      affected by the insert, resulting in lower resource usage.
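      
      For example (using the hypothetical table above), with
      
        INSERT INTO t1 VALUES (1), (2), (3);
      
      only the partitions that actually receive one of these rows have 
      ha_start_bulk_insert called on them; untouched partitions are skipped.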
     @ sql/ha_partition.cc
        Back ported the patch for bug#35845.
        Introduced a method ha_partition::estimate_read_buffer_size
        to estimate the buffer size required for each partition.
        ha_partition::start_part_bulk_insert now adjusts read_buffer_size
        before calling bulk insert in the storage engines.
     @ sql/ha_partition.h
        Back ported the patch for bug#35845.
        Introduced a method ha_partition::estimate_read_buffer_size.

    modified:
      sql/ha_partition.cc
      sql/ha_partition.h
=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc	2009-09-07 10:22:57 +0000
+++ b/sql/ha_partition.cc	2009-09-10 07:48:47 +0000
@@ -239,6 +239,7 @@ void ha_partition::init_handler_variable
   m_curr_key_info[0]= NULL;
   m_curr_key_info[1]= NULL;
   is_clone= FALSE,
+  m_part_func_monotonicity_info= NON_MONOTONIC;
   auto_increment_lock= FALSE;
   auto_increment_safe_stmt_log_lock= FALSE;
   /*
@@ -2464,12 +2465,18 @@ int ha_partition::open(const char *name,
       m_start_key.key= (const uchar*)ptr;
     }
   }
-
+  
+  /* Initialize the bitmap we use to minimize ha_start_bulk_insert calls */
+  if (bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
+    DBUG_RETURN(1);
   /* Initialize the bitmap we use to determine what partitions are used */
   if (!is_clone)
   {
     if (bitmap_init(&(m_part_info->used_partitions), NULL, m_tot_parts, TRUE))
+    {
+      bitmap_free(&m_bulk_insert_started);
       DBUG_RETURN(1);
+    }
     bitmap_set_all(&(m_part_info->used_partitions));
   }
 
@@ -2553,12 +2560,18 @@ int ha_partition::open(const char *name,
     calling open on all individual handlers.
   */
   m_handler_status= handler_opened;
+  if (m_part_info->part_expr)
+    m_part_func_monotonicity_info=
+                            m_part_info->part_expr->get_monotonicity_info();
+  else if (m_part_info->list_of_part_fields)
+    m_part_func_monotonicity_info= MONOTONIC_STRICT_INCREASING;
   info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
   DBUG_RETURN(0);
 
 err_handler:
   while (file-- != m_file)
     (*file)->close();
+  bitmap_free(&m_bulk_insert_started);
   if (!is_clone)
     bitmap_free(&(m_part_info->used_partitions));
 
@@ -2606,6 +2619,7 @@ int ha_partition::close(void)
 
   DBUG_ASSERT(table->s == table_share);
   delete_queue(&m_queue);
+  bitmap_free(&m_bulk_insert_started);
   if (!is_clone)
     bitmap_free(&(m_part_info->used_partitions));
   file= m_file;
@@ -3021,7 +3035,8 @@ int ha_partition::write_row(uchar * buf)
     goto exit;
   }
   m_last_part= part_id;
-  DBUG_PRINT("info", ("Insert in partition %d", part_id));
+  start_part_bulk_insert(part_id);
+
   tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
   error= m_file[part_id]->ha_write_row(buf);
   if (have_auto_increment && !table->s->next_number_keypart)
@@ -3084,6 +3099,7 @@ int ha_partition::update_row(const uchar
   }
 
   m_last_part= new_part_id;
+  start_part_bulk_insert(new_part_id);
   if (new_part_id == old_part_id)
   {
     DBUG_PRINT("info", ("Update in partition %d", new_part_id));
@@ -3248,23 +3264,100 @@ int ha_partition::delete_all_rows()
   DESCRIPTION
     rows == 0 means we will probably insert many rows
 */
-
 void ha_partition::start_bulk_insert(ha_rows rows)
 {
-  handler **file;
   DBUG_ENTER("ha_partition::start_bulk_insert");
 
-  rows= rows ? rows/m_tot_parts + 1 : 0;
-  file= m_file;
-  do
-  {
-    (*file)->ha_start_bulk_insert(rows);
-  } while (*(++file));
+  m_bulk_inserted_rows= 0;
+  bitmap_clear_all(&m_bulk_insert_started);
+  /* use the last bit for marking if bulk_insert_started was called */
+  bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
   DBUG_VOID_RETURN;
 }
 
 
 /*
+  Check if start_bulk_insert has been called for this partition,
+  if not, call it and mark it called
+*/
+void ha_partition::start_part_bulk_insert(uint part_id)
+{
+  long old_buffer_size;
+  THD *thd;
+  if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
+      bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
+  {
+    thd= ha_thd();
+    old_buffer_size= thd->variables.read_buff_size;
+    /* Update read_buffer_size for this partition */
+    thd->variables.read_buff_size= estimate_read_buffer_size(old_buffer_size);
+    m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
+    bitmap_set_bit(&m_bulk_insert_started, part_id);
+    thd->variables.read_buff_size= old_buffer_size;
+  }
+  m_bulk_inserted_rows++;
+}
+
+/*
+  Estimate the read buffer size for each partition.
+*/
+long ha_partition::estimate_read_buffer_size(long original_size)
+{
+  /*
+    If the number of rows to insert is less than 10, but not 0,
+    retain the original buffer size.
+  */
+  if (estimation_rows_to_insert && (estimation_rows_to_insert < 10))
+    return (original_size);
+  /*
+    If first insert/partition and monotonic partition function,
+    allow using the buffer size originally set.
+  */
+  if (!m_bulk_inserted_rows &&
+      m_part_func_monotonicity_info != NON_MONOTONIC &&
+      m_tot_parts > 1)
+    return original_size;
+  /*
+    Allow total buffer used in all partitions to go up to 10*read_buffer_size.
+  */
+
+  if (m_tot_parts < 10)
+    return original_size;
+  return (original_size * 10 / m_tot_parts);
+}
+
+/*
+  Try to predict the number of inserts into this partition.
+
+  If less than 10 rows (including 0 which means Unknown)
+    just give that as a guess
+  If a monotonic partitioning function was used
+    guess that 50% of the inserts go to the first partition
+  For all other cases, guess on equal distribution between the partitions
+*/ 
+ha_rows ha_partition::guess_bulk_insert_rows()
+{
+  DBUG_ENTER("guess_bulk_insert_rows");
+
+  if (estimation_rows_to_insert < 10)
+    DBUG_RETURN(estimation_rows_to_insert);
+
+  /* If first insert/partition and monotonic partition function, guess 50%.  */
+  if (!m_bulk_inserted_rows && 
+      m_part_func_monotonicity_info != NON_MONOTONIC &&
+      m_tot_parts > 1)
+    DBUG_RETURN(estimation_rows_to_insert / 2);
+
+  /* Else guess on equal distribution (+1 is to avoid returning 0/Unknown) */
+  if (m_bulk_inserted_rows < estimation_rows_to_insert)
+    DBUG_RETURN(((estimation_rows_to_insert - m_bulk_inserted_rows)
+                / m_tot_parts) + 1);
+  /* The estimation was wrong, must say 'Unknown' */
+  DBUG_RETURN(0);
+}
+
+
+/*
   Finish a large batch of insert rows
 
   SYNOPSIS
@@ -3278,16 +3371,18 @@ void ha_partition::start_bulk_insert(ha_
 int ha_partition::end_bulk_insert()
 {
   int error= 0;
-  handler **file;
+  uint i;
   DBUG_ENTER("ha_partition::end_bulk_insert");
 
-  file= m_file;
-  do
+  DBUG_ASSERT(bitmap_is_set(&m_bulk_insert_started, m_tot_parts));
+  for (i= 0; i < m_tot_parts; i++)
   {
     int tmp;
-    if ((tmp= (*file)->ha_end_bulk_insert()))
+    if (bitmap_is_set(&m_bulk_insert_started, i) &&
+        (tmp= m_file[i]->ha_end_bulk_insert()))
       error= tmp;
-  } while (*(++file));
+  }
+  bitmap_clear_all(&m_bulk_insert_started);
   DBUG_RETURN(error);
 }
 

=== modified file 'sql/ha_partition.h'
--- a/sql/ha_partition.h	2009-09-04 03:57:11 +0000
+++ b/sql/ha_partition.h	2009-09-10 07:48:47 +0000
@@ -176,6 +176,11 @@ private:
     This to ensure it will work with statement based replication.
   */
   bool auto_increment_safe_stmt_log_lock;
+  /** For optimizing ha_start_bulk_insert calls */
+  MY_BITMAP m_bulk_insert_started;
+  ha_rows   m_bulk_inserted_rows;
+  /* used for prediction of start_bulk_insert rows */
+  enum_monotonicity_info m_part_func_monotonicity_info;
 public:
   handler *clone(MEM_ROOT *mem_root);
   virtual void set_part_info(partition_info *part_info)
@@ -353,7 +358,6 @@ public:
     Bulk inserts are supported if all underlying handlers support it.
     start_bulk_insert and end_bulk_insert is called before and after a
     number of calls to write_row.
-    Not yet though.
   */
   virtual int write_row(uchar * buf);
   virtual int update_row(const uchar * old_data, uchar * new_data);
@@ -361,6 +365,11 @@ public:
   virtual int delete_all_rows(void);
   virtual void start_bulk_insert(ha_rows rows);
   virtual int end_bulk_insert();
+private:
+  ha_rows guess_bulk_insert_rows();
+  long estimate_read_buffer_size(long original_size);
+  void start_part_bulk_insert(uint part_id);
+public:
 
   virtual bool is_fatal_error(int error, uint flags)
   {


Attachment: [text/bzr-bundle] bzr/anurag.shekhar@sun.com-20090910074847-bpsnhpg1cpyc5jtz.bundle