#At file:///Users/mattiasj/clones/bzrroot/b35845-60-bugteam/
2697 Mattias Jonsson 2008-09-05
Bug#35845: unnecessary call to ha_start_bulk_insert for not used partitions
Problem is that when inserting (ha_start_bulk_insert) into a partitioned table,
it will call ha_start_bulk_insert for every partition, used or not.
Solution is to delay the call to the partition's ha_start_bulk_insert until
the first row is to be inserted into that partition.
modified:
sql/ha_partition.cc
sql/ha_partition.h
per-file messages:
sql/ha_partition.cc
Bug#35845: unnecessary call to ha_start_bulk_insert for not used partitions
Using a bitmap to keep a record of the partitions for which
ha_start_bulk_insert has been called, and checking against it whether one
should call it before continuing with the insert/update, or whether it has
already been called.
This way it will only call ha_start_bulk_insert for the used partitions.
There is also a little prediction of how many rows will be inserted into
the current partition, depending on whether HASH type partitioning is used
and how the distribution of the previous records has been.
sql/ha_partition.h
Bug#35845: unnecessary call to ha_start_bulk_insert for not used partitions
Added helper variables and a function for delaying ha_start_bulk_insert until
it has to be called.
=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc 2008-06-18 04:01:25 +0000
+++ b/sql/ha_partition.cc 2008-09-05 11:20:12 +0000
@@ -2351,11 +2351,17 @@ int ha_partition::open(const char *name,
}
}
+ /* Initialise the bitmap we use to minimize ha_start_bulk_insert calls */
+ if (bitmap_init(&m_bulk_insert_started, NULL, m_tot_parts + 1, FALSE))
+ DBUG_RETURN(1);
/* Initialise the bitmap we use to determine what partitions are used */
if (!is_clone)
{
if (bitmap_init(&(m_part_info->used_partitions), NULL, m_tot_parts, TRUE))
+ {
+ bitmap_free(&m_bulk_insert_started);
DBUG_RETURN(1);
+ }
bitmap_set_all(&(m_part_info->used_partitions));
}
@@ -2429,6 +2435,7 @@ int ha_partition::open(const char *name,
err_handler:
while (file-- != m_file)
(*file)->close();
+ bitmap_free(&m_bulk_insert_started);
DBUG_RETURN(error);
}
@@ -2474,6 +2481,7 @@ int ha_partition::close(void)
DBUG_ASSERT(table->s == table_share);
delete_queue(&m_queue);
+ bitmap_free(&m_bulk_insert_started);
if (!is_clone)
bitmap_free(&(m_part_info->used_partitions));
file= m_file;
@@ -2822,6 +2830,14 @@ int ha_partition::write_row(uchar * buf)
}
m_last_part= part_id;
DBUG_PRINT("info", ("Insert in partition %d", part_id));
+ if (!bitmap_is_set(&m_bulk_insert_started, part_id) &&
+ bitmap_is_set(&m_bulk_insert_started, m_tot_parts))
+ {
+ m_file[part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
+ bitmap_set_bit(&m_bulk_insert_started, part_id);
+ }
+ m_bulk_inserted_rows++;
+
tmp_disable_binlog(thd); /* Do not replicate the low-level changes. */
error= m_file[part_id]->ha_write_row(buf);
if (have_auto_increment && !table->s->next_number_keypart)
@@ -2892,6 +2908,13 @@ int ha_partition::update_row(const uchar
}
m_last_part= new_part_id;
+ if (bitmap_is_set(&m_bulk_insert_started, m_tot_parts) &&
+ !bitmap_is_set(&m_bulk_insert_started, new_part_id))
+ {
+ m_file[new_part_id]->ha_start_bulk_insert(guess_bulk_insert_rows());
+ bitmap_set_bit(&m_bulk_insert_started, new_part_id);
+ }
+ m_bulk_inserted_rows++;
if (new_part_id == old_part_id)
{
DBUG_PRINT("info", ("Update in partition %d", new_part_id));
@@ -3044,23 +3067,58 @@ int ha_partition::delete_all_rows()
DESCRIPTION
rows == 0 means we will probably insert many rows
*/
-
void ha_partition::start_bulk_insert(ha_rows rows)
{
- handler **file;
DBUG_ENTER("ha_partition::start_bulk_insert");
- rows= rows ? rows/m_tot_parts + 1 : 0;
- file= m_file;
- do
- {
- (*file)->ha_start_bulk_insert(rows);
- } while (*(++file));
+ m_bulk_total_rows= rows;
+ m_bulk_inserted_rows= 0;
+ bitmap_clear_all(&m_bulk_insert_started);
+ /* use the last bit for marking if bulk_insert_started was called */
+ bitmap_set_bit(&m_bulk_insert_started, m_tot_parts);
DBUG_VOID_RETURN;
}
/*
+  Try to predict the number of inserts into this partition.
+
+  Returns 0 (unknown) if there is no estimate or it is already used up.
+  If hash or key partitioning and the number of inserts so far is at most
+  twice the expected number of rows for the partitions used so far:
+    guess that it is evenly distributed over all partitions
+  otherwise
+    guess that the number of rows decrease for each newly used partition
+    (i.e. 50% for the first used partition, 33% of the remaining inserts to
+    the second used partition, 25% of the remains to the third and so on.)
+*/
+ha_rows ha_partition::guess_bulk_insert_rows()
+{
+  uint call_num;
+  ha_rows average_rows_per_partition;
+  DBUG_ENTER("guess_bulk_insert_rows");
+  /* Estimate used up: the subtraction below must not go below zero. */
+  if (m_bulk_total_rows == 0 || m_bulk_inserted_rows >= m_bulk_total_rows)
+    DBUG_RETURN(0);
+  average_rows_per_partition= (m_bulk_total_rows / m_tot_parts);
+  if (!average_rows_per_partition)
+    average_rows_per_partition= 1;
+  /* Started partitions plus the marker bit set by start_bulk_insert(). */
+  call_num= bitmap_bits_set(&m_bulk_insert_started);
+  if (m_part_info->part_type != HASH_PARTITION ||
+      m_bulk_inserted_rows > 2 * call_num * average_rows_per_partition)
+    average_rows_per_partition= (m_bulk_total_rows - m_bulk_inserted_rows) /
+                                (call_num + 1);
+  if (!average_rows_per_partition)
+    average_rows_per_partition= 1;
+  DBUG_PRINT("info", ("tot %lu done %lu call_num %u guess %lu",
+                      m_bulk_total_rows, m_bulk_inserted_rows, call_num,
+                      average_rows_per_partition));
+  DBUG_RETURN(average_rows_per_partition);
+}
+
+
+/*
Finish a large batch of insert rows
SYNOPSIS
@@ -3074,16 +3132,18 @@ void ha_partition::start_bulk_insert(ha_
int ha_partition::end_bulk_insert()
{
int error= 0;
- handler **file;
+ uint i;
DBUG_ENTER("ha_partition::end_bulk_insert");
- file= m_file;
- do
+ DBUG_ASSERT(bitmap_is_set(&m_bulk_insert_started, m_tot_parts));
+ for (i= 0; i < m_tot_parts; i++)
{
int tmp;
- if ((tmp= (*file)->ha_end_bulk_insert()))
+ if (bitmap_is_set(&m_bulk_insert_started, i) &&
+ (tmp= m_file[i]->ha_end_bulk_insert()))
error= tmp;
- } while (*(++file));
+ }
+ bitmap_clear_all(&m_bulk_insert_started);
DBUG_RETURN(error);
}
=== modified file 'sql/ha_partition.h'
--- a/sql/ha_partition.h 2008-05-08 13:01:30 +0000
+++ b/sql/ha_partition.h 2008-09-05 11:20:12 +0000
@@ -155,6 +155,10 @@ private:
This to ensure it will work with statement based replication.
*/
bool auto_increment_safe_stmt_log_lock;
+ /** For optimizing ha_start_bulk_insert calls */
+ MY_BITMAP m_bulk_insert_started;
+ ha_rows m_bulk_total_rows;
+ ha_rows m_bulk_inserted_rows;
public:
handler *clone(MEM_ROOT *mem_root);
virtual void set_part_info(partition_info *part_info, bool early)
@@ -322,7 +326,6 @@ public:
Bulk inserts are supported if all underlying handlers support it.
start_bulk_insert and end_bulk_insert is called before and after a
number of calls to write_row.
- Not yet though.
*/
virtual int write_row(uchar * buf);
virtual int update_row(const uchar * old_data, uchar * new_data);
@@ -330,6 +333,9 @@ public:
virtual int delete_all_rows(void);
virtual void start_bulk_insert(ha_rows rows);
virtual int end_bulk_insert();
+private:
+ ha_rows guess_bulk_insert_rows();
+public:
virtual bool is_fatal_error(int error, uint flags)
{
| Thread |
|---|
| • bzr commit into mysql-6.0 branch (mattias.jonsson:2697) Bug#35845 | Mattias Jonsson | 5 Sep |