#At file:///Users/mattiasj/clones/bzrroot/b47261-51-bugteam/ based on revid:v.narayanan@stripped
3160 Mattias Jonsson 2009-10-13
Bug#47261: 5.1 fix for Partitioning performance drop...
with hundreds of partitions
One problem that is solvable in 5.1 is that the sorting of locks
is not partitioning aware, every partition have its on locks,
and they are already sorted in partition order, but sort_locks
sorted them as independent locks.
Solution was to group locks togather as already sorted locks, and
only sort those group of locks.
And as a part of this change I also changed the sorting algorithm
from an O(n2) to O(nlog(n)), by using my_qsort instead. The order
remains the same.
@ include/thr_lock.h
Bug#47261: 5.1 fix for Partitioning performance drop...
with hundreds of partitions
Added flag for grouping locks for use with partitioning,
since all locks in a partitioned table is stored in
the same order.
@ mysys/thr_lock.c
Bug#47261: 5.1 fix for Partitioning performance drop...
with hundreds of partitions
Changed the sorting of locks into two parts, first pick out
the locks that need to be sorted (only using the first lock in
groups already sorted, like locks for partitioned tables).
And then sorting only these locks, then copying the locks in the
correct order.
To ease the ordering, I changed the inplace ordering in thr_multi_lock
to only read the first count members, and then write the ordered locks
into the second count members (since that was already handled that way
in sql/lock.cc).
@ sql/ha_partition.cc
Bug#47261: 5.1 fix for Partitioning performance drop...
with hundreds of partitions
Added grouping of locks to be able to increase the
performance of sorting locks for heavily partitioned
tables, by using the fact that the locks within a
partitioned table is already stored in partition
order.
Note the DBUG_ASSERT(prev == (to - 1)), since if an engine stores
several locks we cannot ensure the correct order.
@ sql/lock.cc
Bug#47261: 5.1 fix for Partitioning performance drop...
with hundreds of partitions
Since the lock array was already doubled, I moved the
copying of the sorted array to thr_multi_lock instead,
to ease the implementation.
(Fixed a typo in a comment too...)
modified:
include/thr_lock.h
mysys/thr_lock.c
sql/ha_partition.cc
sql/lock.cc
=== modified file 'include/thr_lock.h'
--- a/include/thr_lock.h 2009-04-14 12:05:32 +0000
+++ b/include/thr_lock.h 2009-10-13 14:59:43 +0000
@@ -119,6 +119,13 @@ typedef struct st_thr_lock_data {
enum thr_lock_type type;
void *status_param; /* Param to status functions */
void *debug_print_param;
+ /*
+ For already sorted locks, i.e. locks on partitions which are always stored
+ in partition id order, this is set for all but the last lock.
+ This is to be able to sort the whole group only by the first lock of that
+ group of locks.
+ */
+ my_bool have_more_sorted_locks;
} THR_LOCK_DATA;
struct st_lock_list {
=== modified file 'mysys/thr_lock.c'
--- a/mysys/thr_lock.c 2009-09-29 15:38:40 +0000
+++ b/mysys/thr_lock.c 2009-10-13 14:59:43 +0000
@@ -358,6 +358,7 @@ void thr_lock_data_init(THR_LOCK *lock,T
data->owner= 0; /* no owner yet */
data->status_param=param;
data->cond=0;
+ data->have_more_sorted_locks= FALSE;
}
@@ -958,49 +959,155 @@ end:
*/
-#define LOCK_CMP(A,B) ((uchar*) (A->lock) - (uint) ((A)->type) < (uchar*) (B->lock)- (uint) ((B)->type))
+/** Structure to be able to use my_qsort for groups of already sorted locks */
+struct st_sort_node {
+ THR_LOCK_DATA *lock;
+ uint index;
+};
-static void sort_locks(THR_LOCK_DATA **data,uint count)
-{
- THR_LOCK_DATA **pos,**end,**prev,*tmp;
+typedef struct st_sort_node SORT_NODE;
+
+
+/**
+ Compare two locks
+
+ @param a sort_node pointer (including THR_LOCK_DATA pointer)
+ @param b sort_node pointer (including THR_LOCK_DATA pointer)
+
+ @return a cmp b
+ @retval -1 a < b
+ @retval 0 a = b
+ @retval 1 a > b
- /* Sort locks with insertion sort (fast because almost always few locks) */
+ @note Sort locks by address (lock position), and on secondary level on type.
+*/
- for (pos=data+1,end=data+count; pos < end ; pos++)
+static int thr_lock_data_cmp(SORT_NODE *a, SORT_NODE *b)
+{
+ THR_LOCK_DATA *al= a->lock;
+ THR_LOCK_DATA *bl= b->lock;
+ /* First sort by lock address (position) */
+ if ((uchar*) al->lock < (uchar*) bl->lock)
+ return -1;
+ else if ((uchar*) al->lock > (uchar*) bl->lock)
+ return 1;
+ else
{
- tmp= *pos;
- if (LOCK_CMP(tmp,pos[-1]))
- {
- prev=pos;
- do {
- prev[0]=prev[-1];
- } while (--prev != data && LOCK_CMP(tmp,prev[-1]));
- prev[0]=tmp;
- }
+ /* if same lock, sort higher priority (higher type) before lower */
+ if ((uint) al->type > (uint) bl->type)
+ return -1;
+ else if ((uint) al->type < (uint) bl->type)
+ return 1;
}
+ return 0;
}
+
+
+/**
+ Lock multiple locks in a consitent order
+
+ @param data pointer to an array of 2*count THR_LOCK_DATA elements,
+ where the first half is the locks to lock (read),
+ and the second half is the locks sorted (written)
+ @param count number of locks to lock
+ @param owner owner of the lock
+
+ @return Operation status
+
+ @note Data is read from the first count members, sorted, written to
+ the second count part and then locked in that order.
+*/
+
enum enum_thr_lock_result
thr_multi_lock(THR_LOCK_DATA **data, uint count, THR_LOCK_OWNER *owner)
{
- THR_LOCK_DATA **pos,**end;
+ THR_LOCK_DATA **pos, **end;
+ THR_LOCK_DATA **sorted_data= data + count;
DBUG_ENTER("thr_multi_lock");
DBUG_PRINT("lock",("data: 0x%lx count: %d", (long) data, count));
+
if (count > 1)
- sort_locks(data,count);
+ {
+ SORT_NODE *sort_array;
+ uint i;
+ uint sort_count= 0;
+ /*
+ Prepare an array of pointers to the lock_data's, with only the first
+ lock_data in every group of already sorted lock_data's.
+ I.e. only including the first partition's lock_data in a partitioned
+ table.
+ */
+ sort_array= (SORT_NODE*) my_malloc(count*sizeof(SORT_NODE), MYF(0));
+ if (!sort_array)
+ DBUG_RETURN(THR_LOCK_ABORTED);
+ i= 0;
+ for (pos=data, end=data+count; pos < end ; pos++)
+ {
+ sort_array[sort_count].index= i++;
+ sort_array[sort_count++].lock= *pos;
+
+ /* Skip the rest of the already sorted group of lock_data's */
+ while ((*pos)->have_more_sorted_locks)
+ {
+ i++;
+ pos++;
+ }
+ }
+
+ if (sort_count > 1)
+ {
+ uint j= 0;
+ /* Sort the array of pointers. */
+ my_qsort(sort_array, sort_count, sizeof(SORT_NODE),
+ (qsort_cmp) thr_lock_data_cmp);
+
+ /* Write the sorted 'data' to 'sorted_data' */
+ for (i= 0; i < sort_count; i++)
+ {
+ DBUG_PRINT("info", ("moving lock %u to %u", sort_array[i].index, j));
+ sorted_data[j++]= sort_array[i].lock;
+
+ /*
+ Copy the rest of the already sorted group of lock_data's
+ after the newly sorted lock_data (which was the first lock_data
+ of the group).
+ */
+ pos= &data[sort_array[i].index];
+ while ((*pos)->have_more_sorted_locks)
+ {
+ DBUG_PRINT("info", ("moving secondary partition lock of %u to %u",
+ sort_array[i].index, j));
+ sorted_data[j++]= *(++pos);
+ }
+ }
+ DBUG_ASSERT(count == j);
+ }
+ else
+ memcpy(sorted_data, data, count * sizeof(THR_LOCK_DATA*));
+ my_free(sort_array, MYF(0));
+ }
+ else
+ sorted_data[0]= data[0];
+
+#ifdef MAIN
+ for (pos=sorted_data,end=sorted_data+count; pos < end ; pos++)
+ printf("Thread: %s lock order: 0x%lx (0x%lx) type: %d\n",my_thread_name(),
+ (long) pos[0]->lock, (ulong) pos[0], pos[0]->type); fflush(stdout);
+#endif
/* lock everything */
- for (pos=data,end=data+count; pos < end ; pos++)
+ for (pos=sorted_data,end=sorted_data+count; pos < end ; pos++)
{
enum enum_thr_lock_result result= thr_lock(*pos, owner, (*pos)->type);
if (result != THR_LOCK_SUCCESS)
{ /* Aborted */
- thr_multi_unlock(data,(uint) (pos-data));
+ thr_multi_unlock(sorted_data,(uint) (pos-sorted_data));
DBUG_RETURN(result);
}
#ifdef MAIN
- printf("Thread: %s Got lock: 0x%lx type: %d\n",my_thread_name(),
- (long) pos[0]->lock, pos[0]->type); fflush(stdout);
+ printf("Thread: %s Got lock: 0x%lx (0x%lx) type: %d\n",my_thread_name(),
+ (long) pos[0]->lock, (ulong) pos[0], pos[0]->type); fflush(stdout);
#endif
}
/*
@@ -1029,7 +1136,7 @@ thr_multi_lock(THR_LOCK_DATA **data, uin
*/
for (;
(*pos)->type <= TL_READ_NO_INSERT &&
- pos != data &&
+ pos != sorted_data &&
pos[-1]->lock == (*pos)->lock ;
pos--) ;
@@ -1047,7 +1154,7 @@ thr_multi_lock(THR_LOCK_DATA **data, uin
}
else
last_lock=(*pos);
- } while (pos != data);
+ } while (pos != sorted_data);
}
#endif
DBUG_RETURN(THR_LOCK_SUCCESS);
@@ -1064,8 +1171,8 @@ void thr_multi_unlock(THR_LOCK_DATA **da
for (pos=data,end=data+count; pos < end ; pos++)
{
#ifdef MAIN
- printf("Thread: %s Rel lock: 0x%lx type: %d\n",
- my_thread_name(), (long) pos[0]->lock, pos[0]->type);
+ printf("Thread: %s Rel lock: 0x%lx (0x%lx) type: %d\n",
+ my_thread_name(), (long) pos[0]->lock, (ulong) pos[0], pos[0]->type);
fflush(stdout);
#endif
if ((*pos)->type != TL_UNLOCK)
@@ -1621,7 +1728,7 @@ static void *test_thread(void *arg)
THR_LOCK_DATA data[MAX_LOCK_COUNT];
THR_LOCK_OWNER owner;
THR_LOCK_INFO lock_info;
- THR_LOCK_DATA *multi_locks[MAX_LOCK_COUNT];
+ THR_LOCK_DATA *multi_locks[2*MAX_LOCK_COUNT];
my_thread_init();
printf("Thread %s (%d) started\n",my_thread_name(),param); fflush(stdout);
@@ -1630,7 +1737,12 @@ static void *test_thread(void *arg)
thr_lock_info_init(&lock_info);
thr_lock_owner_init(&owner, &lock_info);
for (i=0; i < lock_counts[param] ; i++)
+ {
thr_lock_data_init(locks+tests[param][i].lock_nr,data+i,NULL);
+ printf("Thread %s (%d) uses lock %u (%u/%u)\n",
+ my_thread_name(), param, tests[param][i].lock_nr,
+ i, lock_counts[param]);
+ }
for (j=1 ; j < 10 ; j++) /* try locking 10 times */
{
for (i=0; i < lock_counts[param] ; i++)
@@ -1638,9 +1750,14 @@ static void *test_thread(void *arg)
multi_locks[i]= &data[i];
data[i].type= tests[param][i].lock_type;
}
+ printf("Thread %s (%d) trying to get %u locks\n",my_thread_name(),param,
+ lock_counts[param]);
+ fflush(stdout);
thr_multi_lock(multi_locks, lock_counts[param], &owner);
pthread_mutex_lock(&LOCK_thread_count);
{
+ printf("Thread %s (%d) working\n",my_thread_name(),param);
+ fflush(stdout);
int tmp=rand() & 7; /* Do something from 0-2 sec */
if (tmp == 0)
sleep(1);
@@ -1654,7 +1771,7 @@ static void *test_thread(void *arg)
}
}
pthread_mutex_unlock(&LOCK_thread_count);
- thr_multi_unlock(multi_locks,lock_counts[param]);
+ thr_multi_unlock(multi_locks+lock_counts[param],lock_counts[param]);
}
printf("Thread %s (%d) ended\n",my_thread_name(),param); fflush(stdout);
@@ -1753,7 +1870,9 @@ int main(int argc __attribute__((unused)
{
if ((error=pthread_cond_wait(&COND_thread_count,&LOCK_thread_count)))
fprintf(stderr,"Got error: %d from pthread_cond_wait\n",error);
+ printf("Thread count %u\n", thread_count);
}
+ printf("All threads are done\n");
if ((error=pthread_mutex_unlock(&LOCK_thread_count)))
fprintf(stderr,"Got error: %d from pthread_mutex_unlock\n",error);
for (i=0 ; i < (int) array_elements(locks) ; i++)
=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc 2009-10-09 07:56:07 +0000
+++ b/sql/ha_partition.cc 2009-10-13 14:59:43 +0000
@@ -2774,13 +2774,36 @@ THR_LOCK_DATA **ha_partition::store_lock
enum thr_lock_type lock_type)
{
handler **file;
+ THR_LOCK_DATA **first= to;
+#ifndef DBUG_OFF
+ THR_LOCK_DATA **prev;
+#endif
DBUG_ENTER("ha_partition::store_lock");
file= m_file;
do
{
+#ifndef DBUG_OFF
DBUG_PRINT("info", ("store lock %d iteration", (int) (file - m_file)));
+ prev= to;
+#endif
to= (*file)->store_lock(thd, to, lock_type);
+ /*
+ Ensure that engines stores exactly one lock, have_more_sorted_locks may
+ not work properly otherwise.
+ */
+#ifndef DBUG_OFF
+ DBUG_ASSERT(prev == (to - 1));
+#endif
} while (*(++file));
+
+ /* Mark all partitions accept the last with have_more_sorted_locks */
+ while (first < (to - 1))
+ {
+ /* May already be set by concurrent handler */
+ (*(first++))->have_more_sorted_locks= TRUE;
+ }
+ /* Last should never be set! */
+ DBUG_ASSERT(!(*first)->have_more_sorted_locks);
DBUG_RETURN(to);
}
=== modified file 'sql/lock.cc'
--- a/sql/lock.cc 2009-08-28 16:21:54 +0000
+++ b/sql/lock.cc 2009-10-13 14:59:43 +0000
@@ -269,12 +269,11 @@ MYSQL_LOCK *mysql_lock_tables(THD *thd,
thd_proc_info(thd, "Table lock");
DBUG_PRINT("info", ("thd->proc_info %s", thd->proc_info));
thd->locked=1;
- /* Copy the lock data array. thr_multi_lock() reorders its contens. */
- memcpy(sql_lock->locks + sql_lock->lock_count, sql_lock->locks,
- sql_lock->lock_count * sizeof(*sql_lock->locks));
- /* Lock on the copied half of the lock data array. */
- rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks +
- sql_lock->lock_count,
+ /*
+ thr_multi_lock reads the locks in the first lock_count elements,
+ and writes them ordered into the second lock_count elements.
+ */
+ rc= thr_lock_errno_to_mysql[(int) thr_multi_lock(sql_lock->locks,
sql_lock->lock_count,
thd->lock_id)];
if (rc > 1) /* a timeout or a deadlock */
@@ -832,7 +831,7 @@ static MYSQL_LOCK *get_lock_data(THD *th
/*
Allocating twice the number of pointers for lock data for use in
- thr_mulit_lock(). This function reorders the lock data, but cannot
+ thr_multi_lock(). This function reorders the lock data, but cannot
update the table values. So the second part of the array is copied
from the first part immediately before calling thr_multi_lock().
*/
Attachment: [text/bzr-bundle]
| Thread |
|---|
| • bzr commit into mysql-5.1-bugteam branch (mattias.jonsson:3160)Bug#47261 | Mattias Jonsson | 13 Oct |