#At file:///Users/malff/BZR_TREE/mysql-next-mr-bugfixing-52502/ based on revid:alik@stripped
3144 Marc Alff 2010-03-31
Bug#52502 Performance schema does not start with large mutex_instance buffers
This is a performance issue affecting scalability.
Prior to this fix, the performance schema code did not scale well
when a very large number of instruments are created in the code.
The root cause was create_mutex() spinning "forever",
trying to find an empty slot in a full mutex_array.
With this fix:
- functions like create_mutex() abort sooner,
when used with a very large array which is full or almost full.
- randomize_index() has been revised to improve
the statistical spread of values in the mutex_array.
See the comments in the code for details.
Other allocation functions similar to create_mutex()
have been also fixed.
No MTR test case provided (performance scalability issue).
Fix tested manually.
modified:
storage/perfschema/pfs_global.h
storage/perfschema/pfs_instr.cc
storage/perfschema/pfs_instr.h
=== modified file 'storage/perfschema/pfs_global.h'
--- a/storage/perfschema/pfs_global.h 2010-01-12 01:47:27 +0000
+++ b/storage/perfschema/pfs_global.h 2010-03-31 15:48:47 +0000
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008-2009 Sun Microsystems, Inc
+/* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -30,27 +30,49 @@ void pfs_free(void *ptr);
inline uint randomized_index(const void *ptr, uint max_size)
{
+ static uint seed1= 0;
+ static uint seed2= 0;
+ uint result;
+ register intptr value;
+
if (unlikely(max_size == 0))
return 0;
/*
- ptr is typically an aligned structure,
- so the last bits are not really random, but this has no effect.
- Apply a factor A*x to spread
- close values of ptr further apart (which helps with arrays),
- and to spread values way beyond a typical max_size.
- Then, apply a modulo to end within [0, max_size - 1].
- A is big prime numbers, to avoid resonating with max_size,
- to have a uniform distribution in [0, max_size - 1].
- The value of A is chosen so that index(ptr) and index(ptr + N) (for arrays)
- are likely to be not similar for typical values of max_size
- (50, 100, 1000, etc).
- In other words, (sizeof(T)*A % max_size) should not be a small number,
- to avoid that with 'T array[max_size]', index(array[i])
- and index(array[i + 1]) end up pointing in the same area in [0, max_size - 1].
+ ptr is typically an aligned structure, and can be in an array.
+ - The last bits are not random because of alignment,
+ so we divide by 8.
+ - The high bits are mostly constant, especially with 64 bits architectures,
+ but we keep most of them anyway, by doing computation in intptr.
+ The high bits are significant depending on where the data is
+ stored (the data segment, the stack, the heap, ...).
+ - To spread consecutive cells in an array further, we multiply by
+ a factor A. This factor should not be too high, which would cause
+ an overflow and cause loss of randomness (droping the top high bits).
+ The factor is a prime number, to help spread the distribution.
+ - To add more noise, and to be more robust if the calling code is
+ passing a constant value instead of a random identity,
+ we add the previous results, for hysteresys, with a degree 2 polynom,
+ X^2 + X + 1.
+ - Last, a modulo is applied to be within the [0, max_size - 1] range.
+ Note that seed1 and seed2 are static, and are *not* thread safe,
+ which is even better.
+ Effect with arrays: T array[N]
+ - ptr(i) = & array[i] = & array[0] + i * sizeof(T)
+ - ptr(i+1) = ptr(i) + sizeof(T).
+ What we want here, is to have index(i) and index(i+1) fall into
+ very different areas in [0, max_size - 1], to avoid locality.
*/
- return static_cast<uint>
- (((reinterpret_cast<intptr> (ptr)) * 2166179) % max_size);
+ value= (reinterpret_cast<intptr> (ptr)) >> 3;
+ value*= 1789;
+ value+= seed2 + seed1 + 1;
+ value%= max_size;
+
+ result= static_cast<uint> (value);
+ seed2= seed1*seed1;
+ seed1= result;
+
+ return result;
}
void pfs_print_error(const char *format, ...);
=== modified file 'storage/perfschema/pfs_instr.cc'
--- a/storage/perfschema/pfs_instr.cc 2010-03-22 12:48:18 +0000
+++ b/storage/perfschema/pfs_instr.cc 2010-03-31 15:48:47 +0000
@@ -454,6 +454,7 @@ void cleanup_file_hash(void)
PFS_mutex* create_mutex(PFS_mutex_class *klass, const void *identity)
{
int pass;
+ uint attempt= 0;
uint i= randomized_index(identity, mutex_max);
/*
@@ -464,7 +465,9 @@ PFS_mutex* create_mutex(PFS_mutex_class
{
PFS_mutex *pfs= mutex_array + i;
PFS_mutex *pfs_last= mutex_array + mutex_max;
- for ( ; pfs < pfs_last; pfs++)
+ for ( ;
+ (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+ pfs++, attempt++)
{
if (pfs->m_lock.is_free())
{
@@ -512,6 +515,7 @@ void destroy_mutex(PFS_mutex *pfs)
PFS_rwlock* create_rwlock(PFS_rwlock_class *klass, const void *identity)
{
int pass;
+ uint attempt= 0;
uint i= randomized_index(identity, rwlock_max);
/*
@@ -522,7 +526,9 @@ PFS_rwlock* create_rwlock(PFS_rwlock_cla
{
PFS_rwlock *pfs= rwlock_array + i;
PFS_rwlock *pfs_last= rwlock_array + rwlock_max;
- for ( ; pfs < pfs_last; pfs++)
+ for ( ;
+ (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+ pfs++, attempt++)
{
if (pfs->m_lock.is_free())
{
@@ -576,6 +582,7 @@ void destroy_rwlock(PFS_rwlock *pfs)
PFS_cond* create_cond(PFS_cond_class *klass, const void *identity)
{
int pass;
+ uint attempt= 0;
uint i= randomized_index(identity, cond_max);
/*
@@ -586,7 +593,9 @@ PFS_cond* create_cond(PFS_cond_class *kl
{
PFS_cond *pfs= cond_array + i;
PFS_cond *pfs_last= cond_array + cond_max;
- for ( ; pfs < pfs_last; pfs++)
+ for ( ;
+ (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+ pfs++, attempt++)
{
if (pfs->m_lock.is_free())
{
@@ -634,6 +643,7 @@ PFS_thread* create_thread(PFS_thread_cla
ulong thread_id)
{
int pass;
+ uint attempt= 0;
uint i= randomized_index(identity, thread_max);
/*
@@ -644,7 +654,9 @@ PFS_thread* create_thread(PFS_thread_cla
{
PFS_thread *pfs= thread_array + i;
PFS_thread *pfs_last= thread_array + thread_max;
- for ( ; pfs < pfs_last; pfs++)
+ for ( ;
+ (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+ pfs++, attempt++)
{
if (pfs->m_lock.is_free())
{
@@ -728,6 +740,7 @@ find_or_create_file(PFS_thread *thread,
{
PFS_file *pfs;
int pass;
+ uint attempt= 0;
if (! filename_hash_inited)
{
@@ -849,8 +862,9 @@ search:
{
pfs= file_array + i;
PFS_file *pfs_last= file_array + file_max;
-
- for ( ; pfs < pfs_last; pfs++)
+ for ( ;
+ (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+ pfs++, attempt++)
{
if (pfs->m_lock.is_free())
{
@@ -935,6 +949,7 @@ void destroy_file(PFS_thread *thread, PF
PFS_table* create_table(PFS_table_share *share, const void *identity)
{
int pass;
+ uint attempt= 0;
uint i= randomized_index(identity, table_max);
/*
@@ -945,7 +960,9 @@ PFS_table* create_table(PFS_table_share
{
PFS_table *pfs= table_array + i;
PFS_table *pfs_last= table_array + table_max;
- for ( ; pfs < pfs_last; pfs++)
+ for ( ;
+ (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+ pfs++, attempt++)
{
if (pfs->m_lock.is_free())
{
=== modified file 'storage/perfschema/pfs_instr.h'
--- a/storage/perfschema/pfs_instr.h 2010-01-12 01:47:27 +0000
+++ b/storage/perfschema/pfs_instr.h 2010-03-31 15:48:47 +0000
@@ -136,6 +136,13 @@ struct PFS_table : public PFS_instr
*/
#define LOCKER_STACK_SIZE 3
+/**
+ @def PFS_MAX_ALLOC_RETRY
+ Maximun number of times the code attempts to allocate an item
+ from internal buffers, before giving up.
+*/
+#define PFS_MAX_ALLOC_RETRY 1000
+
/** Instrumented thread implementation. @see PSI_thread. */
struct PFS_thread
{
Attachment: [text/bzr-bundle] bzr/marc.alff@oracle.com-20100331154847-fhxl1phdy823x1i4.bundle