List:Commits« Previous MessageNext Message »
From:Marc Alff Date:March 31 2010 3:48pm
Subject:bzr commit into mysql-next-mr-bugfixing branch (marc.alff:3144)
Bug#52502
View as plain text  
#At file:///Users/malff/BZR_TREE/mysql-next-mr-bugfixing-52502/ based on revid:alik@stripped

 3144 Marc Alff	2010-03-31
      Bug#52502 Performance schema does not start with large mutex_instance buffers
      
      This is a performance issue affecting scalability.
      
      Prior to this fix, the performance schema code did not scale well
      when a very large number of instruments are created in the code.
      
      The root cause was create_mutex() spinning "forever",
      trying to find an empty slot in a full mutex_array.
      
      With this fix:
      - functions like create_mutex() abort sooner,
        when used with a very large array which is full or almost full.
      - randomize_index() has been revised to improve
        the statistical spread of values in the mutex_array.  
        See the comments in the code for details.
      
      Other allocation functions similar to create_mutex()
      have been also fixed.
      
      No MTR test case provided (performance scalability issue).
      Fix tested manually.

    modified:
      storage/perfschema/pfs_global.h
      storage/perfschema/pfs_instr.cc
      storage/perfschema/pfs_instr.h
=== modified file 'storage/perfschema/pfs_global.h'
--- a/storage/perfschema/pfs_global.h	2010-01-12 01:47:27 +0000
+++ b/storage/perfschema/pfs_global.h	2010-03-31 15:48:47 +0000
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008-2009 Sun Microsystems, Inc
+/* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -30,27 +30,49 @@ void pfs_free(void *ptr);
 
 inline uint randomized_index(const void *ptr, uint max_size)
 {
+  static uint seed1= 0;
+  static uint seed2= 0;
+  uint result;
+  register intptr value;
+
   if (unlikely(max_size == 0))
     return 0;
 
   /*
-    ptr is typically an aligned structure,
-    so the last bits are not really random, but this has no effect.
-    Apply a factor A*x to spread
-    close values of ptr further apart (which helps with arrays),
-    and to spread values way beyond a typical max_size.
-    Then, apply a modulo to end within [0, max_size - 1].
-    A is big prime numbers, to avoid resonating with max_size,
-    to have a uniform distribution in [0, max_size - 1].
-    The value of A is chosen so that index(ptr) and index(ptr + N) (for arrays)
-    are likely to be not similar for typical values of max_size
-    (50, 100, 1000, etc).
-    In other words, (sizeof(T)*A % max_size) should not be a small number,
-    to avoid that with 'T array[max_size]', index(array[i])
-    and index(array[i + 1]) end up pointing in the same area in [0, max_size - 1].
+    ptr is typically an aligned structure, and can be in an array.
+    - The last bits are not random because of alignment,
+      so we divide by 8.
+    - The high bits are mostly constant, especially with 64 bits architectures,
+      but we keep most of them anyway, by doing computation in intptr.
+      The high bits are significant depending on where the data is
+      stored (the data segment, the stack, the heap, ...).
+    - To spread consecutive cells in an array further, we multiply by
+      a factor A. This factor should not be too high, which would cause
+      an overflow and cause loss of randomness (droping the top high bits).
+      The factor is a prime number, to help spread the distribution.
+    - To add more noise, and to be more robust if the calling code is
+      passing a constant value instead of a random identity,
+      we add the previous results, for hysteresys, with a degree 2 polynom,
+      X^2 + X + 1.
+    - Last, a modulo is applied to be within the [0, max_size - 1] range.
+    Note that seed1 and seed2 are static, and are *not* thread safe,
+    which is even better.
+    Effect with arrays: T array[N]
+    - ptr(i) = & array[i] = & array[0] + i * sizeof(T)
+    - ptr(i+1) = ptr(i) + sizeof(T).
+    What we want here, is to have index(i) and index(i+1) fall into
+    very different areas in [0, max_size - 1], to avoid locality.
   */
-  return static_cast<uint>
-    (((reinterpret_cast<intptr> (ptr)) * 2166179) % max_size);
+  value= (reinterpret_cast<intptr> (ptr)) >> 3;
+  value*= 1789;
+  value+= seed2 + seed1 + 1;
+  value%= max_size;
+  
+  result= static_cast<uint> (value);
+  seed2= seed1*seed1;
+  seed1= result;
+
+  return result;
 }
 
 void pfs_print_error(const char *format, ...);

=== modified file 'storage/perfschema/pfs_instr.cc'
--- a/storage/perfschema/pfs_instr.cc	2010-03-22 12:48:18 +0000
+++ b/storage/perfschema/pfs_instr.cc	2010-03-31 15:48:47 +0000
@@ -454,6 +454,7 @@ void cleanup_file_hash(void)
 PFS_mutex* create_mutex(PFS_mutex_class *klass, const void *identity)
 {
   int pass;
+  uint attempt= 0;
   uint i= randomized_index(identity, mutex_max);
 
   /*
@@ -464,7 +465,9 @@ PFS_mutex* create_mutex(PFS_mutex_class 
   {
     PFS_mutex *pfs= mutex_array + i;
     PFS_mutex *pfs_last= mutex_array + mutex_max;
-    for ( ; pfs < pfs_last; pfs++)
+    for ( ;
+         (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+         pfs++, attempt++)
     {
       if (pfs->m_lock.is_free())
       {
@@ -512,6 +515,7 @@ void destroy_mutex(PFS_mutex *pfs)
 PFS_rwlock* create_rwlock(PFS_rwlock_class *klass, const void *identity)
 {
   int pass;
+  uint attempt= 0;
   uint i= randomized_index(identity, rwlock_max);
 
   /*
@@ -522,7 +526,9 @@ PFS_rwlock* create_rwlock(PFS_rwlock_cla
   {
     PFS_rwlock *pfs= rwlock_array + i;
     PFS_rwlock *pfs_last= rwlock_array + rwlock_max;
-    for ( ; pfs < pfs_last; pfs++)
+    for ( ;
+         (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+         pfs++, attempt++)
     {
       if (pfs->m_lock.is_free())
       {
@@ -576,6 +582,7 @@ void destroy_rwlock(PFS_rwlock *pfs)
 PFS_cond* create_cond(PFS_cond_class *klass, const void *identity)
 {
   int pass;
+  uint attempt= 0;
   uint i= randomized_index(identity, cond_max);
 
   /*
@@ -586,7 +593,9 @@ PFS_cond* create_cond(PFS_cond_class *kl
   {
     PFS_cond *pfs= cond_array + i;
     PFS_cond *pfs_last= cond_array + cond_max;
-    for ( ; pfs < pfs_last; pfs++)
+    for ( ;
+         (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+         pfs++, attempt++)
     {
       if (pfs->m_lock.is_free())
       {
@@ -634,6 +643,7 @@ PFS_thread* create_thread(PFS_thread_cla
                           ulong thread_id)
 {
   int pass;
+  uint attempt= 0;
   uint i= randomized_index(identity, thread_max);
 
   /*
@@ -644,7 +654,9 @@ PFS_thread* create_thread(PFS_thread_cla
   {
     PFS_thread *pfs= thread_array + i;
     PFS_thread *pfs_last= thread_array + thread_max;
-    for ( ; pfs < pfs_last; pfs++)
+    for ( ;
+         (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+         pfs++, attempt++)
     {
       if (pfs->m_lock.is_free())
       {
@@ -728,6 +740,7 @@ find_or_create_file(PFS_thread *thread, 
 {
   PFS_file *pfs;
   int pass;
+  uint attempt= 0;
 
   if (! filename_hash_inited)
   {
@@ -849,8 +862,9 @@ search:
   {
     pfs= file_array + i;
     PFS_file *pfs_last= file_array + file_max;
-
-    for ( ; pfs < pfs_last; pfs++)
+    for ( ;
+         (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+         pfs++, attempt++)
     {
       if (pfs->m_lock.is_free())
       {
@@ -935,6 +949,7 @@ void destroy_file(PFS_thread *thread, PF
 PFS_table* create_table(PFS_table_share *share, const void *identity)
 {
   int pass;
+  uint attempt= 0;
   uint i= randomized_index(identity, table_max);
 
   /*
@@ -945,7 +960,9 @@ PFS_table* create_table(PFS_table_share 
   {
     PFS_table *pfs= table_array + i;
     PFS_table *pfs_last= table_array + table_max;
-    for ( ; pfs < pfs_last; pfs++)
+    for ( ;
+         (pfs < pfs_last) && (attempt < PFS_MAX_ALLOC_RETRY);
+         pfs++, attempt++)
     {
       if (pfs->m_lock.is_free())
       {

=== modified file 'storage/perfschema/pfs_instr.h'
--- a/storage/perfschema/pfs_instr.h	2010-01-12 01:47:27 +0000
+++ b/storage/perfschema/pfs_instr.h	2010-03-31 15:48:47 +0000
@@ -136,6 +136,13 @@ struct PFS_table : public PFS_instr
 */
 #define LOCKER_STACK_SIZE 3
 
+/**
+  @def PFS_MAX_ALLOC_RETRY
+  Maximun number of times the code attempts to allocate an item
+  from internal buffers, before giving up.
+*/
+#define PFS_MAX_ALLOC_RETRY 1000
+
 /** Instrumented thread implementation. @see PSI_thread. */
 struct PFS_thread
 {


Attachment: [text/bzr-bundle] bzr/marc.alff@oracle.com-20100331154847-fhxl1phdy823x1i4.bundle
Thread
bzr commit into mysql-next-mr-bugfixing branch (marc.alff:3144)Bug#52502Marc Alff31 Mar
  • Re: bzr commit into mysql-next-mr-bugfixing branch (marc.alff:3144)Bug#52502Guilhem Bichot31 Mar
    • Re: bzr commit into mysql-next-mr-bugfixing branch (marc.alff:3144)Bug#52502Marc Alff1 Apr