List:Commits« Previous MessageNext Message »
From:tomas Date:May 25 2007 10:25am
Subject:bk commit into 5.1 tree (tomas:1.2472) BUG#28525
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-05-25 12:25:15+02:00, tomas@stripped +2 -0
  Bug #28525 Node failures in PGMAN at ndbrequire (line 430)

  storage/ndb/src/kernel/blocks/pgman.cpp@stripped, 2007-05-25 12:25:12+02:00, tomas@stripped +65 -4
    Under heavy insert load PGMAN can run out of page
    entries even when the page entry pool is sized at
    100 times the number of page cache entries.
    
    In this usage pattern the extra entries remain idle
    on the LIRS stack.  Only the ONSTACK state bit is set.
    There is not enough activity to free them the normal way.
    
    A study of PGMAN / DBTUP behaviour is needed.
    
    This patch adds a new sublist, SL_IDLE.  When the page
    entry pool is empty, an idle entry is released from the
    front of SL_IDLE if there is one.  Otherwise, we still
    crash.
    
    The factor above is reduced from 100 to 10 (still high).

  storage/ndb/src/kernel/blocks/pgman.hpp@stripped, 2007-05-25 12:25:12+02:00, tomas@stripped +4 -2
    Under heavy insert load PGMAN can run out of page
    entries even when the page entry pool is sized at
    100 times the number of page cache entries.
    
    In this usage pattern the extra entries remain idle
    on the LIRS stack.  Only the ONSTACK state bit is set.
    There is not enough activity to free them the normal way.
    
    A study of PGMAN / DBTUP behaviour is needed.
    
    This patch adds a new sublist, SL_IDLE.  When the page
    entry pool is empty, an idle entry is released from the
    front of SL_IDLE if there is one.  Otherwise, we still
    crash.
    
    The factor above is reduced from 100 to 10 (still high).

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	tomas
# Host:	whalegate.ndb.mysql.com
# Root:	/home/tomas/mysql-5.1-telco-gca

--- 1.21/storage/ndb/src/kernel/blocks/pgman.cpp	2007-05-22 17:53:04 +02:00
+++ 1.22/storage/ndb/src/kernel/blocks/pgman.cpp	2007-05-25 12:25:12 +02:00
@@ -123,8 +123,8 @@
   if (page_buffer > 0)
   {
     page_buffer /= GLOBAL_PAGE_SIZE; // in pages
-    m_page_entry_pool.setSize(100*page_buffer);
     m_param.m_max_pages = page_buffer;
+    m_page_entry_pool.setSize(m_param.m_lirs_stack_mult * page_buffer);
     m_param.m_max_hot_pages = (page_buffer * 9) / 10;
   }
 
@@ -141,6 +141,7 @@
 
 Pgman::Param::Param() :
   m_max_pages(64),      // smallish for testing
+  m_lirs_stack_mult(10),
   m_max_hot_pages(56),
   m_max_loop_count(256),
   m_max_io_waits(64),
@@ -301,6 +302,9 @@
   {
     return Page_entry::SL_LOCKED;
   }
+  if (state == Page_entry::ONSTACK) {
+    return Page_entry::SL_IDLE;
+  }
   return Page_entry::SL_OTHER;
 }
 
@@ -415,15 +419,55 @@
 {
   if (find_page_entry(ptr, file_no, page_no))
   {
+    jam();
     ndbrequire(ptr.p->m_state != 0);
     m_stats.m_page_hits++;
+
+#ifdef VM_TRACE
+  debugOut << "PGMAN: get_page_entry: found" << endl;
+  debugOut << "PGMAN: " << ptr << endl;
+#endif
     return true;
   }
 
+  if (m_page_entry_pool.getNoOfFree() == 0)
+  {
+    jam();
+    Page_sublist& pl_idle = *m_page_sublist[Page_entry::SL_IDLE];
+    Ptr<Page_entry> idle_ptr;
+    if (pl_idle.first(idle_ptr))
+    {
+      jam();
+
+#ifdef VM_TRACE
+    debugOut << "PGMAN: get_page_entry: re-use idle entry" << endl;
+    debugOut << "PGMAN: " << idle_ptr << endl;
+#endif
+
+      Page_state state = idle_ptr.p->m_state;
+      ndbrequire(state == Page_entry::ONSTACK);
+
+      Page_stack& pl_stack = m_page_stack;
+      ndbrequire(pl_stack.hasPrev(idle_ptr));
+      pl_stack.remove(idle_ptr);
+      state &= ~ Page_entry::ONSTACK;
+      set_page_state(idle_ptr, state);
+      ndbrequire(idle_ptr.p->m_state == 0);
+
+      release_page_entry(idle_ptr);
+    }
+  }
+
   if (seize_page_entry(ptr, file_no, page_no))
   {
+    jam();
     ndbrequire(ptr.p->m_state == 0);
     m_stats.m_page_faults++;
+
+#ifdef VM_TRACE
+  debugOut << "PGMAN: get_page_entry: seize" << endl;
+  debugOut << "PGMAN: " << ptr << endl;
+#endif
     return true;
   }
 
@@ -1929,6 +1973,8 @@
     break;
   case Page_entry::SL_LOCKED:
     break;
+  case Page_entry::SL_IDLE:
+    break;
   case Page_entry::SL_OTHER:
     break;
   default:
@@ -1975,8 +2021,11 @@
   ndbrequire(stack_count == pl_stack.count() || dump_page_lists());
   ndbrequire(queue_count == pl_queue.count() || dump_page_lists());
 
+  Uint32 hot_count = 0;
   Uint32 hot_bound_count = 0;
   Uint32 cold_bound_count = 0;
+  Uint32 stack_request_count = 0;
+  Uint32 queue_request_count = 0;
 
   Uint32 i1 = RNIL;
   for (pl_stack.first(ptr); ptr.i != RNIL; pl_stack.next(ptr))
@@ -1987,9 +2036,13 @@
     ndbrequire(state & Page_entry::ONSTACK || dump_page_lists());
     if (! pl_stack.hasPrev(ptr))
       ndbrequire(state & Page_entry::HOT || dump_page_lists());
-    if (state & Page_entry::HOT &&
-        state & Page_entry::BOUND)
-      hot_bound_count++;
+    if (state & Page_entry::HOT) {
+      hot_count++;
+      if (state & Page_entry::BOUND)
+        hot_bound_count++;
+    }
+    if (state & Page_entry::REQUEST)
+      stack_request_count++;
   }
 
   Uint32 i2 = RNIL;
@@ -2001,6 +2054,8 @@
     ndbrequire(state & Page_entry::ONQUEUE || dump_page_lists());
     ndbrequire(state & Page_entry::BOUND || dump_page_lists());
     cold_bound_count++;
+    if (state & Page_entry::REQUEST)
+      queue_request_count++;
   }
 
   Uint32 tot_bound_count =
@@ -2033,7 +2088,11 @@
            << " cache:" << m_stats.m_num_pages
            << "(" << locked_bound_count << "L)"
            << " stack:" << pl_stack.count()
+           << " hot:" << hot_count
+           << " hot_bound:" << hot_bound_count
+           << " stack_request:" << stack_request_count
            << " queue:" << pl_queue.count()
+           << " queue_request:" << queue_request_count
            << " queuewait:" << queuewait_count << endl;
 
   debugOut << "PGMAN:";
@@ -2141,6 +2200,8 @@
     return "busy";
   case Page_entry::SL_LOCKED:
     return "locked";
+  case Page_entry::SL_IDLE:
+    return "idle";
   case Page_entry::SL_OTHER:
     return "other";
   }

--- 1.10/storage/ndb/src/kernel/blocks/pgman.hpp	2006-12-23 20:33:30 +01:00
+++ 1.11/storage/ndb/src/kernel/blocks/pgman.hpp	2007-05-25 12:25:12 +02:00
@@ -325,8 +325,9 @@
       ,SL_CALLBACK_IO = 4
       ,SL_BUSY = 5
       ,SL_LOCKED = 6
-      ,SL_OTHER = 7
-      ,SUBLIST_COUNT = 8
+      ,SL_IDLE = 7
+      ,SL_OTHER = 8
+      ,SUBLIST_COUNT = 9
     };
 
     Uint16 m_file_no;       // disk page address set at seize
@@ -401,6 +402,7 @@
   struct Param {
     Param();
     Uint32 m_max_pages;         // max number of cache pages
+    Uint32 m_lirs_stack_mult;   // in m_max_pages (around 3-10)
     Uint32 m_max_hot_pages;     // max hot cache pages (up to 99%)
     Uint32 m_max_loop_count;    // limit purely local loops
     Uint32 m_max_io_waits;
Thread
bk commit into 5.1 tree (tomas:1.2472) BUG#28525tomas25 May