List:Internals« Previous MessageNext Message »
From:pekka Date:September 27 2005 2:44pm
Subject:bk commit into 5.1 tree (pekka:1.2022)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of pekka. When pekka does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2022 05/09/27 14:44:23 pekka@stripped +4 -0
  ndb -  wl#2718 clean up list handling

  storage/ndb/src/kernel/blocks/pgman.hpp
    1.31 05/09/27 14:40:24 pekka@stripped +56 -58
    wl#2718 - use std list templates. use hash list for LCP like originally

  storage/ndb/src/kernel/blocks/pgman.cpp
    1.38 05/09/27 14:40:24 pekka@stripped +157 -375
    wl#2718 - use std list templates. use hash list for LCP like originally

  storage/ndb/src/kernel/vm/DLCHashTable.hpp
    1.1 05/09/27 14:37:37 pekka@stripped +82 -0

  storage/ndb/src/kernel/vm/DLCHashTable.hpp
    1.0 05/09/27 14:37:37 pekka@stripped +0 -0
    BitKeeper file
/space/pekka/ndb/version/my51-dd/storage/ndb/src/kernel/vm/DLCHashTable.hpp

  storage/ndb/src/kernel/vm/DLCFifoList.hpp
    1.1 05/09/27 14:37:36 pekka@stripped +119 -0

  storage/ndb/src/kernel/vm/DLCFifoList.hpp
    1.0 05/09/27 14:37:36 pekka@stripped +0 -0
    BitKeeper file
/space/pekka/ndb/version/my51-dd/storage/ndb/src/kernel/vm/DLCFifoList.hpp

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	pekka
# Host:	orca.ndb.mysql.com
# Root:	/space/pekka/ndb/version/my51-dd

--- 1.37/storage/ndb/src/kernel/blocks/pgman.cpp	2005-09-06 20:47:08 +02:00
+++ 1.38/storage/ndb/src/kernel/blocks/pgman.cpp	2005-09-27 14:40:24 +02:00
@@ -34,8 +34,10 @@
 
 Pgman::Pgman(const Configuration & conf) :
   SimulatedBlock(PGMAN, conf),
-  m_file_map(m_data_buffer_pool)
-  ,m_page_entry_hash(m_page_entry_pool)
+  m_file_map(m_data_buffer_pool),
+  m_page_hashlist(m_page_entry_pool),
+  m_page_stack(m_page_entry_pool),
+  m_page_queue(m_page_entry_pool)
 #ifdef VM_TRACE
   ,debugOut(* new NullOutputStream())
 #endif
@@ -58,22 +60,30 @@
   m_page_entry_pool.setSize(2000);
   m_page_request_pool.setSize(10000);
   m_data_buffer_pool.setSize(1);
-  m_page_entry_hash.setSize(512);
-  init_page_lists();
+  m_page_hashlist.setSize(512);
+  
+  for (Uint32 k = 0; k < Page_entry::SUBLIST_COUNT; k++)
+    m_page_sublist[k] = new Page_sublist(m_page_entry_pool);
 
   m_lcp_on = false;
-  m_lcp_copy_page_free = true;
-  m_lcp_outstanding = m_last_lcp = m_last_lcp_complete = 0;
+  m_last_lcp = 0;
+  m_last_lcp_complete = 0;
+  m_lcp_curr_bucket = ~(Uint32)0;
+  m_lcp_outstanding = 0;
+  m_lcp_copy_page = RNIL;
+  m_lcp_copy_page_free = false;
 }
 
 Pgman::~Pgman()
 {
+  for (Uint32 k = 0; k < Page_entry::SUBLIST_COUNT; k++)
+    delete m_page_sublist[k];
 }
 
 BLOCK_FUNCTIONS(Pgman)
 
 Pgman::Stats::Stats() :
-  m_max_pages(1024),      // small for testing
+  m_max_pages(64),      // smallish for testing
   m_page_hits(0),
   m_page_faults(0),
   m_max_io_waits(64),
@@ -95,12 +105,13 @@
     c_tup = (Dbtup*)globalData.getBlock(DBTUP);
     break;
   case 3:
-  {
-    Ptr<GlobalPage> page_ptr;
-    ndbrequire(m_global_page_pool.seize(page_ptr));
-    m_lcp_copy_page = page_ptr.i;
-    process_all(signal, true);
-  }
+    {
+      Ptr<GlobalPage> page_ptr;
+      ndbrequire(m_global_page_pool.seize(page_ptr));
+      m_lcp_copy_page = page_ptr.i;
+      m_lcp_copy_page_free = true;
+      process_all(signal, true);
+    }
     break;
   case 7:
     break;
@@ -161,92 +172,11 @@
 }
 
 // page lists
-// XXX this implementation is only for testing
-
-Pgman::Page_list::Page_list() :
-  m_count(0),
-  m_off(~(Uint32)0),
-  m_head_entry_i(RNIL),
-  m_tail_entry_i(RNIL),
-  m_cmp(0)
-{
-}
-
-int
-Pgman::Page_entry::m_cmp_main(const Page_entry& a, const Page_entry& b)
-{
-  if (a.m_file_no < b.m_file_no)
-    return -1;
-  if (a.m_file_no > b.m_file_no)
-    return +1;
-  if (a.m_page_no < b.m_page_no)
-    return -1;
-  if (a.m_page_no > b.m_page_no)
-    return +1;
-  return 0;
-}
-
-int
-Pgman::Page_entry::m_cmp_dirty_unused(const Page_entry& a, const Page_entry& b)
-{
-  if (a.m_lsn < b.m_lsn)
-    return -1;
-  if (a.m_lsn > b.m_lsn)
-    return +1;
-  return m_cmp_main(a, b);
-}
-
-void
-Pgman::init_page_lists()
-{
-  { Page_list& pl = m_page_list[0];
-    pl.m_off = 0;
-    pl.m_cmp = Page_entry::m_cmp_main;
-  }
-  { Page_list& pl = m_page_list[Page_entry::NOT_BOUND];
-    pl.m_off = 1;
-    pl.m_cmp = 0; // Page_entry::m_cmp_not_bound;
-  }
-  { Page_list& pl = m_page_list[Page_entry::NOT_MAPPED];
-    pl.m_off = 1;
-    pl.m_cmp = 0; // Page_entry::m_cmp_not_mapped;
-  }
-  { Page_list& pl = m_page_list[Page_entry::CLEAN_UNUSED];
-    pl.m_off = 1;
-    pl.m_cmp = 0; // Page_entry::m_cmp_clean_unused;
-  }
-  { Page_list& pl = m_page_list[Page_entry::DIRTY_UNUSED];
-    pl.m_off = 1;
-    pl.m_cmp = Page_entry::m_cmp_dirty_unused;
-  }
-  { Page_list& pl = m_page_list[Page_entry::ANY_USED];
-    pl.m_off = 1;
-    pl.m_cmp = 0; // Page_entry::m_cmp_any_used;
-  }
-  { Page_list& pl = m_page_list[Page_entry::WAIT_IO];
-    pl.m_off = 1;
-    pl.m_cmp = 0; // Page_entry::m_cmp_wait_io;
-  }
-  { Page_list& pl = m_page_list[Page_entry::REQ_DONE];
-    pl.m_off = 1;
-    pl.m_cmp = 0; // Page_entry::m_cmp_req_done;
-  }
-  { Page_list& pl = m_page_list[Page_entry::IS_BUSY];
-    pl.m_off = 1;
-    pl.m_cmp = 0; // Page_entry::m_cmp_is_busy;
-  }
-  { Page_list& pl = m_page_list[Page_entry::IS_LOCKED];
-    pl.m_off = 1;
-    pl.m_cmp = 0; // Page_entry::m_cmp_is_locked;
-  }
-}
 
 const char*
 Pgman::Page_entry::get_sublist_name(Uint32 list_no)
 {
   switch (list_no) {
-  case 0:
-    return "main";
   case NOT_BOUND:
     return "not_bound";
   case NOT_MAPPED:
@@ -275,7 +205,7 @@
   // TODO check state is valid
   if (state == 0)
   {
-    return 0;
+    return ZNIL;
   }
   if (! (state & BOUND))
   {
@@ -312,121 +242,6 @@
   return CLEAN_UNUSED;
 }
 
-bool
-Pgman::page_list_find(Page_list& pl, Ptr<Page_entry>& ptr, const
Page_entry& key)
-{
-  Uint32 off = pl.m_off;
-  Ptr<Page_entry> loop_ptr;
-  loop_ptr.i = pl.m_head_entry_i;
-  while (loop_ptr.i != RNIL)
-  {
-    m_page_entry_pool.getPtr(loop_ptr);
-    int k = Page_entry::m_cmp_main(*loop_ptr.p, key);
-    if (k == 0) {
-      ptr = loop_ptr;
-      return true;
-    }
-    loop_ptr.i = loop_ptr.p->m_next_entry_i[off];
-  }
-  ptr.i = RNIL;
-  ptr.p = 0;
-  return false;
-}
-
-void
-Pgman::page_list_insert(Page_list& pl, Ptr<Page_entry> ptr)
-{
-  Uint32 off = pl.m_off;
-  ndbrequire(ptr.p->m_next_entry_i[off] == RNIL);
-  ndbrequire(ptr.p->m_prev_entry_i[off] == RNIL);
-
-  if (pl.m_head_entry_i == RNIL)
-  {
-    ndbrequire(pl.m_tail_entry_i == RNIL);
-    pl.m_head_entry_i = ptr.i;
-    pl.m_tail_entry_i = ptr.i;
-  }
-  else
-  {
-    ndbrequire(pl.m_tail_entry_i != RNIL);
-
-    Ptr<Page_entry> loop_ptr;
-    loop_ptr.i = pl.m_tail_entry_i;
-    while (loop_ptr.i != RNIL)
-    {
-      m_page_entry_pool.getPtr(loop_ptr);
-      if (pl.m_cmp == 0 || pl.m_cmp(*loop_ptr.p, *ptr.p) < 0)
-        break;
-      loop_ptr.i = loop_ptr.p->m_prev_entry_i[off];
-    }
-
-    if (loop_ptr.i == RNIL)
-    {
-      Ptr<Page_entry> head_ptr;
-      m_page_entry_pool.getPtr(head_ptr, pl.m_head_entry_i);
-
-      head_ptr.p->m_prev_entry_i[off] = ptr.i;
-      ptr.p->m_next_entry_i[off] = head_ptr.i;
-      pl.m_head_entry_i = ptr.i;
-    }
-    else if (loop_ptr.p->m_next_entry_i[off] == RNIL)
-    {
-      ndbrequire(loop_ptr.i == pl.m_tail_entry_i);
-
-      loop_ptr.p->m_next_entry_i[off] = ptr.i;
-      ptr.p->m_prev_entry_i[off] = loop_ptr.i;
-      pl.m_tail_entry_i = ptr.i;
-    }
-    else
-    {
-      Ptr<Page_entry> next_ptr;
-      m_page_entry_pool.getPtr(next_ptr, loop_ptr.p->m_next_entry_i[off]);
-
-      ptr.p->m_next_entry_i[off] = next_ptr.i;
-      next_ptr.p->m_prev_entry_i[off] = ptr.i;
-
-      loop_ptr.p->m_next_entry_i[off] = ptr.i;
-      ptr.p->m_prev_entry_i[off] = loop_ptr.i;
-    }
-  }
-
-  pl.m_count++;
-}
-
-void
-Pgman::page_list_remove(Page_list& pl, Ptr<Page_entry> ptr)
-{
-  Uint32 off = pl.m_off;
-
-  if (ptr.p->m_next_entry_i[off] != RNIL)
-  {
-    Ptr<Page_entry> next_ptr;
-    m_page_entry_pool.getPtr(next_ptr, ptr.p->m_next_entry_i[off]);
-    next_ptr.p->m_prev_entry_i[off] = ptr.p->m_prev_entry_i[off];
-  }
-  else
-  {
-    pl.m_tail_entry_i = ptr.p->m_prev_entry_i[off];
-  }
-
-  if (ptr.p->m_prev_entry_i[off] != RNIL)
-  {
-    Ptr<Page_entry> prev_ptr;
-    m_page_entry_pool.getPtr(prev_ptr, ptr.p->m_prev_entry_i[off]);
-    prev_ptr.p->m_next_entry_i[off] = ptr.p->m_next_entry_i[off];
-  }
-  else
-  {
-    pl.m_head_entry_i = ptr.p->m_next_entry_i[off];
-  }
-
-  ptr.p->m_next_entry_i[off] = RNIL;
-  ptr.p->m_prev_entry_i[off] = RNIL;
-
-  ndbrequire(pl.m_count != 0);
-  pl.m_count--;
-}
-
 void
 Pgman::set_page_state(Ptr<Page_entry> ptr, Uint16 new_state)
 {
@@ -442,20 +257,20 @@
     Uint32 new_list_no = Page_entry::get_sublist_no(new_state);
     if (old_state != 0)
     {
-      ndbrequire(old_list_no != 0);
+      ndbrequire(old_list_no != ZNIL);
       if (old_list_no != new_list_no)
       {
-        Page_list& old_list = m_page_list[old_list_no];
-        page_list_remove(old_list, ptr);
+        Page_sublist& old_list = *m_page_sublist[old_list_no];
+        old_list.remove(ptr);
       }
     }
     if (new_state != 0)
     {
-      ndbrequire(new_list_no != 0);
+      ndbrequire(new_list_no != ZNIL);
       if (old_list_no != new_list_no)
       {
-        Page_list& new_list = m_page_list[new_list_no];
-        page_list_insert(new_list, ptr);
+        Page_sublist& new_list = *m_page_sublist[new_list_no];
+        new_list.add(ptr);
       }
     }
     ptr.p->m_state = new_state;
@@ -477,79 +292,32 @@
   Uint32 k;
   Uint32 tot_count = 0;
 
-  for (k = 0; k < Page_entry::LIST_COUNT; k++)
+  for (k = 0; k < Page_entry::SUBLIST_COUNT; k++)
   {
-    const Page_list& pl = m_page_list[k];
-    ndbrequire(pl.m_off == (k != 0));
+    const Page_sublist& pl = *m_page_sublist[k];
 
     Uint32 count = 0;
     Ptr<Page_entry> ptr;
-    ptr.i = pl.m_head_entry_i;
+    // for possible future use to verify order
     Ptr<Page_entry> prev_ptr;
     prev_ptr.i = RNIL;
 
-    while (ptr.i != RNIL)
+    for (pl.first(ptr); ptr.i != RNIL; pl.next(ptr))
     {
-      m_page_entry_pool.getPtr(ptr);
-      if (k > 0)
-      {
-        ndbrequire(Page_entry::get_sublist_no(ptr.p->m_state) == k);
-      }
-      ndbrequire(ptr.p->m_prev_entry_i[pl.m_off] == prev_ptr.i);
-      if (pl.m_cmp != 0 && prev_ptr.i != RNIL)
-      {
-        // require strict order
-        ndbrequire(getNodeState().startLevel != NodeState::SL_STARTED || 
-		   pl.m_cmp(*prev_ptr.p, *ptr.p) < 0);
-      }
+      ndbrequire(Page_entry::get_sublist_no(ptr.p->m_state) == k);
+      tot_count++;
       prev_ptr = ptr;
-      ptr.i = ptr.p->m_next_entry_i[pl.m_off];
-      count++;
     }
-
-    ndbrequire(pl.m_tail_entry_i == prev_ptr.i);
-    ndbrequire(pl.m_count == count);
-    if (k > 0)
-      tot_count += count;
   }
 
-  Page_list& pl_main = m_page_list[0];
-  ndbrequire(pl_main.m_count == tot_count);
-
-  Ptr<Page_entry> main_ptr;
-  main_ptr.i = pl_main.m_head_entry_i;
-  while (main_ptr.i != RNIL)
-  {
-    m_page_entry_pool.getPtr(main_ptr);
-    Uint32 found = 0;
-
-    for (k = 1; k < Page_entry::LIST_COUNT; k++)
-    {
-      const Page_list& pl = m_page_list[k];
-      Ptr<Page_entry> ptr;
-      ptr.i = pl.m_head_entry_i;
-
-      while (ptr.i != RNIL)
-      {
-        m_page_entry_pool.getPtr(ptr);
-        if (main_ptr.i == ptr.i && main_ptr.p == ptr.p)
-          found++;
-        ptr.i = ptr.p->m_next_entry_i[1];
-      }
-    }
-
-    ndbrequire(found == 1);
-    main_ptr.i = main_ptr.p->m_next_entry_i[0];
-  }
+  ndbrequire(tot_count == m_page_hashlist.count());
 
-  debugOut << "PGMAN: ";
-  for (k = 0; k < Page_entry::LIST_COUNT; k++)
+  debugOut << "PGMAN: tot:" << tot_count;
+  for (k = 0; k < Page_entry::SUBLIST_COUNT; k++)
   {
-    const Page_list& pl = m_page_list[k];
-    if (k > 0)
-      debugOut << " ";
-    debugOut << Page_entry::get_sublist_name(k) << ":";
-    debugOut << pl.m_count;
+    const Page_sublist& pl = *m_page_sublist[k];
+    debugOut << " " << Page_entry::get_sublist_name(k) << ":"
+             << pl.count();
   }
   debugOut << endl;
 }
@@ -563,10 +331,7 @@
 {
   if (m_page_entry_pool.seize(ptr)) {
     new (ptr.p) Page_entry(file_no, page_no);
-    Page_list& pl_main = m_page_list[0];
-    page_list_insert(pl_main, ptr);
-
-    m_page_entry_hash.add(ptr);
+    m_page_hashlist.add(ptr);
 
 #ifdef VM_TRACE
   debugOut << "PGMAN: seize_page_entry" << endl;
@@ -585,14 +350,12 @@
   key.m_file_no = file_no;
   key.m_page_no = page_no;
   
-  return m_page_entry_hash.find(ptr, key);
+  return m_page_hashlist.find(ptr, key);
 }
 
 bool
 Pgman::get_page_entry(Ptr<Page_entry>& ptr, Uint32 file_no, Uint32 page_no)
 {
-  // find or seize, the slow way
-
   if (find_page_entry(ptr, file_no, page_no)) {
     m_stats.m_page_hits++;
     return true;
@@ -624,9 +387,7 @@
   }
   // remove from sublist
   set_page_state(ptr, 0);
-  Page_list& pl_main = m_page_list[0];
-  page_list_remove(pl_main, ptr);
-  m_page_entry_hash.remove(ptr);
+  m_page_hashlist.remove(ptr);
   m_page_entry_pool.release(ptr);
 }
 
@@ -642,14 +403,13 @@
   m_lcp_on = true;
   ndbrequire(!m_lcp_outstanding);
   ndbrequire(m_lcp_copy_page_free);
-  Page_list& pl_main = m_page_list[0];
-  m_last_lcp_entry_i = pl_main.m_head_entry_i;
+  m_lcp_curr_bucket = 0;
 
 #ifdef VM_TRACE
   debugOut
     << "PGMAN: execLCP_FRAG_ORD"
     << " this=" << m_last_lcp << " last_complete=" <<
m_last_lcp_complete
-    << " entry=" << m_last_lcp_entry_i << endl;
+    << " bucket=" << m_lcp_curr_bucket << endl;
 #endif
 }
 
@@ -663,7 +423,8 @@
   debugOut
     << "PGMAN: execEND_LCP_REQ"
     << " this=" << m_last_lcp << " last_complete=" <<
m_last_lcp_complete
-    << " entry=" << m_last_lcp_entry_i << endl;
+    << " bucket=" << m_lcp_curr_bucket
+    << " outstanding=" << m_lcp_outstanding << endl;
 #endif
 
   if (m_last_lcp == m_last_lcp_complete)
@@ -690,64 +451,71 @@
   debugOut
     << "PGMAN: process_lcp"
     << " this=" << m_last_lcp << " last_complete=" <<
m_last_lcp_complete
-    << " entry=" << m_last_lcp_entry_i << endl;
+    << " bucket=" << m_lcp_curr_bucket
+    << " outstanding=" << m_lcp_outstanding << endl;
 #endif
 
-  while (m_last_lcp_entry_i != RNIL && --max_count > 0)
-  {
-    Ptr<Page_entry> ptr;
-    m_page_entry_pool.getPtr(ptr, m_last_lcp_entry_i);
-
-    Uint16 state = ptr.p->m_state;
+  // start or re-start from beginning of current hash bucket
+  if (m_lcp_curr_bucket != ~(Uint32)0) {
+    Page_hashlist::Iterator iter;
+    m_page_hashlist.next(m_lcp_curr_bucket, iter);
 
-    if (ptr.p->m_last_lcp < m_last_lcp &&
-        (state & Page_entry::DIRTY))
+    while (iter.curr.i != RNIL && --max_count > 0)
     {
-      if(! (state & Page_entry::BOUND))
-      {
-	ndbout << ptr << endl;
-	ndbrequire(false);
-      }
-      if (state & Page_entry::BUSY)
-      {
-        break;  // wait for it
-      }
-      if (state & Page_entry::LOCKED)
-      {
-	/**
-	 * Special handling of LOCKED pages...only write 1 at a time...
-	 *   using copy page (m_lcp_copy_page)
-	 */
-	if (!m_lcp_copy_page_free)
-	{
-	  break;
-	}
-	m_lcp_copy_page_free = false;
-	Ptr<GlobalPage> src, copy;
-	m_global_page_pool.getPtr(copy, m_lcp_copy_page);
-	m_global_page_pool.getPtr(src, ptr.p->m_real_page_i);
-	memcpy(copy.p, src.p, sizeof(GlobalPage));
-	ptr.p->m_real_page_i = copy.i;
-	ptr.p->m_copy_real_page_i = src.i;
-	ptr.p->m_state |= Page_entry::LCP;
-	pageout(signal, ptr);
-      }
-      else if (state & Page_entry::PAGEOUT)
-      {
-	set_page_state(ptr, state | Page_entry::LCP);
-      }
-      else
+      Ptr<Page_entry>& ptr = iter.curr;
+      Uint16 state = ptr.p->m_state;
+
+      if (ptr.p->m_last_lcp < m_last_lcp &&
+          (state & Page_entry::DIRTY))
       {
-	ptr.p->m_state |= Page_entry::LCP;
-	pageout(signal, ptr);
+        if(! (state & Page_entry::BOUND))
+        {
+          ndbout << ptr << endl;
+          ndbrequire(false);
+        }
+        if (state & Page_entry::BUSY)
+        {
+          break;  // wait for it
+        }
+        if (state & Page_entry::LOCKED)
+        {
+          /**
+           * Special handling of LOCKED pages...only write 1 at a time...
+           *   using copy page (m_lcp_copy_page)
+           */
+          if (!m_lcp_copy_page_free)
+          {
+            break;
+          }
+          m_lcp_copy_page_free = false;
+          Ptr<GlobalPage> src, copy;
+          m_global_page_pool.getPtr(copy, m_lcp_copy_page);
+          m_global_page_pool.getPtr(src, ptr.p->m_real_page_i);
+          memcpy(copy.p, src.p, sizeof(GlobalPage));
+          ptr.p->m_real_page_i = copy.i;
+          ptr.p->m_copy_real_page_i = src.i;
+          ptr.p->m_state |= Page_entry::LCP;
+          pageout(signal, ptr);
+        }
+        else if (state & Page_entry::PAGEOUT)
+        {
+          set_page_state(ptr, state | Page_entry::LCP);
+        }
+        else
+        {
+          ptr.p->m_state |= Page_entry::LCP;
+          pageout(signal, ptr);
+        }
+        ptr.p->m_last_lcp = m_last_lcp;
+        m_lcp_outstanding++;
       }
-      ptr.p->m_last_lcp = m_last_lcp;
-      m_lcp_outstanding++;
+      m_page_hashlist.next(iter);
     }
-    m_last_lcp_entry_i = ptr.p->m_next_entry_i[0];
+
+    m_lcp_curr_bucket = (iter.curr.i != RNIL ? iter.bucket : ~(Uint32)0);
   }
 
-  if (m_last_lcp_entry_i == RNIL && !m_lcp_outstanding)
+  if (m_lcp_curr_bucket == ~(Uint32)0  && !m_lcp_outstanding)
   {
     if (m_last_lcp == m_last_lcp_complete)
     {
@@ -756,6 +524,7 @@
     }
     m_last_lcp_complete = m_last_lcp;
     m_lcp_on = false;
+    m_lcp_curr_bucket = ~(Uint32)0;
   }
 }
 
@@ -794,21 +563,20 @@
 {
   int max_count = 32;
 
-  Page_list& pl_main = m_page_list[0];
-  Page_list& pl_not_bound = m_page_list[Page_entry::NOT_BOUND];
-  Page_list& pl_clean_unused = m_page_list[Page_entry::CLEAN_UNUSED];
+  Page_sublist& pl_not_bound = *m_page_sublist[Page_entry::NOT_BOUND];
+  Page_sublist& pl_clean_unused = *m_page_sublist[Page_entry::CLEAN_UNUSED];
 
-  while (pl_not_bound.m_count != 0 && --max_count >= 0)
+  while (! pl_not_bound.isEmpty() && --max_count >= 0)
   {
     Ptr<Page_entry> ptr;
-    m_page_entry_pool.getPtr(ptr, pl_not_bound.m_head_entry_i);
+    pl_not_bound.first(ptr);
 
-    if (pl_main.m_count - pl_not_bound.m_count >= m_stats.m_max_pages)
+    if (m_page_hashlist.count() - pl_not_bound.count() >= m_stats.m_max_pages)
     {
-      if (pl_clean_unused.m_count != 0)
+      if (! pl_clean_unused.isEmpty())
       {
         Ptr<Page_entry> tmp;
-        m_page_entry_pool.getPtr(tmp, pl_clean_unused.m_head_entry_i);
+        pl_clean_unused.first(tmp);
         release_page_entry(tmp);
       }
       else
@@ -830,7 +598,7 @@
 
     set_page_state(ptr, ptr.p->m_state | Page_entry::BOUND);
 
-    if(ptr.p->m_state & Page_entry::MAPPED)
+    if (ptr.p->m_state & Page_entry::MAPPED)
     {
       ndbassert(ptr.p->m_state & Page_entry::NEW);
       process_req_done(signal, ptr);
@@ -845,12 +613,12 @@
   if (max_count > 0)
     max_count = max_count / 2 + 1;
 
-  Page_list& pl_not_mapped = m_page_list[Page_entry::NOT_MAPPED];
+  Page_sublist& pl_not_mapped = *m_page_sublist[Page_entry::NOT_MAPPED];
 
-  while (pl_not_mapped.m_count != 0 && --max_count >= 0)
+  while (! pl_not_mapped.isEmpty() && --max_count >= 0)
   {
     Ptr<Page_entry> ptr;
-    m_page_entry_pool.getPtr(ptr, pl_not_mapped.m_head_entry_i);
+    pl_not_mapped.first(ptr);
 
 #ifdef VM_TRACE
   debugOut << "PGMAN: " << ptr << " : process_not_mapped" <<
endl;
@@ -863,27 +631,29 @@
 void
 Pgman::process_dirty_unused(Signal* signal)
 {
-  // TODO i/o rate dependent on LCP freq, free buffers
   int max_dirty_pct = 80; // in percent
   int max_count = m_stats.m_max_io_waits - m_stats.m_current_io_waits;
-  Page_list& pl_dirty_unused = m_page_list[Page_entry::DIRTY_UNUSED];
+  Page_sublist& pl_dirty_unused = *m_page_sublist[Page_entry::DIRTY_UNUSED];
+  Page_sublist& pl_not_bound = *m_page_sublist[Page_entry::NOT_BOUND];
 
   if (max_count > 0)
   {
     max_count = max_count / 2 + 1;
-    int max_dirty_pages = (m_stats.m_max_pages -
m_page_list[Page_entry::NOT_BOUND].m_count) * m_stats.m_max_dirty_pct;
-    int dirty_pages = pl_dirty_unused.m_count * 100;
-    if(dirty_pages <= max_dirty_pages)
+    int max_dirty_pages =
+      (m_stats.m_max_pages - pl_not_bound.count()) *
+      m_stats.m_max_dirty_pct;
+    int dirty_pages = pl_dirty_unused.count() * 100;
+    if (dirty_pages <= max_dirty_pages)
       return;
 
-    if(max_count > (dirty_pages - max_dirty_pages))
+    if (max_count > (dirty_pages - max_dirty_pages))
       max_count = dirty_pages - max_dirty_pages;
   }
   
-  while (pl_dirty_unused.m_count != 0 && --max_count >= 0)
+  while (! pl_dirty_unused.isEmpty() && --max_count >= 0)
   {
     Ptr<Page_entry> ptr;
-    m_page_entry_pool.getPtr(ptr, pl_dirty_unused.m_head_entry_i);
+    pl_dirty_unused.first(ptr);
     
 #ifdef VM_TRACE
     debugOut << "PGMAN: " << ptr << " : process_dirty_unused" <<
endl;
@@ -950,12 +720,12 @@
 {
   int max_count = 8;
 
-  Page_list& pl_req_done = m_page_list[Page_entry::REQ_DONE];
+  Page_sublist& pl_req_done = *m_page_sublist[Page_entry::REQ_DONE];
 
-  while (pl_req_done.m_count != 0 && max_count >= 0)
+  while (! pl_req_done.isEmpty() && max_count >= 0)
   {
     Ptr<Page_entry> ptr;
-    m_page_entry_pool.getPtr(ptr, pl_req_done.m_head_entry_i);
+    pl_req_done.first(ptr);
 
 #ifdef VM_TRACE
     debugOut << "PGMAN: " << ptr << " : process_req_done" <<
endl;
@@ -1555,34 +1325,36 @@
 {
   jamEntry();
 #ifdef VM_TRACE
-  if (signal->theData[0] == 11000)
+  if (signal->theData[0] == 11000 && signal->getLength() == 2)
   {
-    FILE* f = globalSignalLoggers.getOutputStream();
-    debugOut = *new NdbOut(*new FileOutputStream(f));
+    if (signal->theData[1])
+    {
+      FILE* f = globalSignalLoggers.getOutputStream();
+      debugOut = *new NdbOut(*new FileOutputStream(f));
+    }
+    else
+      debugOut = *new NdbOut(*new NullOutputStream());
   }
 #endif
 
   if (signal->theData[0] == 11001)
   {
-    Uint32 list= 0;
+    // XXX print hash list if no sublist
+    Uint32 list = 0;
     if (signal->getLength() > 1)
-      list= signal->theData[1];
+      list = signal->theData[1];
 
-    Uint32 i = m_page_list[list].m_head_entry_i;
+    Page_sublist& pl = *m_page_sublist[list];
+    Ptr<Page_entry> ptr;
     
-    while (i != RNIL)
+    for (pl.first(ptr); ptr.i != RNIL; pl.next(ptr))
     {
-      Ptr<Page_entry> ptr;
-      m_page_entry_pool.getPtr(ptr, i);
-      
       ndbout << ptr << endl;
       infoEvent(" PE [ file: %d page: %d ] state: %x lsn: %lld lcp: %d busy: %d req-list:
%d",
 		ptr.p->m_file_no, ptr.p->m_page_no,
 		ptr.p->m_state, ptr.p->m_lsn, ptr.p->m_last_lcp,
 		ptr.p->m_busy_count,
 		!ptr.p->m_requests.isEmpty());
-      
-      i = ptr.p->m_next_entry_i[list];
     }
   }
 
@@ -1593,11 +1365,21 @@
     key.m_page_no = signal->theData[2];
     
     Ptr<Page_entry> ptr;
-    if (m_page_entry_hash.find(ptr, key))
+    if (m_page_hashlist.find(ptr, key))
     {
       ndbout << "pageout " << ptr << endl;
       pageout(signal, ptr);
     }
   }
-}
 
+  if (signal->theData[0] == 11003)
+  {
+    Page_sublist& pl_clean_unused = *m_page_sublist[Page_entry::CLEAN_UNUSED];
+    while (! pl_clean_unused.isEmpty())
+    {
+      Ptr<Page_entry> ptr;
+      pl_clean_unused.first(ptr);
+      release_page_entry(ptr);
+    }
+  }
+}

--- 1.30/storage/ndb/src/kernel/blocks/pgman.hpp	2005-08-26 13:26:48 +02:00
+++ 1.31/storage/ndb/src/kernel/blocks/pgman.hpp	2005-09-27 14:40:24 +02:00
@@ -19,8 +19,8 @@
 
 #include <SimulatedBlock.hpp>
 
-#include <SLList.hpp>
-#include <DLFifoList.hpp>
+#include <DLCHashTable.hpp>
+#include <DLCFifoList.hpp>
 #include <NodeBitmask.hpp>
 #include <signaldata/LCP.hpp>
 #include "lgman.hpp"
@@ -57,15 +57,21 @@
  * A local check point (LCP) performs complete pageout of dirty pages
  * since given LSN.  It needs a stable entry list to iterate over.
  *
- * Page entries are put on ordered lists accordingly:
+ * Page replacement uses the LIRS algorithm.  This adds two lists:
+ * a "stack" and a "queue" (a subset of the stack for fast access).
+ * A central idea is to keep even old un-bound page entries around to
+ * see if they are accessed again.
  *
- * The "main" list contains all entries.  It is ordered by its unique
- * key (file_no, page_no).  It is used to look up page entries.  LCP
- * iterates over it to cover all candidate pages.
- *
- * Each entry also belongs to exactly one "sublist".  These drive page
- * processing.  The sublist is determined by page "state".  The state
- * contains page properties such as discussed above.
+ * Page entries are put on lists accordingly:
+ *
+ * - Hash table on (file_no, page_no) is used for fast lookup and for
+ *   LCP to iterate over.
+ *
+ * - Page entries are divided into disjoint "sublists" determined by
+ *   page state.  These lists drive page processing.
+ *
+ * - The LIRS stack and queue.  These control the exact details of page
+ *   processing.
  *
  * CLIENT TSMAN
  * [ todo ]
@@ -120,7 +126,24 @@
     Uint32 prevList;
   };
 
-  struct Page_entry {
+  struct Page_entry_sublist_ptr {
+    Uint32 nextList;
+    Uint32 prevList;
+  };
+
+  struct Page_entry_stack_ptr {
+    Uint32 nextList;
+    Uint32 prevList;
+  };
+
+  struct Page_entry_queue_ptr {
+    Uint32 nextList;
+    Uint32 prevList;
+  };
+
+  struct Page_entry : Page_entry_sublist_ptr,
+                      Page_entry_stack_ptr,
+                      Page_entry_queue_ptr {
     Page_entry() {}
     Page_entry(Uint32 file_no, Uint32 page_no);
 
@@ -142,17 +165,16 @@
     };
     
     enum List {
-      NO_SUBLIST = 0            // reserved for main list
-      ,NOT_BOUND = 1
-      ,NOT_MAPPED = 2
-      ,CLEAN_UNUSED = 3
-      ,DIRTY_UNUSED = 4
-      ,ANY_USED = 5
-      ,WAIT_IO = 6
-      ,REQ_DONE = 7
-      ,IS_BUSY = 8
-      ,IS_LOCKED = 9
-      ,LIST_COUNT = 10
+      NOT_BOUND = 0
+      ,NOT_MAPPED = 1
+      ,CLEAN_UNUSED = 2
+      ,DIRTY_UNUSED = 3
+      ,ANY_USED = 4
+      ,WAIT_IO = 5
+      ,REQ_DONE = 6
+      ,IS_BUSY = 7
+      ,IS_LOCKED = 8
+      ,SUBLIST_COUNT = 9
     };
 
     Uint16 m_state;             // flags (0 if not yet on any sublist)
@@ -176,18 +198,6 @@
     Uint32 m_prev_entry_i[2];
     Uint32 nextHash, prevHash;
 
-    // comparisons - not implemented if LILO (simple queue)
-    static int m_cmp_main(const Page_entry&, const Page_entry&);
-    static int m_cmp_not_bound(const Page_entry&, const Page_entry&);
-    static int m_cmp_not_mapped(const Page_entry&, const Page_entry&);
-    static int m_cmp_clean_unused(const Page_entry&, const Page_entry&);
-    static int m_cmp_dirty_unused(const Page_entry&, const Page_entry&);
-    static int m_cmp_any_used(const Page_entry&, const Page_entry&);
-    static int m_cmp_wait_io(const Page_entry&, const Page_entry&);
-    static int m_cmp_req_done(const Page_entry&, const Page_entry&);
-    static int m_cmp_is_busy(const Page_entry&, const Page_entry&);
-    static int m_cmp_is_locked(const Page_entry&, const Page_entry&);
-
     static const char* get_sublist_name(Uint32 list_no);
 
     // compute sublist from state
@@ -200,30 +210,21 @@
     }
   };
 
-  // TODO use a real ordered list when available
-  struct Page_list {
-    Page_list();
-    Uint32 m_count;
-
-    Uint32 m_off;               // 0-main 1-sublist
-    Uint32 m_head_entry_i;
-    Uint32 m_tail_entry_i;
-
-    int (*m_cmp)(const Page_entry&, const Page_entry&);
-  };
-
-  // TODO ordered list iterator
-  typedef Uint32 Page_entry_iterator;
+  typedef DLCHashTable<Page_entry> Page_hashlist;
+  typedef DLCFifoList<Page_entry, Page_entry_sublist_ptr> Page_sublist;
+  typedef DLCFifoList<Page_entry, Page_entry_stack_ptr> Page_stack;
+  typedef DLCFifoList<Page_entry, Page_entry_queue_ptr> Page_queue;
 
   class Dbtup *c_tup;
   Logfile_client m_lgman;
 
-  bool m_lcp_on, m_lcp_copy_page_free;
+  bool m_lcp_on;
   Uint32 m_last_lcp;
   Uint32 m_last_lcp_complete;
-  Uint32 m_last_lcp_entry_i;
+  Uint32 m_lcp_curr_bucket;
+  Uint32 m_lcp_outstanding;     // remaining i/o waits
   Uint32 m_lcp_copy_page;
-  Uint32 m_lcp_outstanding;
+  bool m_lcp_copy_page_free;
   EndLcpReq m_end_lcp_req;
 
   typedef DataBuffer<15> File_map;
@@ -232,9 +233,10 @@
   ArrayPool<Page_request> m_page_request_pool;
   File_map::DataBufferPool m_data_buffer_pool;
 
-  // 0-main list
-  DLHashTable<Page_entry> m_page_entry_hash;
-  Page_list m_page_list[Page_entry::LIST_COUNT];
+  Page_hashlist m_page_hashlist;
+  Page_sublist* m_page_sublist[Page_entry::SUBLIST_COUNT];
+  Page_stack m_page_stack;
+  Page_queue m_page_queue;
 
   struct Stats {
     Stats();
@@ -261,10 +263,6 @@
   void execFSWRITEREF(Signal*);
 
 private:
-  void init_page_lists();
-  bool page_list_find(Page_list& pl, Ptr<Page_entry>&, const
Page_entry& key);
-  void page_list_insert(Page_list& pl, Ptr<Page_entry>);
-  void page_list_remove(Page_list& pl, Ptr<Page_entry>);
   void set_page_state(Ptr<Page_entry> ptr, Uint16 new_state);
 #ifdef VM_TRACE
   void verify_page_lists();
--- New file ---
+++ storage/ndb/src/kernel/vm/DLCFifoList.hpp	05/09/27 14:37:36
/* Copyright (C) 2003 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#ifndef DLC_FIFOLIST_HPP
#define DLC_FIFOLIST_HPP

#include "DLFifoList.hpp"
#include <NdbOut.hpp>

// Adds "count" to DLFifoList
template <class T, class U = T>
class DLCFifoList : public DLFifoList<T, U> {
public:
  // List head
  struct Head : public DLFifoList<T, U>::Head {
    Head() : m_count(0) {}
    Uint32 m_count;
  };
  
  // Ctor
  DLCFifoList(ArrayPool<T> & thePool) :
    DLFifoList<T, U>(thePool)
  {}

  // Get count
  Uint32 count() const { return head.m_count; }

  // Redefine methods which do add or remove
  
  bool seize(Ptr<T>& ptr) {
    if (DLFifoList<T, U>::seize(ptr)) {
      head.m_count++;
      return true;
    }
    return false;
  }

  bool seizeId(Ptr<T>& ptr, Uint32 i) {
    if (DLFifoList<T, U>::seizeId(ptr)) {
      head.m_count++;
      return true;
    }
    return false;
  }
  
  void add(Ptr<T>& ptr) {
    DLFifoList<T, U>::add(ptr);
    head.m_count++;
  }

  void remove(Ptr<T>& ptr) {
    DLFifoList<T, U>::remove(ptr);
    head.m_count--;
  }

  void release(Uint32 i) {
    DLFifoList<T, U>::release(i);
    head.m_count--;
  }
  
  void release(Ptr<T>& ptr) {
    DLFifoList<T, U>::release(ptr);
    head.m_count--;
  }

  void release() {
    DLFifoList<T, U>::release();
    head.m_count = 0;
  }

  DLCFifoList<T>& operator=(const DLCFifoList<T>& src){
    assert(&thePool == &src.thePool);
    this->head = src.head;
    return * this;
  }

protected:
  Head head;
};

// Local variant
template <class T, class U = T>
class LocalDLCFifoList : public DLCFifoList<T, U> {
public:
  LocalDLCFifoList(ArrayPool<T> & thePool,
                   typename DLCFifoList<T, U>::Head &_src)
    : DLCFifoList<T, U>(thePool), src(_src)
  {
    this->head = src;
#ifdef VM_TRACE
    assert(src.in_use == false);
    src.in_use = true;
#endif
  }
  
  ~LocalDLCFifoList() {
#ifdef VM_TRACE
    assert(src.in_use == true);
#endif
    src = this->head;
  }
private:
  typename DLCFifoList<T, U>::Head & src;
};

#endif

--- New file ---
+++ storage/ndb/src/kernel/vm/DLCHashTable.hpp	05/09/27 14:37:37
/* Copyright (C) 2003 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#ifndef DLC_HASHTABLE_HPP
#define DLC_HASHTABLE_HPP

#include <ndb_global.h>
#include "DLHashTable.hpp"

// Adds "count" to DLHashTable
template <class T, class U = T>
class DLCHashTable : public DLHashTable<T, U> {
public:
  // Ctor
  DLCHashTable(ArrayPool<T> & thePool) :
    DLHashTable<T, U>(thePool),
    m_count(0)
  {}
  
  // Get count
  Uint32 count() const { return m_count; }

  // Redefine methods which do add or remove

  void add(Ptr<T>& ptr) {
    DLHashTable<T, U>::add(ptr);
    m_count++;
  }
  
  void remove(Ptr<T>& ptr, const T & key) {
    DLHashTable<T, U>::remove(ptr, key);
    m_count--;
  }

  void remove(Uint32 i) {
    DLHashTable<T, U>::remove(i);
    m_count--;
  }

  void remove(Ptr<T>& ptr) {
    DLHashTable<T, U>::remove(ptr);
    m_count--;
  }

  void removeAll() {
    DLHashTable<T, U>::removeAll(ptr);
    m_count = 0;
  }
  
  void release(Ptr<T>& ptr, const T & key) {
    DLHashTable<T, U>::release(ptr, key);
    m_count--;
  }

  void release(Uint32 i) {
    DLHashTable<T, U>::release(i);
    m_count--;
  }

  void release(Ptr<T>& ptr) {
    DLHashTable<T, U>::release(ptr);
    m_count--;
  }
  
private:
  Uint32 m_count;
};

#endif

Thread
bk commit into 5.1 tree (pekka:1.2022)pekka27 Sep