Below is the list of changes that have just been committed into pekka's
local 5.1 repository. When pekka does a push, these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository,
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2050 05/10/17 12:42:30 pekka@stripped +4 -0
ndb - wl#2718 PGMAN optim: LIRS and shortcuts (5.1-dd)
storage/ndb/src/kernel/vm/DLFifoList.hpp
1.12 05/10/17 12:40:39 pekka@stripped +15 -1
wl#2718 - LIRS and shortcuts
storage/ndb/src/kernel/blocks/pgman.hpp
1.35 05/10/17 12:40:39 pekka@stripped +296 -96
wl#2718 - LIRS and shortcuts
storage/ndb/src/kernel/blocks/pgman.cpp
1.42 05/10/17 12:40:39 pekka@stripped +1307 -521
wl#2718 - LIRS and shortcuts
storage/ndb/include/kernel/signaldata/PgmanContinueB.hpp
1.6 05/10/17 12:40:39 pekka@stripped +4 -2
wl#2718 - LIRS and shortcuts
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: pekka
# Host: orca.ndb.mysql.com
# Root: /space/pekka/ndb/version/my51-dd
--- 1.5/storage/ndb/include/kernel/signaldata/PgmanContinueB.hpp 2005-08-08 01:23:31 +02:00
+++ 1.6/storage/ndb/include/kernel/signaldata/PgmanContinueB.hpp 2005-10-17 12:40:39 +02:00
@@ -26,8 +26,10 @@
friend class Pgman;
private:
enum {
- PROCESS_ALL = 0,
- PROCESS_REQ_DONE = 1
+ STATS_LOOP = 0,
+ BUSY_LOOP = 1,
+ CLEANUP_LOOP = 2,
+ LCP_LOOP = 3
};
};
--- 1.41/storage/ndb/src/kernel/blocks/pgman.cpp 2005-10-10 16:48:22 +02:00
+++ 1.42/storage/ndb/src/kernel/blocks/pgman.cpp 2005-10-17 12:40:39 +02:00
@@ -32,6 +32,14 @@
Page_request::DIRTY_REQ | \
Page_request::ALLOC_REQ)
+// todo use this
+#ifdef VM_TRACE
+#define dbg(x) \
+ do { if (! debugFlag) break; debugOut << "PGMAN: " << x << endl; }
while (0)
+#else
+#define dbg(x)
+#endif
+
Pgman::Pgman(const Configuration & conf) :
SimulatedBlock(PGMAN, conf),
m_file_map(m_data_buffer_pool),
@@ -40,6 +48,7 @@
m_page_queue(m_page_entry_pool)
#ifdef VM_TRACE
,debugOut(* new NullOutputStream())
+ ,debugFlag(false)
#endif
{
BLOCK_CONSTRUCTOR(Pgman);
@@ -57,22 +66,31 @@
addRecSignal(GSN_LCP_FRAG_ORD, &Pgman::execLCP_FRAG_ORD);
addRecSignal(GSN_END_LCP_REQ, &Pgman::execEND_LCP_REQ);
- // should be larger than number of pool pages
- m_page_entry_pool.setSize(2000);
- m_page_request_pool.setSize(10000);
- m_data_buffer_pool.setSize(1);
- m_page_hashlist.setSize(512);
-
- for (Uint32 k = 0; k < Page_entry::SUBLIST_COUNT; k++)
- m_page_sublist[k] = new Page_sublist(m_page_entry_pool);
+ // loop status
+ m_stats_loop_on = false;
+ m_busy_loop_on = false;
+ m_cleanup_loop_on = false;
+ m_lcp_loop_on = false;
- m_lcp_on = false;
+ // LCP variables
m_last_lcp = 0;
m_last_lcp_complete = 0;
m_lcp_curr_bucket = ~(Uint32)0;
m_lcp_outstanding = 0;
m_lcp_copy_page = RNIL;
m_lcp_copy_page_free = false;
+
+ // clean-up variables
+ m_cleanup_ptr.i = RNIL;
+
+ // should be a factor larger than number of pool pages
+ m_page_entry_pool.setSize(2000);
+ m_page_request_pool.setSize(10000);
+ m_data_buffer_pool.setSize(1);
+ m_page_hashlist.setSize(512);
+
+ for (Uint32 k = 0; k < Page_entry::SUBLIST_COUNT; k++)
+ m_page_sublist[k] = new Page_sublist(m_page_entry_pool);
}
Pgman::~Pgman()
@@ -104,13 +122,22 @@
ReadConfigConf::SignalLength, JBB);
}
-Pgman::Stats::Stats() :
+Pgman::Param::Param() :
m_max_pages(64), // smallish for testing
+ m_max_hot_pages(56),
+ m_max_loop_count(256),
+ m_max_io_waits(64),
+ m_stats_loop_delay(1000),
+ m_cleanup_loop_delay(200),
+ m_lcp_loop_delay(200)
+{
+}
+
+Pgman::Stats::Stats() :
+ m_num_pages(0),
m_page_hits(0),
m_page_faults(0),
- m_max_io_waits(64),
- m_current_io_waits(0),
- m_max_dirty_pct(50)
+ m_current_io_waits(0)
{
}
@@ -123,8 +150,11 @@
switch (startPhase) {
case 1:
- new (&m_lgman) Logfile_client(this, (Lgman*)globalData.getBlock(LGMAN), 0);
- c_tup = (Dbtup*)globalData.getBlock(DBTUP);
+ {
+ Lgman* lgman = (Lgman*)globalData.getBlock(LGMAN);
+ new (&m_lgman) Logfile_client(this, lgman, 0);
+ c_tup = (Dbtup*)globalData.getBlock(DBTUP);
+ }
break;
case 3:
{
@@ -132,7 +162,11 @@
ndbrequire(m_global_page_pool.seize(page_ptr));
m_lcp_copy_page = page_ptr.i;
m_lcp_copy_page_free = true;
- process_all(signal, true);
+ // start forever loops
+ do_stats_loop(signal);
+ do_cleanup_loop(signal);
+ m_stats_loop_on = true;
+ m_cleanup_loop_on = true;
}
break;
case 7:
@@ -162,14 +196,21 @@
Uint32 data1 = signal->theData[1];
switch (signal->theData[0]) {
- case PgmanContinueB::PROCESS_ALL:
- process_all(signal, true);
+ case PgmanContinueB::STATS_LOOP:
+ jam();
+ do_stats_loop(signal);
break;
- case PgmanContinueB::PROCESS_REQ_DONE:{
- Ptr<Page_entry> ptr;
- m_page_entry_pool.getPtr(ptr, signal->theData[1]);
- process_req_done(signal, ptr);
- }
+ case PgmanContinueB::BUSY_LOOP:
+ jam();
+ do_busy_loop(signal);
+ break;
+ case PgmanContinueB::CLEANUP_LOOP:
+ jam();
+ do_cleanup_loop(signal);
+ break;
+ case PgmanContinueB::LCP_LOOP:
+ jam();
+ do_lcp_loop(signal);
break;
default:
ndbrequire(false);
@@ -189,79 +230,46 @@
m_busy_count(0),
m_requests()
{
- m_next_entry_i[0] = m_next_entry_i[1] = RNIL;
- m_prev_entry_i[0] = m_prev_entry_i[1] = RNIL;
}
// page lists
-const char*
-Pgman::Page_entry::get_sublist_name(Uint32 list_no)
-{
- switch (list_no) {
- case NOT_BOUND:
- return "not_bound";
- case NOT_MAPPED:
- return "not_mapped";
- case CLEAN_UNUSED:
- return "clean_unused";
- case DIRTY_UNUSED:
- return "dirty_unused";
- case ANY_USED:
- return "any_used";
- case WAIT_IO:
- return "wait_io";
- case REQ_DONE:
- return "req_done";
- case IS_BUSY:
- return "is_busy";
- case IS_LOCKED:
- return "is_locked";
- }
- return "?";
-}
-
Uint32
-Pgman::Page_entry::get_sublist_no(Uint16 state)
+Pgman::get_sublist_no(Uint16 state)
{
- // TODO check state is valid
if (state == 0)
{
return ZNIL;
}
- if (! (state & BOUND))
- {
- return NOT_BOUND;
- }
- if (state & (PAGEIN | PAGEOUT))
- {
- return WAIT_IO;
- }
- if (! (state & MAPPED))
- {
- return NOT_MAPPED;
- }
- if (state & READREQ)
- {
- return REQ_DONE;
- }
- if (state & BUSY)
+ if (state & Page_entry::REQUEST)
{
- return IS_BUSY;
- }
- if (state & LOCKED)
- {
- return IS_LOCKED;
+ if (! (state & Page_entry::BOUND))
+ {
+ return Page_entry::SL_BIND;
+ }
+ if (! (state & Page_entry::MAPPED))
+ {
+ if (! (state & Page_entry::PAGEIN))
+ {
+ return Page_entry::SL_MAP;
+ }
+ return Page_entry::SL_MAP_IO;
+ }
+ if (! (state & Page_entry::PAGEOUT))
+ {
+ return Page_entry::SL_CALLBACK;
+ }
+ return Page_entry::SL_CALLBACK_IO;
}
- if (state & USED)
+ if (state & Page_entry::BUSY)
{
- return ANY_USED;
+ return Page_entry::SL_BUSY;
}
- if (state & DIRTY)
+ if (state & Page_entry::LOCKED)
{
- return DIRTY_UNUSED;
+ return Page_entry::SL_LOCKED;
}
- return CLEAN_UNUSED;
+ return Page_entry::SL_OTHER;
}
void
@@ -269,14 +277,14 @@
{
#ifdef VM_TRACE
debugOut << "PGMAN: >set_page_state: state=" << hex << new_state
<< endl;
- debugOut << "PGMAN: " << ptr << endl;
+ debugOut << "PGMAN: " << ptr << ": before" << endl;
#endif
Uint16 old_state = ptr.p->m_state;
if (old_state != new_state)
{
- Uint32 old_list_no = Page_entry::get_sublist_no(old_state);
- Uint32 new_list_no = Page_entry::get_sublist_no(new_state);
+ Uint32 old_list_no = get_sublist_no(old_state);
+ Uint32 new_list_no = get_sublist_no(new_state);
if (old_state != 0)
{
ndbrequire(old_list_no != ZNIL);
@@ -299,65 +307,70 @@
}
#ifdef VM_TRACE
- debugOut << "PGMAN: " << ptr << endl;
+ debugOut << "PGMAN: " << ptr << ": after" << endl;
debugOut << "PGMAN: <set_page_state" << endl;
#endif
}
-// verify and print totals
+// seize/release pages and entries
-#ifdef VM_TRACE
-
-void
-Pgman::verify_page_lists()
+bool
+Pgman::seize_cache_page(Ptr<GlobalPage>& gptr)
{
- Uint32 k;
- Uint32 tot_count = 0;
-
- for (k = 0; k < Page_entry::SUBLIST_COUNT; k++)
- {
- const Page_sublist& pl = *m_page_sublist[k];
+ // page cache has no own pool yet
+ bool ok = m_global_page_pool.seize(gptr);
- Uint32 count = 0;
- Ptr<Page_entry> ptr;
- // for possible future use to verify order
- Ptr<Page_entry> prev_ptr;
- prev_ptr.i = RNIL;
+ // zero is reserved as return value for queued request
+ if (ok && gptr.i == 0)
+ ok = m_global_page_pool.seize(gptr);
- for (pl.first(ptr); ptr.i != RNIL; pl.next(ptr))
- {
- ndbrequire(Page_entry::get_sublist_no(ptr.p->m_state) == k);
- tot_count++;
- prev_ptr = ptr;
- }
+ if (ok)
+ {
+ ndbrequire(m_stats.m_num_pages < m_param.m_max_pages);
+ m_stats.m_num_pages++;
}
+ return ok;
+}
- ndbrequire(tot_count == m_page_hashlist.count());
+void
+Pgman::release_cache_page(Uint32 i)
+{
+ m_global_page_pool.release(i);
- debugOut << "PGMAN: tot:" << tot_count;
- for (k = 0; k < Page_entry::SUBLIST_COUNT; k++)
- {
- const Page_sublist& pl = *m_page_sublist[k];
- debugOut << " " << Page_entry::get_sublist_name(k) << ":"
- << pl.count();
- }
- debugOut << endl;
+ ndbrequire(m_stats.m_num_pages != 0);
+ m_stats.m_num_pages--;
}
+bool
+Pgman::find_page_entry(Ptr<Page_entry>& ptr, Uint32 file_no, Uint32 page_no)
+{
+ Page_entry key;
+ key.m_file_no = file_no;
+ key.m_page_no = page_no;
+
+ if (m_page_hashlist.find(ptr, key))
+ {
+#ifdef VM_TRACE
+ debugOut << "PGMAN: find_page_entry" << endl;
+ debugOut << "PGMAN: " << ptr << endl;
#endif
-
-// seize/release page entry
+ return true;
+ }
+ return false;
+}
Uint32
Pgman::seize_page_entry(Ptr<Page_entry>& ptr, Uint32 file_no, Uint32 page_no)
{
- if (m_page_entry_pool.seize(ptr)) {
+ if (m_page_entry_pool.seize(ptr))
+ {
new (ptr.p) Page_entry(file_no, page_no);
m_page_hashlist.add(ptr);
#ifdef VM_TRACE
- debugOut << "PGMAN: seize_page_entry" << endl;
- debugOut << "PGMAN: " << ptr << endl;
+ ptr.p->m_this = this;
+ debugOut << "PGMAN: seize_page_entry" << endl;
+ debugOut << "PGMAN: " << ptr << endl;
#endif
return true;
@@ -366,26 +379,22 @@
}
bool
-Pgman::find_page_entry(Ptr<Page_entry>& ptr, Uint32 file_no, Uint32 page_no)
-{
- Page_entry key;
- key.m_file_no = file_no;
- key.m_page_no = page_no;
-
- return m_page_hashlist.find(ptr, key);
-}
-
-bool
Pgman::get_page_entry(Ptr<Page_entry>& ptr, Uint32 file_no, Uint32 page_no)
{
- if (find_page_entry(ptr, file_no, page_no)) {
+ if (find_page_entry(ptr, file_no, page_no))
+ {
+ ndbrequire(ptr.p->m_state != 0);
m_stats.m_page_hits++;
return true;
}
- if (seize_page_entry(ptr, file_no, page_no)) {
+
+ if (seize_page_entry(ptr, file_no, page_no))
+ {
+ ndbrequire(ptr.p->m_state == 0);
m_stats.m_page_faults++;
return true;
}
+
return false;
}
@@ -396,405 +405,802 @@
debugOut << "PGMAN: release_page_entry" << endl;
debugOut << "PGMAN: " << ptr << endl;
#endif
+ Uint16 state = ptr.p->m_state;
+
+ ndbrequire(! (state & Page_entry::REQUEST));
+ ndbrequire(ptr.p->m_requests.isEmpty());
+
+ ndbrequire(! (state & Page_entry::ONSTACK));
+ ndbrequire(! (state & Page_entry::ONQUEUE));
+ ndbrequire(ptr.p->m_real_page_i == RNIL);
- if (ptr.p->m_real_page_i != RNIL)
- {
- Local_key key;
- key.m_page_no = ptr.p->m_page_no;
- key.m_file_no = ptr.p->m_file_no;
- c_tup->disk_page_unmap_callback(ptr.p->m_real_page_i);
-
- m_global_page_pool.release(ptr.p->m_real_page_i);
- ptr.p->m_real_page_i = RNIL;
- }
- // remove from sublist
set_page_state(ptr, 0);
m_page_hashlist.remove(ptr);
m_page_entry_pool.release(ptr);
}
-// LCP
+// LIRS
+/*
+ * After the hot entry at stack bottom is removed, additional entries
+ * are removed until next hot entry is found. There are 3 cases for the
+ * removed entry: 1) a bound entry is already on queue 2) an unbound
+ * entry with open requests enters queue at bind time 3) an unbound
+ * entry without requests is returned to entry pool.
+ */
void
-Pgman::execLCP_FRAG_ORD(Signal* signal)
+Pgman::lirs_stack_prune()
{
- LcpFragOrd* ord = (LcpFragOrd*)signal->getDataPtr();
- ndbrequire(ord->lcpId >= m_last_lcp_complete + 1 || m_last_lcp_complete == 0);
- m_last_lcp = ord->lcpId;
+#ifdef VM_TRACE
+ debugOut << "PGMAN: >lirs_stack_prune" << endl;
+#endif
+ Page_stack& pl_stack = m_page_stack;
+ Page_queue& pl_queue = m_page_queue;
+ Ptr<Page_entry> ptr;
- m_lcp_on = true;
- ndbrequire(!m_lcp_outstanding);
- ndbrequire(m_lcp_copy_page_free);
- m_lcp_curr_bucket = 0;
+ while (pl_stack.first(ptr)) // first is stack bottom
+ {
+ Uint16 state = ptr.p->m_state;
+ if (state & Page_entry::HOT)
+ {
+ jam();
+ break;
+ }
#ifdef VM_TRACE
- debugOut
- << "PGMAN: execLCP_FRAG_ORD"
- << " this=" << m_last_lcp << " last_complete=" <<
m_last_lcp_complete
- << " bucket=" << m_lcp_curr_bucket << endl;
+ debugOut << "PGMAN: " << ptr << ": prune from stack" << endl;
+#endif
+
+ pl_stack.remove(ptr);
+ state &= ~ Page_entry::ONSTACK;
+ set_page_state(ptr, state);
+
+ if (state & Page_entry::BOUND)
+ {
+ jam();
+ ndbrequire(state & Page_entry::ONQUEUE);
+ }
+ else if (state & Page_entry::REQUEST)
+ {
+ // enters queue at bind
+ jam();
+ ndbrequire(! (state & Page_entry::ONQUEUE));
+ }
+ else
+ {
+ jam();
+ release_page_entry(ptr);
+ }
+ }
+#ifdef VM_TRACE
+ debugOut << "PGMAN: <lirs_stack_prune" << endl;
#endif
}
+/*
+ * Remove the hot entry at stack bottom and make it cold and do stack
+ * pruning. There are 2 cases for the removed entry: 1) a bound entry
+ * is moved to queue 2) an unbound entry must have requests and enters
+ * queue at bind time.
+ */
void
-Pgman::execEND_LCP_REQ(Signal* signal)
+Pgman::lirs_stack_pop()
{
- EndLcpReq* req = (EndLcpReq*)signal->getDataPtr();
- m_end_lcp_req = *req;
+#ifdef VM_TRACE
+ debugOut << "PGMAN: lirs_stack_pop" << endl;
+#endif
+ Page_stack& pl_stack = m_page_stack;
+ Page_queue& pl_queue = m_page_queue;
+
+ Ptr<Page_entry> ptr;
+ bool ok = pl_stack.first(ptr);
+ ndbrequire(ok);
+ Uint16 state = ptr.p->m_state;
#ifdef VM_TRACE
- debugOut
- << "PGMAN: execEND_LCP_REQ"
- << " this=" << m_last_lcp << " last_complete=" <<
m_last_lcp_complete
- << " bucket=" << m_lcp_curr_bucket
- << " outstanding=" << m_lcp_outstanding << endl;
+ debugOut << "PGMAN: " << ptr << ": pop from stack" << endl;
#endif
- if (m_last_lcp == m_last_lcp_complete)
+ ndbrequire(state & Page_entry::HOT);
+ ndbrequire(state & Page_entry::ONSTACK);
+ pl_stack.remove(ptr);
+ state &= ~ Page_entry::HOT;
+ state &= ~ Page_entry::ONSTACK;
+ ndbrequire(! (state & Page_entry::ONQUEUE));
+
+ if (state & Page_entry::BOUND)
{
- ndbrequire(! m_lcp_on);
- signal->theData[0] = m_end_lcp_req.senderData;
- sendSignal(m_end_lcp_req.senderRef, GSN_END_LCP_CONF, signal, 1, JBB);
+ jam();
+ pl_queue.add(ptr);
+ state |= Page_entry::ONQUEUE;
+ }
+ else
+ {
+ // enters queue at bind
+ jam();
+ ndbrequire(state & Page_entry::REQUEST);
}
- m_last_lcp_complete = m_last_lcp;
+ set_page_state(ptr, state);
+ lirs_stack_prune();
}
+/*
+ * Update LIRS lists when page is referenced.
+ */
void
-Pgman::process_lcp(Signal* signal)
+Pgman::lirs_reference(Ptr<Page_entry> ptr)
{
- if (! m_lcp_on)
- return;
-
- int max_count = m_stats.m_max_io_waits - m_stats.m_current_io_waits;
- if (max_count > 0)
- max_count = max_count / 2 + 1;
-
#ifdef VM_TRACE
- debugOut
- << "PGMAN: process_lcp"
- << " this=" << m_last_lcp << " last_complete=" <<
m_last_lcp_complete
- << " bucket=" << m_lcp_curr_bucket
- << " outstanding=" << m_lcp_outstanding << endl;
+ debugOut << "PGMAN: >lirs_reference" << endl;
+ debugOut << "PGMAN: " << ptr << endl;
#endif
+ Page_stack& pl_stack = m_page_stack;
+ Page_queue& pl_queue = m_page_queue;
- // start or re-start from beginning of current hash bucket
- if (m_lcp_curr_bucket != ~(Uint32)0) {
- Page_hashlist::Iterator iter;
- m_page_hashlist.next(m_lcp_curr_bucket, iter);
+ Uint16 state = ptr.p->m_state;
+ ndbrequire(! (state & Page_entry::LOCKED));
- while (iter.curr.i != RNIL && --max_count > 0)
+ // even non-LIRS cache pages are counted on l.h.s.
+ if (m_stats.m_num_pages >= m_param.m_max_hot_pages)
+ {
+ if (state & Page_entry::HOT)
{
- Ptr<Page_entry>& ptr = iter.curr;
- Uint16 state = ptr.p->m_state;
-
- if (ptr.p->m_last_lcp < m_last_lcp &&
- (state & Page_entry::DIRTY))
+ // case 1
+ jam();
+ ndbrequire(state & Page_entry::ONSTACK);
+ bool at_bottom = ! pl_stack.hasPrev(ptr);
+ pl_stack.remove(ptr);
+ pl_stack.add(ptr);
+ if (at_bottom)
{
- if(! (state & Page_entry::BOUND))
- {
- ndbout << ptr << endl;
- ndbrequire(false);
- }
- if (state & Page_entry::BUSY)
- {
- break; // wait for it
- }
- if (state & Page_entry::LOCKED)
- {
- /**
- * Special handling of LOCKED pages...only write 1 at a time...
- * using copy page (m_lcp_copy_page)
- */
- if (!m_lcp_copy_page_free)
- {
- break;
- }
- m_lcp_copy_page_free = false;
- Ptr<GlobalPage> src, copy;
- m_global_page_pool.getPtr(copy, m_lcp_copy_page);
- m_global_page_pool.getPtr(src, ptr.p->m_real_page_i);
- memcpy(copy.p, src.p, sizeof(GlobalPage));
- ptr.p->m_real_page_i = copy.i;
- ptr.p->m_copy_real_page_i = src.i;
- ptr.p->m_state |= Page_entry::LCP;
- pageout(signal, ptr);
- }
- else if (state & Page_entry::PAGEOUT)
- {
- set_page_state(ptr, state | Page_entry::LCP);
- }
- else
- {
- ptr.p->m_state |= Page_entry::LCP;
- pageout(signal, ptr);
- }
- ptr.p->m_last_lcp = m_last_lcp;
- m_lcp_outstanding++;
+ jam();
+ lirs_stack_prune();
+ }
+ }
+ else if (state & Page_entry::ONSTACK)
+ {
+ // case 2a 3a
+ jam();
+ pl_stack.remove(ptr);
+ if (! pl_stack.isEmpty())
+ {
+ jam();
+ lirs_stack_pop();
+ }
+ pl_stack.add(ptr);
+ state |= Page_entry::HOT;
+ if (state & Page_entry::ONQUEUE)
+ {
+ jam();
+ move_cleanup_ptr(ptr);
+ pl_queue.remove(ptr);
+ state &= ~ Page_entry::ONQUEUE;
+ }
+ }
+ else
+ {
+ // case 2b 3b
+ jam();
+ pl_stack.add(ptr);
+ state |= Page_entry::ONSTACK;
+ if (state & Page_entry::ONQUEUE)
+ {
+ jam();
+ move_cleanup_ptr(ptr);
+ pl_queue.remove(ptr);
+ }
+ if (state & Page_entry::BOUND)
+ {
+ jam();
+ pl_queue.add(ptr);
+ state |= Page_entry::ONQUEUE;
+ }
+ else
+ {
+ // enters queue at bind
+ jam();
}
- m_page_hashlist.next(iter);
}
-
- m_lcp_curr_bucket = (iter.curr.i != RNIL ? iter.bucket : ~(Uint32)0);
}
-
- if (m_lcp_curr_bucket == ~(Uint32)0 && !m_lcp_outstanding)
+ else
{
- if (m_last_lcp == m_last_lcp_complete)
+#ifdef VM_TRACE
+ debugOut << "PGMAN: filling up initial hot pages: "
+ << m_stats.m_num_pages << " of "
+ << m_param.m_max_hot_pages << endl;
+#endif
+ jam();
+ if (state & Page_entry::ONSTACK)
{
- signal->theData[0] = m_end_lcp_req.senderData;
- sendSignal(m_end_lcp_req.senderRef, GSN_END_LCP_CONF, signal, 1, JBB);
+ jam();
+ pl_stack.remove(ptr);
}
- m_last_lcp_complete = m_last_lcp;
- m_lcp_on = false;
- m_lcp_curr_bucket = ~(Uint32)0;
+ pl_stack.add(ptr);
+ state |= Page_entry::ONSTACK;
+ state |= Page_entry::HOT;
}
+
+ set_page_state(ptr, state);
+#ifdef VM_TRACE
+ debugOut << "PGMAN: <lirs_reference" << endl;
+#endif
}
-// process LCP and page and request queues
+// continueB loops
-// TODO invoke some things directly without delay loop
void
-Pgman::process_all(Signal* signal, bool continue_b)
+Pgman::do_stats_loop(Signal* signal)
{
#ifdef VM_TRACE
- debugOut << "PGMAN: process_all" << endl;
- verify_page_lists();
+ debugOut << "PGMAN: do_stats_loop" << endl;
+ verify_all();
#endif
+ Uint32 delay = m_param.m_stats_loop_delay;
+ signal->theData[0] = PgmanContinueB::STATS_LOOP;
+ sendSignalWithDelay(PGMAN_REF, GSN_CONTINUEB, signal, delay, 1);
+}
- bool busy = false;
+void
+Pgman::do_busy_loop(Signal* signal, bool direct)
+{
+#ifdef VM_TRACE
+ debugOut << "PGMAN: >do_busy_loop on=" << m_busy_loop_on
+ << " direct=" << direct << endl;
+#endif
+ Uint32 restart = false;
+ if (direct)
+ {
+ // may not cover the calling entry
+ (void)process_bind(signal);
+ (void)process_map(signal);
+ // callback must be queued
+ if (! m_busy_loop_on)
+ {
+ restart = true;
+ m_busy_loop_on = true;
+ }
+ }
+ else
+ {
+ ndbrequire(m_busy_loop_on);
+ restart += process_bind(signal);
+ restart += process_map(signal);
+ restart += process_callback(signal);
+ if (! restart)
+ {
+ m_busy_loop_on = false;
+ }
+ }
+ if (restart)
+ {
+ signal->theData[0] = PgmanContinueB::BUSY_LOOP;
+ sendSignal(PGMAN_REF, GSN_CONTINUEB, signal, 1, JBB);
+ }
+#ifdef VM_TRACE
+ debugOut << "PGMAN: <do_busy_loop on=" << m_busy_loop_on
+ << " restart=" << restart << endl;
+#endif
+}
- process_lcp(signal);
+void
+Pgman::do_cleanup_loop(Signal* signal)
+{
+#ifdef VM_TRACE
+ debugOut << "PGMAN: do_cleanup_loop" << endl;
+#endif
+ process_cleanup(signal);
- process_not_bound(signal);
- process_not_mapped(signal);
- process_dirty_unused(signal);
- process_req_done(signal);
+ Uint32 delay = m_param.m_cleanup_loop_delay;
+ signal->theData[0] = PgmanContinueB::CLEANUP_LOOP;
+ sendSignalWithDelay(PGMAN_REF, GSN_CONTINUEB, signal, delay, 1);
+}
- if (continue_b)
+void
+Pgman::do_lcp_loop(Signal* signal, bool direct)
+{
+#ifdef VM_TRACE
+ debugOut << "PGMAN: >do_lcp_loop on=" << m_lcp_loop_on
+ << " direct=" << direct << endl;
+#endif
+ Uint32 restart = false;
+ if (direct)
{
- signal->theData[0] = PgmanContinueB::PROCESS_ALL;
- if (busy)
- sendSignal(PGMAN_REF, GSN_CONTINUEB, signal, 1, JBB);
- else
- sendSignalWithDelay(PGMAN_REF, GSN_CONTINUEB, signal, 50, 1);
+ ndbrequire(! m_lcp_loop_on);
+ restart = true;
+ m_lcp_loop_on = true;
+ }
+ else
+ {
+ ndbrequire(m_lcp_loop_on);
+ restart += process_lcp(signal);
+ if (! restart)
+ {
+ m_lcp_loop_on = false;
+ }
+ }
+ if (restart)
+ {
+ Uint32 delay = m_param.m_lcp_loop_delay;
+ signal->theData[0] = PgmanContinueB::LCP_LOOP;
+ sendSignalWithDelay(PGMAN_REF, GSN_CONTINUEB, signal, delay, 1);
}
+#ifdef VM_TRACE
+ debugOut << "PGMAN: <do_lcp_loop on=" << m_lcp_loop_on
+ << " restart=" << restart << endl;
+#endif
}
-void
-Pgman::process_not_bound(Signal* signal)
+// busy loop
+
+bool
+Pgman::process_bind(Signal* signal)
{
+#ifdef VM_TRACE
+ debugOut << "PGMAN: >process_bind" << endl;
+#endif
int max_count = 32;
+ Page_sublist& pl_bind = *m_page_sublist[Page_entry::SL_BIND];
- Page_sublist& pl_not_bound = *m_page_sublist[Page_entry::NOT_BOUND];
- Page_sublist& pl_clean_unused = *m_page_sublist[Page_entry::CLEAN_UNUSED];
-
- while (! pl_not_bound.isEmpty() && --max_count >= 0)
+ while (! pl_bind.isEmpty() && --max_count >= 0)
{
+ jam();
Ptr<Page_entry> ptr;
- pl_not_bound.first(ptr);
-
- if (m_page_hashlist.count() - pl_not_bound.count() >= m_stats.m_max_pages)
+ pl_bind.first(ptr);
+ if (! process_bind(signal, ptr))
{
- if (! pl_clean_unused.isEmpty())
- {
- Ptr<Page_entry> tmp;
- pl_clean_unused.first(tmp);
- release_page_entry(tmp);
- }
- else
- {
- break;
- }
+ jam();
+ break;
}
+ }
+#ifdef VM_TRACE
+ debugOut << "PGMAN: <process_bind" << endl;
+#endif
+ return ! pl_bind.isEmpty();
+}
+bool
+Pgman::process_bind(Signal* signal, Ptr<Page_entry> ptr)
+{
#ifdef VM_TRACE
- debugOut << "PGMAN: " << ptr << " : process_not_bound" << endl;
+ debugOut << "PGMAN: " << ptr << " : process_bind" << endl;
#endif
+ Page_sublist& pl_bind = *m_page_sublist[Page_entry::SL_BIND];
+ Page_queue& pl_queue = m_page_queue;
+ Ptr<GlobalPage> gptr;
- Ptr<GlobalPage> page_ptr;
- bool ok = m_global_page_pool.seize(page_ptr);
- if (page_ptr.i == 0)
- ok = m_global_page_pool.seize(page_ptr);
+ if (m_stats.m_num_pages < m_param.m_max_pages)
+ {
+ jam();
+ bool ok = seize_cache_page(gptr);
+ // to handle failure requires some changes in LIRS
ndbrequire(ok);
- ptr.p->m_real_page_i = page_ptr.i;
-
- set_page_state(ptr, ptr.p->m_state | Page_entry::BOUND);
-
- if (ptr.p->m_state & Page_entry::MAPPED)
+ }
+ else
+ {
+ jam();
+ Ptr<Page_entry> clean_ptr;
+ if (! pl_queue.first(clean_ptr))
+ {
+ jam();
+#ifdef VM_TRACE
+ debugOut << "PGMAN: bind failed: queue empty" << endl;
+#endif
+ // XXX busy loop
+ return false;
+ }
+ Uint16 clean_state = clean_ptr.p->m_state;
+ // under unusual circumstances it could still be paging in
+ if (! (clean_state & Page_entry::MAPPED) ||
+ clean_state & Page_entry::DIRTY ||
+ clean_state & Page_entry::REQUEST)
{
- ndbassert(ptr.p->m_state & Page_entry::NEW);
- process_req_done(signal, ptr);
+ jam();
+#ifdef VM_TRACE
+ debugOut << "PGMAN: bind failed: queue front not evictable" << endl;
+ debugOut << "PGMAN: " << clean_ptr << endl;
+#endif
+ // XXX busy loop
+ return false;
}
+
+#ifdef VM_TRACE
+ debugOut << "PGMAN: " << clean_ptr << " : evict" << endl;
+#endif
+
+ ndbrequire(clean_state & Page_entry::ONQUEUE);
+ ndbrequire(clean_state & Page_entry::BOUND);
+ ndbrequire(clean_state & Page_entry::MAPPED);
+
+ move_cleanup_ptr(clean_ptr);
+ pl_queue.remove(clean_ptr);
+ clean_state &= ~ Page_entry::ONQUEUE;
+
+ gptr.i = clean_ptr.p->m_real_page_i;
+
+ c_tup->disk_page_unmap_callback(clean_ptr.p->m_real_page_i);
+ clean_ptr.p->m_real_page_i = RNIL;
+ clean_state &= ~ Page_entry::BOUND;
+ clean_state &= ~ Page_entry::MAPPED;
+
+ set_page_state(clean_ptr, clean_state);
+
+ if (! (clean_state & Page_entry::ONSTACK))
+ release_page_entry(clean_ptr);
+
+ m_global_page_pool.getPtr(gptr);
}
-}
-void
-Pgman::process_not_mapped(Signal* signal)
-{
- int max_count = m_stats.m_max_io_waits - m_stats.m_current_io_waits;
- if (max_count > 0)
- max_count = max_count / 2 + 1;
+ Uint16 state = ptr.p->m_state;
- Page_sublist& pl_not_mapped = *m_page_sublist[Page_entry::NOT_MAPPED];
+ ptr.p->m_real_page_i = gptr.i;
+ state |= Page_entry::BOUND;
+ if (state & Page_entry::EMPTY)
+ {
+ jam();
+ state |= Page_entry::MAPPED;
+ }
- while (! pl_not_mapped.isEmpty() && --max_count >= 0)
+ if (! (state & Page_entry::LOCKED) &&
+ ! (state & Page_entry::ONQUEUE) &&
+ ! (state & Page_entry::HOT))
{
- Ptr<Page_entry> ptr;
- pl_not_mapped.first(ptr);
+ jam();
#ifdef VM_TRACE
- debugOut << "PGMAN: " << ptr << " : process_not_mapped" <<
endl;
+ debugOut << "PGMAN: " << ptr << " : add to queue at bind" <<
endl;
#endif
- pagein(signal, ptr);
+ pl_queue.add(ptr);
+ state |= Page_entry::ONQUEUE;
}
+
+ set_page_state(ptr, state);
+ return true;
}
-void
-Pgman::process_dirty_unused(Signal* signal)
+bool
+Pgman::process_map(Signal* signal)
{
- int max_dirty_pct = 80; // in percent
- int max_count = m_stats.m_max_io_waits - m_stats.m_current_io_waits;
- Page_sublist& pl_dirty_unused = *m_page_sublist[Page_entry::DIRTY_UNUSED];
- Page_sublist& pl_not_bound = *m_page_sublist[Page_entry::NOT_BOUND];
-
+#ifdef VM_TRACE
+ debugOut << "PGMAN: >process_map" << endl;
+#endif
+ int max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits;
if (max_count > 0)
- {
max_count = max_count / 2 + 1;
- int max_dirty_pages =
- (m_stats.m_max_pages - pl_not_bound.count()) *
- m_stats.m_max_dirty_pct;
- int dirty_pages = pl_dirty_unused.count() * 100;
- if (dirty_pages <= max_dirty_pages)
- return;
+ Page_sublist& pl_map = *m_page_sublist[Page_entry::SL_MAP];
- if (max_count > (dirty_pages - max_dirty_pages))
- max_count = dirty_pages - max_dirty_pages;
- }
-
- while (! pl_dirty_unused.isEmpty() && --max_count >= 0)
+ while (! pl_map.isEmpty() && --max_count >= 0)
{
+ jam();
Ptr<Page_entry> ptr;
- pl_dirty_unused.first(ptr);
-
+ pl_map.first(ptr);
+ if (! process_map(signal, ptr))
+ {
+ jam();
+ break;
+ }
+ }
#ifdef VM_TRACE
- debugOut << "PGMAN: " << ptr << " : process_dirty_unused" <<
endl;
+ debugOut << "PGMAN: <process_map" << endl;
#endif
-
- pageout(signal, ptr);
+ return ! pl_map.isEmpty();
+}
+
+bool
+Pgman::process_map(Signal* signal, Ptr<Page_entry> ptr)
+{
+#ifdef VM_TRACE
+ debugOut << "PGMAN: " << ptr << " : process_map" << endl;
+#endif
+ pagein(signal, ptr);
+ return true;
+}
+
+bool
+Pgman::process_callback(Signal* signal)
+{
+#ifdef VM_TRACE
+ debugOut << "PGMAN: >process_callback" << endl;
+#endif
+ int max_count = 1;
+ Page_sublist& pl_callback = *m_page_sublist[Page_entry::SL_CALLBACK];
+
+ while (! pl_callback.isEmpty() && --max_count >= 0)
+ {
+ jam();
+ Ptr<Page_entry> ptr;
+ pl_callback.first(ptr);
+ if (! process_callback(signal, ptr))
+ {
+ jam();
+ break;
+ }
}
+#ifdef VM_TRACE
+ debugOut << "PGMAN: <process_callback" << endl;
+#endif
+ return ! pl_callback.isEmpty();
}
-void
-Pgman::process_req_done(Signal* signal, Ptr<Page_entry> ptr)
+bool
+Pgman::process_callback(Signal* signal, Ptr<Page_entry> ptr)
{
-
- SimulatedBlock* b;
- Callback callback;
- Uint16 state = 0;
- if(! (ptr.p->m_state & Page_entry::PAGEOUT) &&
!ptr.p->m_requests.isEmpty())
+#ifdef VM_TRACE
+ debugOut << "PGMAN: " << ptr << " : process_callback" << endl;
+#endif
+ int max_count = 1;
+ Uint16 state = ptr.p->m_state;
+
+ while (! ptr.p->m_requests.isEmpty() && --max_count >= 0)
{
+ jam();
+ SimulatedBlock* b;
+ Callback callback;
{
/**
- * Make sure list is in own scope if callback will access this list
- * again
+ * Make sure list is in own scope if callback will access this
+ * list again (destructor restores list head).
*/
- LocalDLFifoList<Page_request> l(m_page_request_pool, ptr.p->m_requests);
-
+ LocalDLFifoList<Page_request>
+ req_list(m_page_request_pool, ptr.p->m_requests);
Ptr<Page_request> req_ptr;
- l.first(req_ptr);
-
+
+ req_list.first(req_ptr);
#ifdef VM_TRACE
- debugOut << "PGMAN: " << req_ptr << " : process_req_done"
<< endl;
+ debugOut << "PGMAN: " << req_ptr << " : process_callback"
<< endl;
#endif
-
b = globalData.getBlock(req_ptr.p->m_block);
callback = req_ptr.p->m_callback;
- l.release(req_ptr);
-
+ req_list.release(req_ptr);
+
if (req_ptr.p->m_flags & DIRTY_FLAGS)
{
- state |= Page_entry::DIRTY;
+ jam();
+ state |= Page_entry::DIRTY;
}
}
- ndbassert(ptr.p->m_state & Page_entry::BOUND);
- ndbassert(ptr.p->m_state & Page_entry::MAPPED);
+ ndbrequire(state & Page_entry::BOUND);
+ ndbrequire(state & Page_entry::MAPPED);
+
+ // callback may re-enter PGMAN and change page state
+ set_page_state(ptr, state);
b->execute(signal, callback, ptr.p->m_real_page_i);
- ptr.p->m_state &= ~Page_entry::NO_HOOK;
-
- if (ptr.p->m_requests.isEmpty())
+ state = ptr.p->m_state;
+
+ state &= ~ Page_entry::NO_HOOK;
+ }
+
+ if (ptr.p->m_requests.isEmpty())
+ {
+ jam();
+ state &= ~ Page_entry::REQUEST;
+ }
+ set_page_state(ptr, state);
+ return true;
+}
+
+// cleanup loop
+
+bool
+Pgman::process_cleanup(Signal* signal)
+{
+#ifdef VM_TRACE
+ debugOut << "PGMAN: >process_cleanup" << endl;
+#endif
+ Page_queue& pl_queue = m_page_queue;
+
+ // XXX for now start always from beginning
+ m_cleanup_ptr.i = RNIL;
+
+ if (m_cleanup_ptr.i == RNIL && ! pl_queue.first(m_cleanup_ptr))
+ {
+ jam();
+#ifdef VM_TRACE
+ debugOut << "PGMAN: <process_cleanup: empty queue" << endl;
+#endif
+ return false;
+ }
+
+ int max_loop_count = m_param.m_max_loop_count;
+ int max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits;
+
+ if (max_count > 0)
+ {
+ max_count = max_count / 2 + 1;
+ /*
+ * Possibly add code here to avoid writing too rapidly. May be
+ * unnecessary since only cold pages are cleaned.
+ */
+ }
+
+ Ptr<Page_entry> ptr = m_cleanup_ptr;
+ while (max_loop_count != 0 && max_count != 0)
+ {
+ Uint16 state = ptr.p->m_state;
+ ndbrequire(! (state & Page_entry::LOCKED));
+ if (state & Page_entry::BUSY)
{
- set_page_state(ptr, (ptr.p->m_state & ~Page_entry::READREQ) | state);
+#ifdef VM_TRACE
+ debugOut << "PGMAN: process_cleanup: break on busy page" << endl;
+ debugOut << "PGMAN: " << ptr << endl;
+#endif
+ break;
}
- else
+ if (state & Page_entry::DIRTY &&
+ ! (state & Page_entry::PAGEIN) &&
+ ! (state & Page_entry::PAGEOUT))
{
- set_page_state(ptr, ptr.p->m_state | state);
- signal->theData[0] = PgmanContinueB::PROCESS_REQ_DONE;
- signal->theData[1] = ptr.i;
- sendSignal(PGMAN_REF, GSN_CONTINUEB, signal, 2, JBB);
+#ifdef VM_TRACE
+ debugOut << "PGMAN: " << ptr << " : process_cleanup" <<
endl;
+#endif
+ pageout(signal, ptr);
+ max_count--;
}
+ if (! pl_queue.hasNext(ptr))
+ break;
+ pl_queue.next(ptr);
+ max_loop_count--;
+ }
+ m_cleanup_ptr = ptr;
+#ifdef VM_TRACE
+ debugOut << "PGMAN: <process_cleanup" << endl;
+#endif
+ return true;
+}
+
+/*
+ * Call this before queue.remove(ptr). If the removed entry is the
+ * clean-up pointer, move it towards front.
+ */
+void
+Pgman::move_cleanup_ptr(Ptr<Page_entry> ptr)
+{
+ Page_queue& pl_queue = m_page_queue;
+ if (ptr.i == m_cleanup_ptr.i)
+ {
+ jam();
+ pl_queue.prev(m_cleanup_ptr);
}
}
+// LCP
+
+void
+Pgman::execLCP_FRAG_ORD(Signal* signal)
+{
+ LcpFragOrd* ord = (LcpFragOrd*)signal->getDataPtr();
+ ndbrequire(ord->lcpId >= m_last_lcp_complete + 1 || m_last_lcp_complete == 0);
+ m_last_lcp = ord->lcpId;
+
+ ndbrequire(!m_lcp_outstanding);
+ ndbrequire(m_lcp_copy_page_free);
+ m_lcp_curr_bucket = 0;
+
+#ifdef VM_TRACE
+ debugOut
+ << "PGMAN: execLCP_FRAG_ORD"
+ << " this=" << m_last_lcp << " last_complete=" <<
m_last_lcp_complete
+ << " bucket=" << m_lcp_curr_bucket << endl;
+#endif
+
+ do_lcp_loop(signal, true);
+}
+
void
-Pgman::process_req_done(Signal* signal)
+Pgman::execEND_LCP_REQ(Signal* signal)
{
- int max_count = 8;
+ EndLcpReq* req = (EndLcpReq*)signal->getDataPtr();
+ m_end_lcp_req = *req;
- Page_sublist& pl_req_done = *m_page_sublist[Page_entry::REQ_DONE];
+#ifdef VM_TRACE
+ debugOut
+ << "PGMAN: execEND_LCP_REQ"
+ << " this=" << m_last_lcp << " last_complete=" <<
m_last_lcp_complete
+ << " bucket=" << m_lcp_curr_bucket
+ << " outstanding=" << m_lcp_outstanding << endl;
+#endif
- while (! pl_req_done.isEmpty() && max_count >= 0)
+ if (m_last_lcp == m_last_lcp_complete)
{
- Ptr<Page_entry> ptr;
- pl_req_done.first(ptr);
+ ndbrequire(! m_lcp_loop_on);
+ signal->theData[0] = m_end_lcp_req.senderData;
+ sendSignal(m_end_lcp_req.senderRef, GSN_END_LCP_CONF, signal, 1, JBB);
+ }
+
+ m_last_lcp_complete = m_last_lcp;
+}
+
+bool
+Pgman::process_lcp(Signal* signal)
+{
+ Page_hashlist& pl_hash = m_page_hashlist;
+ int max_count = m_param.m_max_io_waits - m_stats.m_current_io_waits;
+ if (max_count > 0)
+ max_count = max_count / 2 + 1;
#ifdef VM_TRACE
- debugOut << "PGMAN: " << ptr << " : process_req_done" << endl;
+ debugOut
+ << "PGMAN: process_lcp"
+ << " this=" << m_last_lcp << " last_complete=" << m_last_lcp_complete
+ << " bucket=" << m_lcp_curr_bucket
+ << " outstanding=" << m_lcp_outstanding << endl;
#endif
- Uint16 state = 0;
- while (! ptr.p->m_requests.isEmpty() && --max_count >= 0)
+ // start or re-start from beginning of current hash bucket
+ if (m_lcp_curr_bucket != ~(Uint32)0)
+ {
+ Page_hashlist::Iterator iter;
+ pl_hash.next(m_lcp_curr_bucket, iter);
+
+ while (iter.curr.i != RNIL && --max_count > 0)
{
- SimulatedBlock* b;
- Callback callback;
+ Ptr<Page_entry>& ptr = iter.curr;
+ Uint16 state = ptr.p->m_state;
+
+ if (ptr.p->m_last_lcp < m_last_lcp &&
+ (state & Page_entry::DIRTY))
{
- /**
- * Make sure list is in own scope if callback will access this list
- * again
- */
- LocalDLFifoList<Page_request> list(m_page_request_pool,
- ptr.p->m_requests);
-
- Ptr<Page_request> req_ptr;
- list.first(req_ptr);
-
-#ifdef VM_TRACE
- debugOut << "PGMAN: " << req_ptr << " : process_req_done" << endl;
-#endif
-
- b = globalData.getBlock(req_ptr.p->m_block);
- callback = req_ptr.p->m_callback;
-
- list.release(req_ptr);
-
- if (req_ptr.p->m_flags & DIRTY_FLAGS)
- {
- state |= Page_entry::DIRTY;
- }
+ if(! (state & Page_entry::BOUND))
+ {
+ ndbout << ptr << endl;
+ ndbrequire(false);
+ }
+ if (state & Page_entry::BUSY)
+ {
+ break; // wait for it
+ }
+ if (state & Page_entry::LOCKED)
+ {
+ /**
+ * Special handling of LOCKED pages...only write 1 at a time...
+ * using copy page (m_lcp_copy_page)
+ */
+ if (!m_lcp_copy_page_free)
+ {
+ break;
+ }
+ m_lcp_copy_page_free = false;
+ Ptr<GlobalPage> src, copy;
+ m_global_page_pool.getPtr(copy, m_lcp_copy_page);
+ m_global_page_pool.getPtr(src, ptr.p->m_real_page_i);
+ memcpy(copy.p, src.p, sizeof(GlobalPage));
+ ptr.p->m_real_page_i = copy.i;
+ ptr.p->m_copy_real_page_i = src.i;
+ ptr.p->m_state |= Page_entry::LCP;
+ pageout(signal, ptr);
+ }
+ else if (state & Page_entry::PAGEOUT)
+ {
+ set_page_state(ptr, state | Page_entry::LCP);
+ }
+ else
+ {
+ ptr.p->m_state |= Page_entry::LCP;
+ pageout(signal, ptr);
+ }
+ ptr.p->m_last_lcp = m_last_lcp;
+ m_lcp_outstanding++;
}
- ndbassert(ptr.p->m_state & Page_entry::BOUND);
- ndbassert(ptr.p->m_state & Page_entry::MAPPED);
- b->execute(signal, callback, ptr.p->m_real_page_i);
- ptr.p->m_state &= ~Page_entry::NO_HOOK;
+ pl_hash.next(iter);
}
-
- if (ptr.p->m_requests.isEmpty())
- set_page_state(ptr, (ptr.p->m_state & ~ Page_entry::READREQ) | state);
- else if(state)
- set_page_state(ptr, ptr.p->m_state | state);
+
+ m_lcp_curr_bucket = (iter.curr.i != RNIL ? iter.bucket : ~(Uint32)0);
}
+ if (m_lcp_curr_bucket == ~(Uint32)0 && !m_lcp_outstanding)
+ {
+ if (m_last_lcp == m_last_lcp_complete)
+ {
+ signal->theData[0] = m_end_lcp_req.senderData;
+ sendSignal(m_end_lcp_req.senderRef, GSN_END_LCP_CONF, signal, 1, JBB);
+ }
+ m_last_lcp_complete = m_last_lcp;
+ m_lcp_curr_bucket = ~(Uint32)0;
+ return false;
+ }
+ return true;
}
// page read and write
@@ -822,24 +1228,25 @@
debugOut << "PGMAN: " << ptr << endl;
#endif
ndbrequire(ptr.p->m_state & Page_entry::PAGEIN);
- Uint16 new_state = ptr.p->m_state;
+ Uint16 state = ptr.p->m_state;
- if (!(new_state & Page_entry::NO_HOOK) &&
+ if (!(state & Page_entry::NO_HOOK) &&
c_tup->disk_page_load_hook(ptr.p->m_real_page_i))
{
- new_state |= Page_entry::DIRTY;
+ state |= Page_entry::DIRTY;
}
- new_state &= ~ Page_entry::PAGEIN;
- new_state &= ~ Page_entry::NO_HOOK;
- new_state |= Page_entry::MAPPED;
- set_page_state(ptr, new_state);
+ state &= ~ Page_entry::PAGEIN;
+ state &= ~ Page_entry::EMPTY;
+ state &= ~ Page_entry::NO_HOOK;
+ state |= Page_entry::MAPPED;
+ set_page_state(ptr, state);
ndbrequire(m_stats.m_current_io_waits > 0);
m_stats.m_current_io_waits--;
ptr.p->m_last_lcp = m_last_lcp;
- process_req_done(signal, ptr);
+ do_busy_loop(signal, true);
}
void
@@ -851,12 +1258,12 @@
#endif
Uint16 state = ptr.p->m_state;
- ndbassert(state & Page_entry::BOUND);
- ndbassert(state & Page_entry::MAPPED);
+ ndbrequire(state & Page_entry::BOUND);
+ ndbrequire(state & Page_entry::MAPPED);
ndbrequire(! (state & Page_entry::BUSY));
ndbrequire(! (state & Page_entry::PAGEOUT));
- state &= ~Page_entry::NO_HOOK;
+ state &= ~ Page_entry::NO_HOOK;
state |= Page_entry::PAGEOUT;
c_tup->disk_page_unmap_callback(ptr.p->m_real_page_i);
@@ -877,13 +1284,13 @@
{
fswritereq(signal, ptr);
m_stats.m_current_io_waits++;
- set_page_state(ptr, state);
}
else
{
ndbrequire(ret == 0);
- set_page_state(ptr, state | Page_entry::LOGSYNC);
+ state |= Page_entry::LOGSYNC;
}
+ set_page_state(ptr, state);
}
void
@@ -898,9 +1305,11 @@
#endif
// it is OK to be "busy" at this point (the commit is queued)
- ndbrequire(ptr.p->m_state & Page_entry::PAGEOUT);
- ndbrequire(ptr.p->m_state & Page_entry::LOGSYNC);
- set_page_state(ptr, ptr.p->m_state & ~ Page_entry::LOGSYNC);
+ Uint16 state = ptr.p->m_state;
+ ndbrequire(state & Page_entry::PAGEOUT);
+ ndbrequire(state & Page_entry::LOGSYNC);
+ state &= ~ Page_entry::LOGSYNC;
+ set_page_state(ptr, state);
fswritereq(signal, ptr);
m_stats.m_current_io_waits++;
@@ -910,22 +1319,21 @@
Pgman::fswriteconf(Signal* signal, Ptr<Page_entry> ptr)
{
#ifdef VM_TRACE
- debugOut << "PGMAN: pageout" << endl;
+ debugOut << "PGMAN: fswriteconf" << endl;
debugOut << "PGMAN: " << ptr << endl;
#endif
- ndbrequire(ptr.p->m_state & Page_entry::PAGEOUT);
+ Uint16 state = ptr.p->m_state;
+ ndbrequire(state & Page_entry::PAGEOUT);
- Uint16 new_state = ptr.p->m_state;
- new_state &= ~ Page_entry::PAGEOUT;
- new_state &= ~ Page_entry::NEW;
- new_state &= ~ Page_entry::DIRTY;
- set_page_state(ptr, new_state & ~Page_entry::LCP);
+ state &= ~ Page_entry::PAGEOUT;
+ state &= ~ Page_entry::EMPTY;
+ state &= ~ Page_entry::DIRTY;
ndbrequire(m_stats.m_current_io_waits > 0);
m_stats.m_current_io_waits--;
- if (new_state & Page_entry::LOCKED)
+ if (state & Page_entry::LOCKED)
{
jam();
ndbrequire(!m_lcp_copy_page_free);
@@ -934,11 +1342,15 @@
ptr.p->m_copy_real_page_i = RNIL;
}
- if (new_state & Page_entry::LCP)
+ if (state & Page_entry::LCP)
{
- ndbassert(m_lcp_outstanding);
+ ndbrequire(m_lcp_outstanding);
m_lcp_outstanding--;
}
+ state &= ~ Page_entry::LCP;
+
+ set_page_state(ptr, state);
+ do_busy_loop(signal, true);
}
// file system interface
@@ -994,7 +1406,7 @@
m_file_map.next(it, ptr.p->m_file_no);
Uint32 fd = * it.data;
- ndbassert(ptr.p->m_page_no > 0);
+ ndbrequire(ptr.p->m_page_no > 0);
FsReadWriteReq* req = (FsReadWriteReq*)signal->getDataPtrSend();
req->filePointer = fd;
@@ -1033,7 +1445,7 @@
// client methods
int
-Pgman::get_page(Ptr<Page_entry> ptr, Page_request page_req)
+Pgman::get_page(Signal* signal, Ptr<Page_entry> ptr, Page_request page_req)
{
#ifdef VM_TRACE
Ptr<Page_request> tmp = { &page_req, RNIL};
@@ -1041,49 +1453,65 @@
debugOut << "PGMAN: " << ptr << endl;
debugOut << "PGMAN: " << tmp << endl;
#endif
+ Uint32 req_flags = page_req.m_flags;
- if (page_req.m_flags & Page_request::EMPTY_PAGE)
+ if (req_flags & Page_request::EMPTY_PAGE)
{
// Only one can "init" a page at a time
//ndbrequire(ptr.p->m_requests.isEmpty());
}
- Uint16 old_state = ptr.p->m_state;
- Uint16 new_state = old_state;
+ Uint16 state = ptr.p->m_state;
+ bool is_new = (state == 0);
bool busy_count = false;
- if (page_req.m_flags & Page_request::LOCK_PAGE)
+ if (req_flags & Page_request::LOCK_PAGE)
{
- new_state |= Page_entry::LOCKED;
+ jam();
+ state |= Page_entry::LOCKED;
}
- if (page_req.m_flags & Page_request::ALLOC_REQ)
+ if (req_flags & Page_request::ALLOC_REQ)
{
- //
+ jam();
}
- else if (page_req.m_flags & Page_request::COMMIT_REQ)
+ else if (req_flags & Page_request::COMMIT_REQ)
{
busy_count = true;
- new_state |= Page_entry::BUSY;
- new_state |= Page_entry::USED; // USED == BUSY for now
+ state |= Page_entry::BUSY;
+ /*
+ * Consider commit to be correlated. Otherwise pk op + commit makes
+ * the page hot. XXX move to TUP which knows better.
+ */
+ req_flags |= Page_request::CORR_REQ;
+ }
+ else if ((req_flags & Page_request::OP_MASK) != ZREAD)
+ {
+ jam();
}
- else if ((page_req.m_flags & Page_request::OP_MASK) != ZREAD)
+
+ // update LIRS
+ if (! (state & Page_entry::LOCKED) &&
+ ! (req_flags & Page_request::CORR_REQ))
{
- //
+ jam();
+ set_page_state(ptr, state);
+ lirs_reference(ptr);
+ state = ptr.p->m_state;
}
- bool empty = ptr.p->m_requests.isEmpty();
- if (empty)
+ bool only_request = ptr.p->m_requests.isEmpty();
+
+ if (only_request &&
+ state & Page_entry::MAPPED)
{
- if ((old_state & Page_entry::MAPPED) &&
- (old_state & Page_entry::BOUND) &&
- ! (old_state & Page_entry::PAGEOUT))
+ if (! (state & Page_entry::PAGEOUT))
{
- if (page_req.m_flags & DIRTY_FLAGS)
- new_state |= Page_entry::DIRTY;
+ if (req_flags & DIRTY_FLAGS)
+ state |= Page_entry::DIRTY;
ptr.p->m_busy_count += busy_count;
- set_page_state(ptr, new_state);
+ set_page_state(ptr, state);
#ifdef VM_TRACE
debugOut << "PGMAN: <get_page: immediate" << endl;
@@ -1093,58 +1521,64 @@
return ptr.p->m_real_page_i;
}
- if (old_state & Page_entry::LOCKED &&
- ! (page_req.m_flags & Page_request::UNLOCK_PAGE))
+ if (state & Page_entry::LOCKED &&
+ ! (req_flags & Page_request::UNLOCK_PAGE))
{
- ndbassert(ptr.p->m_copy_real_page_i != m_lcp_copy_page);
+ ndbrequire(ptr.p->m_copy_real_page_i != m_lcp_copy_page);
ndbrequire(ptr.p->m_copy_real_page_i != RNIL);
return ptr.p->m_copy_real_page_i;
}
}
- if (! (page_req.m_flags & Page_request::LOCK_PAGE))
+ if (! (req_flags & Page_request::LOCK_PAGE))
{
- ndbassert(! (new_state & Page_entry::LOCKED));
+ ndbrequire(! (state & Page_entry::LOCKED));
}
// queue the request
- Ptr<Pgman::Page_request> page_req_ptr;
- LocalDLFifoList<Page_request> list(m_page_request_pool, ptr.p->m_requests);
+ Ptr<Pgman::Page_request> req_ptr;
+ {
+ LocalDLFifoList<Page_request>
+ req_list(m_page_request_pool, ptr.p->m_requests);
+ req_list.seize(req_ptr);
+ }
- if (! list.seize(page_req_ptr))
+ if (req_ptr.i == RNIL)
{
- if (old_state == 0)
+ if (is_new)
{
release_page_entry(ptr);
}
return -1;
}
- page_req_ptr.p->m_block = page_req.m_block;
- page_req_ptr.p->m_flags = page_req.m_flags;
- page_req_ptr.p->m_callback = page_req.m_callback;
+ req_ptr.p->m_block = page_req.m_block;
+ req_ptr.p->m_flags = page_req.m_flags;
+ req_ptr.p->m_callback = page_req.m_callback;
- new_state |= Page_entry::READREQ;
- if (empty && page_req.m_flags & Page_request::EMPTY_PAGE)
+ state |= Page_entry::REQUEST;
+ if (only_request && req_flags & Page_request::EMPTY_PAGE)
{
- new_state |= Page_entry::NEW | Page_entry::MAPPED;
+ state |= Page_entry::EMPTY;
}
- if (page_req.m_flags & Page_request::NO_HOOK)
+ if (req_flags & Page_request::NO_HOOK)
{
- new_state |= Page_entry::NO_HOOK;
+ state |= Page_entry::NO_HOOK;
}
- if (page_req.m_flags & Page_request::UNLOCK_PAGE)
+ if (req_flags & Page_request::UNLOCK_PAGE)
{
- new_state &= ~(Uint32)Page_entry::LOCKED;
+ state &= ~ Page_entry::LOCKED;
}
ptr.p->m_busy_count += busy_count;
- set_page_state(ptr, new_state);
+ set_page_state(ptr, state);
+
+ do_busy_loop(signal, true);
#ifdef VM_TRACE
- debugOut << "PGMAN: " << page_req_ptr << endl;
+ debugOut << "PGMAN: " << req_ptr << endl;
debugOut << "PGMAN: <get_page: queued" << endl;
#endif
return 0;
@@ -1159,20 +1593,20 @@
debugOut << "PGMAN: " << ptr << endl;
#endif
- Uint16 new_state = ptr.p->m_state;
+ Uint16 state = ptr.p->m_state;
ptr.p->m_lsn = lsn;
- if (new_state & Page_entry::BUSY)
+ if (state & Page_entry::BUSY)
{
ndbrequire(ptr.p->m_busy_count != 0);
if (--ptr.p->m_busy_count == 0)
{
- new_state &= ~ Page_entry::BUSY;
- new_state &= ~ Page_entry::USED; // USED == BUSY for now
+ state &= ~ Page_entry::BUSY;
}
}
- set_page_state(ptr, new_state | Page_entry::DIRTY);
+ state |= Page_entry::DIRTY;
+ set_page_state(ptr, state);
#ifdef VM_TRACE
debugOut << "PGMAN: " << ptr << endl;
@@ -1260,14 +1694,345 @@
*it.data = RNIL;
}
+int
+Pgman::drop_page(Ptr<Page_entry> ptr)
+{
+ Page_stack& pl_stack = m_page_stack;
+ Page_queue& pl_queue = m_page_queue;
+
+ Uint16 state = ptr.p->m_state;
+ if (! (state & (Page_entry::PAGEIN | Page_entry::PAGEOUT)))
+ {
+ ndbrequire(state & Page_entry::BOUND);
+ ndbrequire(state & Page_entry::MAPPED);
+
+ if (state & Page_entry::ONSTACK)
+ {
+ jam();
+ pl_stack.remove(ptr);
+ state &= ~ Page_entry::ONSTACK;
+ }
+
+ if (state & Page_entry::ONQUEUE)
+ {
+ jam();
+ pl_queue.remove(ptr);
+ state &= ~ Page_entry::ONQUEUE;
+ }
+
+ if (ptr.p->m_real_page_i != RNIL)
+ {
+ jam();
+ c_tup->disk_page_unmap_callback(ptr.p->m_real_page_i);
+ release_cache_page(ptr.p->m_real_page_i);
+ ptr.p->m_real_page_i = RNIL;
+ }
+
+ set_page_state(ptr, state);
+ release_page_entry(ptr);
+ return 1;
+ }
+
+ ndbrequire(false);
+ return -1;
+}
+
// debug
+#ifdef VM_TRACE
+
+void
+Pgman::verify_page_entry(Ptr<Page_entry> ptr)
+{
+ Uint32 ptrI = ptr.i;
+ Uint16 state = ptr.p->m_state;
+
+ bool has_req = state & Page_entry::REQUEST;
+ bool has_req2 = ! ptr.p->m_requests.isEmpty();
+ ndbrequire(has_req == has_req2 || dump_page_lists(ptrI));
+
+ bool is_bound = state & Page_entry::BOUND;
+ bool is_bound2 = ptr.p->m_real_page_i != RNIL;
+ ndbrequire(is_bound == is_bound2 || dump_page_lists(ptrI));
+
+ bool is_mapped = state & Page_entry::MAPPED;
+ // mapped implies bound
+ ndbrequire(! is_mapped || is_bound || dump_page_lists(ptrI));
+ // bound is mapped or has open requests
+ ndbrequire(! is_bound || is_mapped || has_req || dump_page_lists(ptrI));
+
+ bool on_stack = state & Page_entry::ONSTACK;
+ bool is_hot = state & Page_entry::HOT;
+ // hot entry must be on stack
+ ndbrequire(! is_hot || on_stack || dump_page_lists(ptrI));
+
+ bool on_queue = state & Page_entry::ONQUEUE;
+ // hot entry is not on queue
+ ndbrequire(! is_hot || ! on_queue || dump_page_lists(ptrI));
+
+ bool is_locked = state & Page_entry::LOCKED;
+ bool on_queue2 = ! is_locked && ! is_hot && is_bound;
+ ndbrequire(on_queue == on_queue2 || dump_page_lists(ptrI));
+
+ // entries waiting to enter queue
+ bool to_queue = ! is_locked && ! is_hot && ! is_bound && has_req;
+
+ // page is either LOCKED or under LIRS
+ bool is_lirs = on_stack || to_queue || on_queue;
+ ndbrequire(is_locked == ! is_lirs || dump_page_lists(ptrI));
+
+ bool pagein = state & Page_entry::PAGEIN;
+ bool pageout = state & Page_entry::PAGEOUT;
+ // cannot read and write at same time
+ ndbrequire(! pagein || ! pageout || dump_page_lists(ptrI));
+
+ Uint32 no = get_sublist_no(state);
+ switch (no) {
+ case Page_entry::SL_BIND:
+ ndbrequire(! pagein && ! pageout || dump_page_lists(ptrI));
+ break;
+ case Page_entry::SL_MAP:
+ ndbrequire(! pagein && ! pageout || dump_page_lists(ptrI));
+ break;
+ case Page_entry::SL_MAP_IO:
+ ndbrequire(pagein && ! pageout || dump_page_lists(ptrI));
+ break;
+ case Page_entry::SL_CALLBACK:
+ ndbrequire(! pagein && ! pageout || dump_page_lists(ptrI));
+ break;
+ case Page_entry::SL_CALLBACK_IO:
+ ndbrequire(! pagein && pageout || dump_page_lists(ptrI));
+ break;
+ case Page_entry::SL_BUSY:
+ break;
+ case Page_entry::SL_LOCKED:
+ break;
+ case Page_entry::SL_OTHER:
+ break;
+ default:
+ ndbrequire(false || dump_page_lists(ptrI));
+ break;
+ }
+}
+
+void
+Pgman::verify_page_lists()
+{
+ Page_hashlist& pl_hash = m_page_hashlist;
+ Page_stack& pl_stack = m_page_stack;
+ Page_queue& pl_queue = m_page_queue;
+ Ptr<Page_entry> ptr;
+
+ Uint32 stack_count = 0;
+ Uint32 queue_count = 0;
+ Uint32 queuewait_count = 0;
+ Uint32 locked_bound_count = 0;
+
+ Page_hashlist::Iterator iter;
+ pl_hash.next(0, iter);
+ while (iter.curr.i != RNIL)
+ {
+ verify_page_entry(iter.curr);
+
+ Uint16 state = iter.curr.p->m_state;
+ if (state & Page_entry::ONSTACK)
+ stack_count++;
+ if (state & Page_entry::ONQUEUE)
+ queue_count++;
+ if (! (state & Page_entry::LOCKED) &&
+ ! (state & Page_entry::HOT) &&
+ (state & Page_entry::REQUEST) &&
+ ! (state & Page_entry::BOUND))
+ queuewait_count++;
+ if (state & Page_entry::LOCKED &&
+ state & Page_entry::BOUND)
+ locked_bound_count++;
+ pl_hash.next(iter);
+ }
+
+ ndbrequire(stack_count == pl_stack.count() || dump_page_lists());
+ ndbrequire(queue_count == pl_queue.count() || dump_page_lists());
+
+ Uint32 hot_bound_count = 0;
+ Uint32 cold_bound_count = 0;
+
+ Uint32 i1 = RNIL;
+ for (pl_stack.first(ptr); ptr.i != RNIL; pl_stack.next(ptr))
+ {
+ ndbrequire(i1 != ptr.i);
+ i1 = ptr.i;
+ Uint16 state = ptr.p->m_state;
+ ndbrequire(state & Page_entry::ONSTACK || dump_page_lists());
+ if (! pl_stack.hasPrev(ptr))
+ ndbrequire(state & Page_entry::HOT || dump_page_lists());
+ if (state & Page_entry::HOT &&
+ state & Page_entry::BOUND)
+ hot_bound_count++;
+ }
+
+ Uint32 i2 = RNIL;
+ for (pl_queue.first(ptr); ptr.i != RNIL; pl_queue.next(ptr))
+ {
+ ndbrequire(i2 != ptr.i);
+ i2 = ptr.i;
+ Uint16 state = ptr.p->m_state;
+ ndbrequire(state & Page_entry::ONQUEUE || dump_page_lists());
+ ndbrequire(state & Page_entry::BOUND || dump_page_lists());
+ cold_bound_count++;
+ }
+
+ Uint32 tot_bound_count =
+ locked_bound_count + hot_bound_count + cold_bound_count;
+ ndbrequire(m_stats.m_num_pages == tot_bound_count || dump_page_lists());
+
+ Uint32 k;
+ Uint32 entry_count = 0;
+
+ for (k = 0; k < Page_entry::SUBLIST_COUNT; k++)
+ {
+ const Page_sublist& pl = *m_page_sublist[k];
+ for (pl.first(ptr); ptr.i != RNIL; pl.next(ptr))
+ {
+ ndbrequire(get_sublist_no(ptr.p->m_state) == k || dump_page_lists());
+ entry_count++;
+ }
+ }
+
+ ndbrequire(entry_count == pl_hash.count() || dump_page_lists());
+
+ debugOut << "PGMAN: loop"
+ << " stats=" << m_stats_loop_on
+ << " busy=" << m_busy_loop_on
+ << " cleanup=" << m_cleanup_loop_on
+ << " lcp=" << m_lcp_loop_on << endl;
+
+ debugOut << "PGMAN:"
+ << " entry:" << pl_hash.count()
+ << " cache:" << m_stats.m_num_pages
+ << "(" << locked_bound_count << "L)"
+ << " stack:" << pl_stack.count()
+ << " queue:" << pl_queue.count()
+ << " queuewait:" << queuewait_count << endl;
+
+ debugOut << "PGMAN:";
+ for (k = 0; k < Page_entry::SUBLIST_COUNT; k++)
+ {
+ const Page_sublist& pl = *m_page_sublist[k];
+ debugOut << " " << get_sublist_name(k) << ":" << pl.count();
+ }
+ debugOut << endl;
+}
+
+void
+Pgman::verify_all()
+{
+ Page_sublist& pl_bind = *m_page_sublist[Page_entry::SL_BIND];
+ Page_sublist& pl_map = *m_page_sublist[Page_entry::SL_MAP];
+ Page_sublist& pl_callback = *m_page_sublist[Page_entry::SL_CALLBACK];
+
+ if (! pl_bind.isEmpty() || ! pl_map.isEmpty() || ! pl_callback.isEmpty())
+ {
+ ndbrequire(m_busy_loop_on || dump_page_lists());
+ }
+ verify_page_lists();
+}
+
+bool
+Pgman::dump_page_lists(Uint32 ptrI)
+{
+ if (! debugFlag)
+ open_debug_file(1);
+
+ debugOut << "PGMAN: page list dump" << endl;
+ if (ptrI != RNIL)
+ debugOut << "PGMAN: error on PE [" << ptrI << "]" << endl;
+
+ Page_hashlist& pl_hash = m_page_hashlist;
+ Page_stack& pl_stack = m_page_stack;
+ Page_queue& pl_queue = m_page_queue;
+ Ptr<Page_entry> ptr;
+ Uint32 n;
+ char buf[40];
+
+ debugOut << "hash:" << endl;
+ Page_hashlist::Iterator iter;
+ pl_hash.next(0, iter);
+ n = 0;
+ while (iter.curr.i != RNIL)
+ {
+ sprintf(buf, "%03d", n++);
+ debugOut << buf << " " << iter.curr << endl;
+ pl_hash.next(iter);
+ }
+
+ debugOut << "stack:" << endl;
+ n = 0;
+ for (pl_stack.first(ptr); ptr.i != RNIL; pl_stack.next(ptr))
+ {
+ sprintf(buf, "%03d", n++);
+ debugOut << buf << " " << ptr << endl;
+ }
+
+ debugOut << "queue:" << endl;
+ n = 0;
+ for (pl_queue.first(ptr); ptr.i != RNIL; pl_queue.next(ptr))
+ {
+ sprintf(buf, "%03d", n++);
+ debugOut << buf << " " << ptr << endl;
+ }
+
+ Uint32 k;
+ for (k = 0; k < Page_entry::SUBLIST_COUNT; k++)
+ {
+ debugOut << get_sublist_name(k) << ":" << endl;
+ const Page_sublist& pl = *m_page_sublist[k];
+ for (pl.first(ptr); ptr.i != RNIL; pl.next(ptr))
+ {
+ sprintf(buf, "%03d", n++);
+ debugOut << buf << " " << ptr << endl;
+ }
+ }
+
+ if (! debugFlag)
+ open_debug_file(0);
+
+ return false;
+}
+
+#endif
+
+const char*
+Pgman::get_sublist_name(Uint32 list_no)
+{
+ switch (list_no) {
+ case Page_entry::SL_BIND:
+ return "bind";
+ case Page_entry::SL_MAP:
+ return "map";
+ case Page_entry::SL_MAP_IO:
+ return "map_io";
+ case Page_entry::SL_CALLBACK:
+ return "callback";
+ case Page_entry::SL_CALLBACK_IO:
+ return "callback_io";
+ case Page_entry::SL_BUSY:
+ return "busy";
+ case Page_entry::SL_LOCKED:
+ return "locked";
+ case Page_entry::SL_OTHER:
+ return "other";
+ }
+ return "?";
+}
+
NdbOut&
operator<<(NdbOut& out, Ptr<Pgman::Page_request> ptr)
{
const Pgman::Page_request& pr = *ptr.p;
const char* bname = getBlockName(pr.m_block, "?");
- out << "PR [" << dec << ptr.i << "]";
+ out << "PR";
+ if (ptr.i != RNIL)
+ out << " [" << dec << ptr.i << "]";
out << " block=" << bname;
out << " flags=" << hex << pr.m_flags;
out << "," << dec << (pr.m_flags & Pgman::Page_request::OP_MASK);
@@ -1282,6 +2047,12 @@
out << ",alloc_req";
if (pr.m_flags & Pgman::Page_request::COMMIT_REQ)
out << ",commit_req";
+ if (pr.m_flags & Pgman::Page_request::DIRTY_REQ)
+ out << ",dirty_req";
+ if (pr.m_flags & Pgman::Page_request::NO_HOOK)
+ out << ",no_hook";
+ if (pr.m_flags & Pgman::Page_request::CORR_REQ)
+ out << ",corr_req";
}
return out;
}
@@ -1290,18 +2061,16 @@
operator<<(NdbOut& out, Ptr<Pgman::Page_entry> ptr)
{
const Pgman::Page_entry pe = *ptr.p;
- Uint32 list_no = Pgman::Page_entry::get_sublist_no(pe.m_state);
+ Uint32 list_no = Pgman::get_sublist_no(pe.m_state);
out << "PE [" << dec << ptr.i << "]";
out << " state=" << hex << pe.m_state;
{
- if (pe.m_state & Pgman::Page_entry::READREQ)
- out << ",readreq";
+ if (pe.m_state & Pgman::Page_entry::REQUEST)
+ out << ",request";
+ if (pe.m_state & Pgman::Page_entry::EMPTY)
+ out << ",empty";
if (pe.m_state & Pgman::Page_entry::BOUND)
out << ",bound";
- if (pe.m_state & Pgman::Page_entry::NEW)
- out << ",new";
- if (pe.m_state & Pgman::Page_entry::NO_HOOK)
- out << ",no_hook";
if (pe.m_state & Pgman::Page_entry::MAPPED)
out << ",mapped";
if (pe.m_state & Pgman::Page_entry::DIRTY)
@@ -1318,13 +2087,24 @@
out << ",pageout";
if (pe.m_state & Pgman::Page_entry::LOGSYNC)
out << ",logsync";
+ if (pe.m_state & Pgman::Page_entry::NO_HOOK)
+ out << ",no_hook";
if (pe.m_state & Pgman::Page_entry::LCP)
out << ",lcp";
- }
- out << " list=" << dec << list_no;
+ if (pe.m_state & Pgman::Page_entry::HOT)
+ out << ",hot";
+ if (pe.m_state & Pgman::Page_entry::ONSTACK)
+ out << ",onstack";
+ if (pe.m_state & Pgman::Page_entry::ONQUEUE)
+ out << ",onqueue";
+ }
+ out << " list=";
+ if (list_no == ZNIL)
+ out << "NONE";
+ else
{
- if (list_no != 0)
- out << "," << Pgman::Page_entry::get_sublist_name(list_no);
+ out << dec << list_no;
+ out << "," << Pgman::get_sublist_name(list_no);
}
 out << " diskpage=" << dec << pe.m_file_no << "," << pe.m_page_no;
if (pe.m_real_page_i == RNIL)
@@ -1333,48 +2113,51 @@
out << " realpage=" << dec << pe.m_real_page_i;
out << " lsn=" << dec << pe.m_lsn;
out << " busy_count=" << dec << pe.m_busy_count;
+#ifdef VM_TRACE
+ {
+ LocalDLFifoList<Pgman::Page_request>
+ req_list(ptr.p->m_this->m_page_request_pool, ptr.p->m_requests);
+ if (! req_list.isEmpty())
+ {
+ Ptr<Pgman::Page_request> req_ptr;
+ out << " req:";
+ for (req_list.first(req_ptr); req_ptr.i != RNIL; req_list.next(req_ptr))
+ {
+ out << " " << req_ptr;
+ }
+ }
+ }
+#endif
return out;
}
-int
-Pgman::drop_page(Ptr<Page_entry> ptr)
+#ifdef VM_TRACE
+void
+Pgman::open_debug_file(Uint32 flag)
{
- Uint16 state = ptr.p->m_state;
- if (! (state & (Page_entry::PAGEIN | Page_entry::PAGEOUT)))
+ if (flag)
{
- ndbrequire(state & Page_entry::BOUND);
- ndbrequire(state & Page_entry::MAPPED);
-
- release_page_entry(ptr);
- return 1;
+ FILE* f = globalSignalLoggers.getOutputStream();
+ debugOut = *new NdbOut(*new FileOutputStream(f));
+ }
+ else
+ {
+ debugOut = *new NdbOut(*new NullOutputStream());
}
-
- ndbrequire(false);
- return -1;
-}
-
-// page cache client
-
-Page_cache_client::Page_cache_client(SimulatedBlock* block, Pgman* pgman)
-{
- m_block = block->number();
- m_pgman = pgman;
}
+#endif
void
Pgman::execDUMP_STATE_ORD(Signal* signal)
{
jamEntry();
+ Page_hashlist& pl_hash = m_page_hashlist;
#ifdef VM_TRACE
if (signal->theData[0] == 11000 && signal->getLength() == 2)
{
- if (signal->theData[1])
- {
- FILE* f = globalSignalLoggers.getOutputStream();
- debugOut = *new NdbOut(*new FileOutputStream(f));
- }
- else
- debugOut = *new NdbOut(*new NullOutputStream());
+ Uint32 flag = signal->theData[1];
+ open_debug_file(flag);
+ debugFlag = flag;
}
#endif
@@ -1406,7 +2189,7 @@
key.m_page_no = signal->theData[2];
Ptr<Page_entry> ptr;
- if (m_page_hashlist.find(ptr, key))
+ if (pl_hash.find(ptr, key))
{
ndbout << "pageout " << ptr << endl;
pageout(signal, ptr);
@@ -1415,12 +2198,15 @@
if (signal->theData[0] == 11003)
{
- Page_sublist& pl_clean_unused = *m_page_sublist[Page_entry::CLEAN_UNUSED];
- while (! pl_clean_unused.isEmpty())
- {
- Ptr<Page_entry> ptr;
- pl_clean_unused.first(ptr);
- release_page_entry(ptr);
- }
+ verify_page_lists();
+ dump_page_lists();
}
+}
+
+// page cache client
+
+Page_cache_client::Page_cache_client(SimulatedBlock* block, Pgman* pgman)
+{
+ m_block = block->number();
+ m_pgman = pgman;
}
--- 1.34/storage/ndb/src/kernel/blocks/pgman.hpp 2005-10-10 16:48:22 +02:00
+++ 1.35/storage/ndb/src/kernel/blocks/pgman.hpp 2005-10-17 12:40:39 +02:00
@@ -39,42 +39,187 @@
* A page entry is created by first request for the disk page.
* Subsequent requests are queued under the same page entry.
*
- * A new entry must wait to be "bound" to an available buffer page
- * (called "real page" here). If the disk page is not "new" (empty),
- * the entry must also wait to be "mapped" via "pagein" from disk.
- *
- * Entries are released on demand when page requests arrive for unknown
- * pages. Release candidates are entries which point to a disk page
- * which is "clean and not used". They are ordered by some variant of
- * least recently used (LRU).
- *
- * A background clean-up process makes "dirty" pages clean via "pageout"
- * to disk. UNDO log entries are first flushed up to the maximum log
- * sequence number (LSN) of the page. This is called write ahead
- * logging (WAL). The clean-up process prefers "dirty and not used"
- * pages and lower LSN values.
- *
- * A local check point (LCP) performs complete pageout of dirty pages
- * since given LSN. It needs a stable entry list to iterate over.
- *
- * Page replacement uses the LIRS algorithm. This adds two lists:
- * a "stack" and a "queue" (a subset of the stack for fast access).
- * A central idea is to keep even old un-bound page entries around to
- * see if they are accessed again.
- *
- * Page entries are put on lists accordingly:
+ * There is a limited number of in-memory "cache pages", also called
+ * "buffer pages" or "real pages". These are used by the more numerous
+ * page entries to buffer the disk pages.
+ *
+ * A new or non-resident page entry must first be "bound" to an
+ * available cache page. Next the disk page must be "mapped" to the
+ * cache page. If the page is empty (never written) it is considered
+ * mapped trivially. Otherwise the cache page must be updated via
+ * "pagein" from disk. A bound and mapped page is called "resident".
+ *
+ * Updating a resident cache page makes it "dirty". A background
+ * clean-up process makes dirty pages "clean" via "pageout" to disk.
+ * Write ahead logging (WAL) of the page is done first i.e. UNDO log is
+ * flushed up to the page log sequence number (LSN) by calling a TSMAN
+ * method. The reason for this is obvious but not relevant to PGMAN.
+ *
+ * A local check point (LCP) periodically performs a complete pageout of
+ * dirty pages. It must iterate over a list which will cover all pages
+ * which had been dirty since LCP start.
+ *
+ * A clean page is a candidate ("victim") for being "unmapped" and
+ * "evicted" from the cache, to allow another page to become resident.
+ * This process is called "page replacement".
*
- * - Hash table on (file_no, page_no) is used for fast lookup and for
- * LCP to iterate over.
+ * PAGE REPLACEMENT
*
- * - Page entries are divided into disjoint "sublists" determined by
- * page state. These lists drive page processing.
+ * Page replacement uses the LIRS algorithm (Jiang-Zhang).
+ *
+ * The "recency" of a page is the time between now and the last request
+ * for the page. The "inter-reference recency" (IRR) of a page is the
+ * time between the last 2 requests for the page. "Time" is advanced by
+ * request for any page.
+ *
+ * Page entries are divided into "hot" ("lir") and "cold" ("hir"). Here
+ * lir/hir refers to low/high IRR. Hot pages are always resident but
+ * cold pages need not be.
+ *
+ * Number of hot pages is limited to slightly less than number of cache
+ * pages. Until this number is reached, all used cache pages are hot.
+ * Then the algorithm described next is applied. The algorithm avoids
+ * storing any of the actual recency values.
+ *
+ * Primary data structure is the "stack". It contains all hot entries
+ * and recently referenced cold entries (resident or not). The stack is
+ * in recency order with most recent (lowest recency) entry on top.
+ * Entries which are less recent than the least recent hot page are
+ * removed ("stack pruning"). So the bottom page is always hot.
+ *
+ * The cold entries on the stack are undergoing a "trial period". If
+ * they are referenced soon again (see IRR), they become hot. Otherwise
+ * they fall off the bottom of the stack.
+ *
+ * Secondary data structure is the "queue". It contains all resident
+ * cold pages (on stack or not). When a hot page is removed from the
+ * stack it is added to the end of the queue. When page replacement
+ * needs a page it removes it from the front of the queue.
+ *
+ * Page requests cause the input entry to be inserted and updated in
+ * LIRS lists. Remember that an entry can be present on both stack and
+ * queue. The rules per type of input entry are:
+ *
+ * 1. Hot. Move input entry to stack top. If input entry was at stack
+ * bottom, do stack pruning.
+ *
+ * 2. Cold resident. Move input entry to stack top. Then:
+ *
+ * 2a. If input entry was on stack, change it to hot, remove it from
+ * queue, change stack bottom entry to cold and move the bottom entry to
+ * queue end, and do stack pruning.
+ *
+ * 2b. If input entry was on queue only, leave it cold but move it to
+ * end of queue.
+ *
+ * 3. Cold non-resident. Remove entry at queue front and evict it from
+ * the cache. If the evicted entry was on stack, it remains as unbound
+ * entry on stack, to continue its trial period. Map input entry to the
+ * freed cache page. Move input entry to stack top. Then:
+ *
+ * 3a. If input entry was on stack, change it to hot, change stack
+ * bottom entry to cold and move the bottom entry to queue end, and do
+ * stack pruning.
+ *
+ * 3b. If input entry was new, leave it cold but move it to end of
+ * queue.
+ *
+ * LIRS CHANGES
+ *
+ * In LIRS the 'resident' requirement is changed as follows:
+ *
+ * Stack entries, including hot ones, can have any state. Unbound stack
+ * entries are created by new requests and by pages evicted from queue
+ * front which are still on stack.
+ *
+ * Queue entries must be bound. They become resident and evictable
+ * within a finite time. A page is "evictable" if it is mapped, clean,
+ * and has no requests.
+ *
+ * An unbound entry which should be on queue is added there at bind
+ * time. Such entries are created when an unbound entry with open
+ * requests is popped (hot) or pruned (cold) from the stack. This can
+ * happen if the cache is too small.
+ *
+ * CLEANUP PROCESS
+ *
+ * LIRS (and related algorithms) do not address dirty pages. From above
+ * it is obvious that the clean-up process should process dirty queue
+ * entries proceeding from front to end. This also favors pages with
+ * lower LSN numbers which minimizes amount of WAL to write.
+ *
+ * In fact the clean-up process holds a permanent pointer into the queue
+ * where all entries strictly towards the front are clean. For such an
+ * entry to become dirty it must be referenced again which moves it to
+ * queue end and past the clean-up pointer. (In practice, until this
+ * works, cleanup recycles back to queue front).
*
- * - The LIRS stack and queue. These control the exact details of page
- * processing.
+ * PAGE LISTS
+ *
+ * Page entries are put on a number of lists.
+ *
+ * 1. Hash table on (file_no, page_no). Used for fast lookup and for
+ * LCP to iterate over.
+ *
+ * The other lists are doubly-linked FIFOs. In general entries are
+ * added to the end (last entry) and processed from the front (first
+ * entry). When used as stack, end is top and front is bottom.
+ *
+ * 2. The LIRS stack and queue. These control page replacement.
+ *
+ * 3. Page entries are divided into disjoint "sublists" based on page
+ * "state" i.e. the set of page properties. Some sublists drive page
+ * processing and have next entry to process at the front.
+ *
+ * Current sublists are as follows. Those that drive processing are
+ * marked with a plus (+).
+ *
+ * SL_BIND + waiting for available buffer page
+ * SL_MAP + waiting to start pagein from disk
+ * SL_MAP_IO - above in i/o wait (the pagein)
+ * SL_CALLBACK + request done, waiting to invoke callbacks
+ * SL_CALLBACK_IO - above in i/o wait (pageout by cleanup)
+ * SL_BUSY - being written to by PGMAN client
+ * SL_LOCKED - permanently locked to cache
+ * SL_OTHER - default sublist
+ *
+ * PAGE PROCESSING
+ *
+ * Page processing uses a number of independent continueB loops.
+ *
+ * 1. The "stats loop". Started at node start. Checks lists in debug
+ * mode. In the future could gather statistics and adjust parameters
+ * based on load. Continues via delay signal.
+ *
+ * 2. The "busy loop". Started by page request. Each loop does bind,
+ * map, and callback of a number of entries. Continues via no-delay
+ * signal until nothing to do.
+ *
+ * 3. The "cleanup loop". Started at node start. Each loop starts
+ * pageout of a number of dirty queue entries. Continues via delay
+ * signal.
+ *
+ * 4. The "LCP loop". Started periodically by NDB. Each loop starts
+ * pageout of a number of hash list entries. Continues via delay signal
+ * until done.
+ *
+ * SPECIAL CASES
+ *
+ * LOCKED pages are not put on stack or queue. They are flushed to disk
+ * by LCP but not by clean-up.
+ *
+ * A TUP scan is likely to access a page repeatedly within a short time.
+ * This can make the page hot when it should not be. Such "correlated
+ * requests" are handled by a request flag which modifies default LIRS
+ * processing. [fill in details later]
+ *
+ * Also PK operations make 2 rapid page references. The 2nd one is for
+ * commit. This too should be handled as a correlated request.
*
* CLIENT TSMAN
- * [ todo ]
+ *
+ * TSMAN reads "meta" pages such as extent headers. These are currently
+ * "locked" forever in PGMAN cache.
*
* CLIENT DBTUP
*
@@ -86,11 +231,6 @@
* The page is "busy" if any transaction is between COMMIT_REQ and LSN
* update. A busy page must be locked in buffer cache. No pageout of
* a busy page can be started by clean-up or LCP.
- *
- * NOTES
- *
- * Page "used" is meant for "in use by any tx" but the interface from
- * DBTUP is not ready. For now "used" is identical to "busy".
*/
class Pgman : public SimulatedBlock
@@ -114,10 +254,11 @@
,DIRTY_REQ = 0x0200 // make page dirty wo/ update_lsn
,NO_HOOK = 0x0400 // dont run load hook
,UNLOCK_PAGE = 0x0800
+ ,CORR_REQ = 0x1000 // correlated request (no LIRS update)
};
- Uint32 m_block;
- Uint32 m_flags;
+ Uint16 m_block;
+ Uint16 m_flags;
SimulatedBlock::Callback m_callback;
union {
@@ -127,58 +268,60 @@
Uint32 prevList;
};
- struct Page_entry_sublist_ptr {
+ struct Page_entry_stack_ptr {
Uint32 nextList;
Uint32 prevList;
};
- struct Page_entry_stack_ptr {
+ struct Page_entry_queue_ptr {
Uint32 nextList;
Uint32 prevList;
};
- struct Page_entry_queue_ptr {
+ struct Page_entry_sublist_ptr {
Uint32 nextList;
Uint32 prevList;
};
- struct Page_entry : Page_entry_sublist_ptr,
- Page_entry_stack_ptr,
- Page_entry_queue_ptr {
+ struct Page_entry : Page_entry_stack_ptr,
+ Page_entry_queue_ptr,
+ Page_entry_sublist_ptr {
Page_entry() {}
Page_entry(Uint32 file_no, Uint32 page_no);
enum State {
NO_STATE = 0x0000
- ,READREQ = 0x0001 // has outstanding request
- ,BOUND = 0x0002 // m_real_page_ptr assigned
- ,NEW = 0x0004 // new (empty) page
- ,MAPPED = 0x0008 // new or paged in from disk
+ ,REQUEST = 0x0001 // has outstanding request
+ ,EMPTY = 0x0002 // empty (never written) page
+ ,BOUND = 0x0004 // m_real_page_ptr assigned
+ ,MAPPED = 0x0008 // bound, and empty or paged in
,DIRTY = 0x0010 // page is modified
- ,USED = 0x0020 // used by some tx
- ,BUSY = 0x0040 // page in being written to
+ ,USED = 0x0020 // used by some tx (not set currently)
+ ,BUSY = 0x0040 // page is being written to
,LOCKED = 0x0080 // locked in cache (forever)
,PAGEIN = 0x0100 // paging in
,PAGEOUT = 0x0200 // paging out
,LOGSYNC = 0x0400 // undo WAL as part of pageout
- ,NO_HOOK = 0x0800 // dont run load hook
- ,LCP = 0x1000 // Page is LCP flushed
+ ,NO_HOOK = 0x0800 // don't run load hook
+ ,LCP = 0x1000 // page is LCP flushed
+ ,HOT = 0x2000 // page is hot
+ ,ONSTACK = 0x4000 // page is on LIRS stack
+ ,ONQUEUE = 0x8000 // page is on LIRS queue
};
- enum List {
- NOT_BOUND = 0
- ,NOT_MAPPED = 1
- ,CLEAN_UNUSED = 2
- ,DIRTY_UNUSED = 3
- ,ANY_USED = 4
- ,WAIT_IO = 5
- ,REQ_DONE = 6
- ,IS_BUSY = 7
- ,IS_LOCKED = 8
- ,SUBLIST_COUNT = 9
+ enum Sublist {
+ SL_BIND = 0
+ ,SL_MAP = 1
+ ,SL_MAP_IO = 2
+ ,SL_CALLBACK = 3
+ ,SL_CALLBACK_IO = 4
+ ,SL_BUSY = 5
+ ,SL_LOCKED = 6
+ ,SL_OTHER = 7
+ ,SUBLIST_COUNT = 8
};
- Uint16 m_state; // flags (0 if not yet on any sublist)
+ Uint16 m_state; // flags (0 for new entry)
Uint16 m_file_no; // disk page address set at seize
Uint32 m_page_no;
@@ -195,31 +338,35 @@
DLFifoList<Page_request>::Head m_requests;
- Uint32 m_next_entry_i[2]; // 0-main list 1-sublists
- Uint32 m_prev_entry_i[2];
- Uint32 nextHash, prevHash;
-
- static const char* get_sublist_name(Uint32 list_no);
-
- // compute sublist from state
- static Uint32 get_sublist_no(Uint16 state);
+ Uint32 nextHash;
+ Uint32 prevHash;
Uint32 hashValue() const { return m_file_no << 16 | m_page_no; }
bool equal(const Page_entry& obj) const {
return
m_file_no == obj.m_file_no && m_page_no == obj.m_page_no;
}
+
+#ifdef VM_TRACE
+ Pgman* m_this;
+#endif
};
typedef DLCHashTable<Page_entry> Page_hashlist;
- typedef DLCFifoList<Page_entry, Page_entry_sublist_ptr> Page_sublist;
typedef DLCFifoList<Page_entry, Page_entry_stack_ptr> Page_stack;
typedef DLCFifoList<Page_entry, Page_entry_queue_ptr> Page_queue;
+ typedef DLCFifoList<Page_entry, Page_entry_sublist_ptr> Page_sublist;
class Dbtup *c_tup;
Logfile_client m_lgman;
- bool m_lcp_on;
+ // loop status
+ bool m_stats_loop_on;
+ bool m_busy_loop_on;
+ bool m_cleanup_loop_on;
+ bool m_lcp_loop_on;
+
+ // LCP variables
Uint32 m_last_lcp;
Uint32 m_last_lcp_complete;
Uint32 m_lcp_curr_bucket;
@@ -227,33 +374,48 @@
Uint32 m_lcp_copy_page;
bool m_lcp_copy_page_free;
EndLcpReq m_end_lcp_req;
-
+
+ // clean-up variables
+ Ptr<Page_entry> m_cleanup_ptr;
+
+ // file map
typedef DataBuffer<15> File_map;
File_map m_file_map;
- ArrayPool<Page_entry> m_page_entry_pool;
- ArrayPool<Page_request> m_page_request_pool;
File_map::DataBufferPool m_data_buffer_pool;
+ // page entries and requests
+ ArrayPool<Page_request> m_page_request_pool;
+ ArrayPool<Page_entry> m_page_entry_pool;
Page_hashlist m_page_hashlist;
- Page_sublist* m_page_sublist[Page_entry::SUBLIST_COUNT];
Page_stack m_page_stack;
Page_queue m_page_queue;
+ Page_sublist* m_page_sublist[Page_entry::SUBLIST_COUNT];
+
+ // configuration
+ struct Param {
+ Param();
+ Uint32 m_max_pages; // max number of cache pages
+ Uint32 m_max_hot_pages; // max hot cache pages (up to 99%)
+ Uint32 m_max_loop_count; // limit purely local loops
+ Uint32 m_max_io_waits;
+ Uint32 m_stats_loop_delay;
+ Uint32 m_cleanup_loop_delay;
+ Uint32 m_lcp_loop_delay;
+ } m_param;
+ // runtime sizes and statistics
struct Stats {
Stats();
- Uint32 m_max_pages;
+ Uint32 m_num_pages; // current number of cache pages
Uint32 m_page_hits;
Uint32 m_page_faults;
- Uint32 m_max_io_waits;
Uint32 m_current_io_waits;
- Uint32 m_max_dirty_pct;
} m_stats;
protected:
void execSTTOR(Signal* signal);
void sendSTTORRY(Signal*);
void execREAD_CONFIG_REQ(Signal* signal);
- void execDUMP_STATE_ORD(Signal* signal);
void execCONTINUEB(Signal* signal);
void execLCP_FRAG_ORD(Signal*);
@@ -264,25 +426,40 @@
void execFSWRITECONF(Signal*);
void execFSWRITEREF(Signal*);
+ void execDUMP_STATE_ORD(Signal* signal);
+
private:
+ static Uint32 get_sublist_no(Uint16 state);
void set_page_state(Ptr<Page_entry> ptr, Uint16 new_state);
-#ifdef VM_TRACE
- void verify_page_lists();
-#endif
- Uint32 seize_page_entry(Ptr<Page_entry>&, Uint32 file_no, Uint32 page_no);
+ bool seize_cache_page(Ptr<GlobalPage>& gptr);
+ void release_cache_page(Uint32 i);
+
bool find_page_entry(Ptr<Page_entry>&, Uint32 file_no, Uint32 page_no);
+ Uint32 seize_page_entry(Ptr<Page_entry>&, Uint32 file_no, Uint32 page_no);
bool get_page_entry(Ptr<Page_entry>&, Uint32 file_no, Uint32 page_no);
void release_page_entry(Ptr<Page_entry>&);
- void process_lcp(Signal*);
+ void lirs_stack_prune();
+ void lirs_stack_pop();
+ void lirs_reference(Ptr<Page_entry> ptr);
+
+ void do_stats_loop(Signal*);
+ void do_busy_loop(Signal*, bool direct = false);
+ void do_cleanup_loop(Signal*);
+ void do_lcp_loop(Signal*, bool direct = false);
+
+ bool process_bind(Signal*);
+ bool process_bind(Signal*, Ptr<Page_entry> ptr);
+ bool process_map(Signal*);
+ bool process_map(Signal*, Ptr<Page_entry> ptr);
+ bool process_callback(Signal*);
+ bool process_callback(Signal*, Ptr<Page_entry> ptr);
+
+ bool process_cleanup(Signal*);
+ void move_cleanup_ptr(Ptr<Page_entry> ptr);
- void process_all(Signal*, bool continue_b);
- void process_not_bound(Signal*);
- void process_not_mapped(Signal*);
- void process_dirty_unused(Signal*);
- void process_req_done(Signal*);
- void process_req_done(Signal*, Ptr<Page_entry> ptr);
+ bool process_lcp(Signal*);
void pagein(Signal*, Ptr<Page_entry>);
void fsreadreq(Signal*, Ptr<Page_entry>);
@@ -292,18 +469,24 @@
void fswritereq(Signal*, Ptr<Page_entry>);
void fswriteconf(Signal*, Ptr<Page_entry>);
- int get_page(Ptr<Page_entry>, Page_request page_req);
+ int get_page(Signal*, Ptr<Page_entry>, Page_request page_req);
void update_lsn(Ptr<Page_entry>, Uint32 block, Uint64 lsn);
Uint32 create_data_file();
Uint32 alloc_data_file(Uint32 file_no);
void map_file_no(Uint32 file_no, Uint32 fd);
void free_data_file(Uint32 file_no, Uint32 fd = RNIL);
-
int drop_page(Ptr<Page_entry>);
#ifdef VM_TRACE
NdbOut debugOut;
+ bool debugFlag;
+ void verify_page_entry(Ptr<Page_entry> ptr);
+ void verify_page_lists();
+ void verify_all();
+ bool dump_page_lists(Uint32 ptrI = RNIL);
+ void open_debug_file(Uint32 flag);
#endif
+ static const char* get_sublist_name(Uint32 list_no);
friend class NdbOut& operator<<(NdbOut&, Ptr<Page_request>);
friend class NdbOut& operator<<(NdbOut&, Ptr<Page_entry>);
};
@@ -383,6 +566,12 @@
Uint32 file_no = req.m_page.m_file_no;
Uint32 page_no = req.m_page.m_page_no;
+#ifdef VM_TRACE
+ m_pgman->debugOut
+ << "PGCLI: get_page " << file_no << "," << page_no
+ << " flags=" << hex << flags << endl;
+#endif
+
// find or seize
bool ok = m_pgman->get_page_entry(entry_ptr, file_no, page_no);
if (! ok)
@@ -395,7 +584,7 @@
page_req.m_flags = flags;
page_req.m_callback = req.m_callback;
- int i = m_pgman->get_page(entry_ptr, page_req);
+ int i = m_pgman->get_page(signal, entry_ptr, page_req);
if (i > 0)
{
// TODO remove
@@ -411,6 +600,12 @@
Uint32 file_no = key.m_file_no;
Uint32 page_no = key.m_page_no;
+#ifdef VM_TRACE
+ m_pgman->debugOut
+ << "PGCLI: update_lsn " << file_no << "," << page_no
+ << " lsn=" << lsn << endl;
+#endif
+
bool found = m_pgman->find_page_entry(entry_ptr, file_no, page_no);
assert(found);
@@ -424,6 +619,11 @@
Ptr<Pgman::Page_entry> entry_ptr;
Uint32 file_no = key.m_file_no;
Uint32 page_no = key.m_page_no;
+
+#ifdef VM_TRACE
+ m_pgman->debugOut
+ << "PGCLI: drop_page " << file_no << "," << page_no << endl;
+#endif
bool found = m_pgman->find_page_entry(entry_ptr, file_no, page_no);
assert(found);
--- 1.11/storage/ndb/src/kernel/vm/DLFifoList.hpp 2005-08-30 11:50:25 +02:00
+++ 1.12/storage/ndb/src/kernel/vm/DLFifoList.hpp 2005-10-17 12:40:39 +02:00
@@ -136,11 +136,18 @@
bool prev(Ptr<T> &) const ;
/**
- * Check if next exists
+ * Check if next exists i.e. this is not last
*
* NOTE ptr must be both p & i
*/
bool hasNext(const Ptr<T> &) const;
+
+ /**
+ * Check if prev exists i.e. this is not first
+ *
+ * NOTE ptr must be both p & i
+ */
+ bool hasPrev(const Ptr<T> &) const;
Uint32 noOfElements() const {
Uint32 c = 0;
@@ -449,6 +456,13 @@
bool
DLFifoList<T,U>::hasNext(const Ptr<T> & p) const {
return p.p->U::nextList != RNIL;
+}
+
+template <class T, class U>
+inline
+bool
+DLFifoList<T,U>::hasPrev(const Ptr<T> & p) const {
+ return p.p->U::prevList != RNIL;
}
#endif
| Thread |
|---|
| • bk commit into 5.1 tree (pekka:1.2050) | pekka | 17 Oct |