4634 Frazer Clement 2012-10-30 [merge]
Merge 7.0->7.1
added:
storage/ndb/src/kernel/vm/CountingSemaphore.hpp
modified:
storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp
storage/ndb/src/kernel/blocks/ERROR_codes.txt
storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
storage/ndb/test/ndbapi/testLimits.cpp
storage/ndb/test/run-test/daily-basic-tests.txt
4633 Pekka Nousiainen 2012-10-27 [merge]
merge to 7.1
modified:
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
=== modified file 'storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp'
--- a/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2012-09-12 14:18:33 +0000
+++ b/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2012-10-30 00:30:30 +0000
@@ -160,6 +160,7 @@ public:
DihAddFragFailCleanedUp = 7019,
// 7020
// 7021
+ DihDumpPageRecInfo = 7032,
EnableUndoDelayDataWrite = 7080, // DIH+ACC+TUP
DihSetTimeBetweenGcp = 7090,
DihStartLcpImmediately = 7099,
=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2012-10-24 10:05:26 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2012-10-30 00:30:30 +0000
@@ -20,7 +20,7 @@ Next DBACC 3002
Next DBTUP 4035
Next DBLQH 5075
Next DBDICT 6026
-Next DBDIH 7232
+Next DBDIH 7236
Next DBTC 8097
Next CMVMI 9000
Next BACKUP 10042
=== modified file 'storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2012-08-24 12:01:20 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2012-10-30 00:39:20 +0000
@@ -29,6 +29,7 @@
#include <blocks/mutexes.hpp>
#include <signaldata/LCP.hpp>
#include <NdbSeqLock.hpp>
+#include <CountingSemaphore.hpp>
#ifdef DBDIH_C
@@ -106,10 +107,13 @@
/* SIZES */
/*#########*/
/*
- * Only pages enough for one table needed, since only
- * one metadata change at the time is allowed.
+ * Pages are used for flushing table definitions during LCP,
+ * and for other operations such as metadata changes etc
+ *
*/
-#define ZPAGEREC PACK_TABLE_PAGES
+#define MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES 4
+#define MAX_CONCURRENT_DIH_TAB_DEF_OPS (MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES + 2)
+#define ZPAGEREC (MAX_CONCURRENT_DIH_TAB_DEF_OPS * PACK_TABLE_PAGES)
#define ZCREATE_REPLICA_FILE_SIZE 4
#define ZPROXY_MASTER_FILE_SIZE 10
#define ZPROXY_FILE_SIZE 10
@@ -501,6 +505,7 @@ public:
enum UpdateState {
US_IDLE,
US_LOCAL_CHECKPOINT,
+ US_LOCAL_CHECKPOINT_QUEUED,
US_REMOVE_NODE,
US_COPY_TAB_REQ,
US_ADD_TABLE_MASTER,
@@ -1609,6 +1614,11 @@ private:
Uint32 c_set_initial_start_flag;
Uint64 c_current_time; // Updated approx. every 10ms
+ /* Limit the number of concurrent table definition writes during LCP
+ * This avoids exhausting the DIH page pool
+ */
+ CountingSemaphore c_lcpTabDefWritesControl;
+
public:
enum LcpMasterTakeOverState {
LMTOS_IDLE = 0,
=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp 2011-12-15 10:01:04 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp 2012-10-30 00:30:30 +0000
@@ -71,6 +71,8 @@ void Dbdih::initData()
c_set_initial_start_flag = FALSE;
c_sr_wait_to = false;
c_2pass_inr = false;
+
+ c_lcpTabDefWritesControl.init(MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES);
}//Dbdih::initData()
void Dbdih::initRecords()
=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2012-08-28 11:49:01 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2012-10-30 00:39:20 +0000
@@ -1191,6 +1191,14 @@ void Dbdih::execFSWRITECONF(Signal* sign
break;
case FileRecord::TABLE_WRITE:
jam();
+ if (ERROR_INSERTED(7235))
+ {
+ jam();
+ filePtr.p->reqStatus = status;
+ /* Suspend processing of WRITECONFs */
+ sendSignalWithDelay(reference(), GSN_FSWRITECONF, signal, 1000, signal->getLength());
+ return;
+ }
tableWriteLab(signal, filePtr);
break;
default:
@@ -13628,10 +13636,26 @@ void Dbdih::execLCP_FRAG_REP(Signal* sig
*/
tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
tabPtr.p->tabCopyStatus = TabRecord::CS_LCP_READ_TABLE;
- tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
- signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
- signal->theData[1] = tabPtr.i;
- sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
+
+ /**
+ * Check whether we should write immediately, or queue...
+ */
+ if (c_lcpTabDefWritesControl.requestMustQueue())
+ {
+ jam();
+ //ndbout_c("DIH : Queueing tab def flush op on table %u", tabPtr.i);
+ /* Mark as queued - will be started when an already running op completes */
+ tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT_QUEUED;
+ }
+ else
+ {
+ /* Run immediately */
+ jam();
+ tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
+ signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
+ signal->theData[1] = tabPtr.i;
+ sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
+ }
bool ret = checkLcpAllTablesDoneInLqh(__LINE__);
if (ret && ERROR_INSERTED(7209))
@@ -14383,12 +14407,48 @@ void Dbdih::tableCloseLab(Signal* signal
case TabRecord::US_LOCAL_CHECKPOINT:
jam();
releaseTabPages(tabPtr.i);
- signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
- sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
+
+ /* Check whether there's some queued table definition flush op to start */
+ if (c_lcpTabDefWritesControl.releaseMustStartQueued())
+ {
+ jam();
+ /* Some table write is queued - let's kick it off */
+ /* First find it...
+ * By using the tabUpdateState to 'queue' operations, we lose
+ * the original flush request order, which shouldn't matter.
+ * In any case, the checkpoint proceeds by table id, as does this
+ * search, so a similar order should result
+ */
+ TabRecordPtr tabPtr;
+ for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++)
+ {
+ ptrAss(tabPtr, tabRecord);
+ if (tabPtr.p->tabUpdateState == TabRecord::US_LOCAL_CHECKPOINT_QUEUED)
+ {
+ jam();
+ //ndbout_c("DIH : Starting queued table def flush op on table %u", tabPtr.i);
+ tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
+ signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
+ signal->theData[1] = tabPtr.i;
+ sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
+ return;
+ }
+ }
+ /* No queued table write found - error */
+ ndbout_c("DIH : Error in queued table writes : inUse %u queued %u total %u",
+ c_lcpTabDefWritesControl.inUse,
+ c_lcpTabDefWritesControl.queuedRequests,
+ c_lcpTabDefWritesControl.totalResources);
+ ndbrequire(false);
+ }
+ jam();
+ signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
+ sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
+
return;
break;
case TabRecord::US_REMOVE_NODE:
@@ -18089,6 +18149,39 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal
signal->theData[1] = ptr.i;
}
+ if (arg == DumpStateOrd::DihDumpPageRecInfo)
+ {
+ jam();
+ ndbout_c("MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES %u", MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES);
+ ndbout_c("MAX_CONCURRENT_DIH_TAB_DEF_OPS %u", MAX_CONCURRENT_DIH_TAB_DEF_OPS);
+ ndbout_c("MAX_CRASHED_REPLICAS %u", MAX_CRASHED_REPLICAS);
+ ndbout_c("MAX_LCP_STORED %u", MAX_LCP_STORED);
+ ndbout_c("MAX_REPLICAS %u", MAX_REPLICAS);
+ ndbout_c("MAX_NDB_PARTITIONS %u", MAX_NDB_PARTITIONS);
+ ndbout_c("PACK_REPLICAS_WORDS %u", PACK_REPLICAS_WORDS);
+ ndbout_c("PACK_FRAGMENT_WORDS %u", PACK_FRAGMENT_WORDS);
+ ndbout_c("PACK_TABLE_WORDS %u", PACK_TABLE_WORDS);
+ ndbout_c("PACK_TABLE_PAGE_WORDS %u", PACK_TABLE_PAGE_WORDS);
+ ndbout_c("PACK_TABLE_PAGES %u", PACK_TABLE_PAGES);
+ ndbout_c("ZPAGEREC %u", ZPAGEREC);
+ ndbout_c("Total bytes : %lu", ZPAGEREC * sizeof(PageRecord));
+ ndbout_c("LCP Tab def write ops inUse %u queued %u",
+ c_lcpTabDefWritesControl.inUse,
+ c_lcpTabDefWritesControl.queuedRequests);
+ Uint32 freeCount = 0;
+ PageRecordPtr tmp;
+ tmp.i = cfirstfreepage;
+ while (tmp.i != RNIL)
+ {
+ jam();
+ ptrCheckGuard(tmp, cpageFileSize, pageRecord);
+ freeCount++;
+ tmp.i = tmp.p->nextfreepage;
+ };
+ ndbout_c("Pages in use %u/%u", cpageFileSize - freeCount, cpageFileSize);
+ return;
+ }
+
if (arg == DumpStateOrd::SchemaResourceSnapshot)
{
RSS_OP_SNAPSHOT_SAVE(cremainingfrags);
=== added file 'storage/ndb/src/kernel/vm/CountingSemaphore.hpp'
--- a/storage/ndb/src/kernel/vm/CountingSemaphore.hpp 1970-01-01 00:00:00 +0000
+++ b/storage/ndb/src/kernel/vm/CountingSemaphore.hpp 2012-10-29 18:34:05 +0000
@@ -0,0 +1,161 @@
+/*
+ Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef COUNTING_SEMAPHORE_HPP
+#define COUNTING_SEMAPHORE_HPP
+
+/**
+ * CountingSemaphore
+ *
+ * Helper for limiting concurrency on some resources.
+ * The Semaphore is created with some maximum concurrency level
+ * Up to this many resources may be concurrently used.
+ * When more than this number of resources are used concurrently,
+ * further requests must queue until a resource is released.
+ *
+ * This structure does not manage queueing and restarting of
+ * resource allocation requests, just monitors the number of
+ * resources in use, and the number of resource requests
+ * queued up.
+ *
+ * To be useful, some external request queueing and dequeuing
+ * mechanism is required.
+ */
+ class CountingSemaphore
+ {
+ public:
+ CountingSemaphore():
+ inUse(0),
+ queuedRequests(0),
+ totalResources(1)
+ {};
+
+ ~CountingSemaphore() {};
+
+ /**
+ * init
+ * Initialise the totalResources
+ */
+ void init(Uint32 _totalResources)
+ {
+ assert(inUse == 0);
+ totalResources = _totalResources;
+ }
+
+ /**
+ * requestMustQueue
+ *
+ * Part of semaphore P()/acquire()/down() implementation
+ *
+ * Called to request a resource.
+ * Returns whether the request must be queued, or
+ * can be satisfied immediately.
+ *
+ * true - no resource available, queue request.
+ * false - resource available, proceed.
+ *
+ * e.g. if (<sema>.requestMustQueue()) {
+ * queue_request;
+ * return;
+ * }
+ *
+ * proceed;
+ */
+ bool requestMustQueue()
+ {
+ assert(inUse <= totalResources);
+ if (inUse == totalResources)
+ {
+ queuedRequests++;
+ return true;
+ }
+ else
+ {
+ assert(queuedRequests == 0);
+ inUse++;
+ return false;
+ }
+ }
+
+ /**
+ * releaseMustStartQueued
+ *
+ * Part of semaphore V()/release()/up()
+ *
+ * Called to release a resource.
+ * Returns whether some queued resource request
+ * must be restarted.
+ *
+ * true - a queued request exists and must be started.
+ * false - no queued request exists, proceed.
+ *
+ * e.g.
+ * if (<sema>.releaseMustStartQueued()) {
+ * dequeue_request;
+ * begin_request_processing;
+ * }
+ *
+ * proceed;
+ */
+ bool releaseMustStartQueued()
+ {
+ assert(inUse > 0);
+ if (queuedRequests > 0)
+ {
+ assert(inUse == totalResources);
+ queuedRequests--;
+ return true;
+ }
+
+ inUse--;
+ return false;
+ }
+
+ /**
+ * getTotalRequests
+ *
+ * Returns the sum of the inuse resources and queued requests.
+ * e.g. the offered concurrency on the resource.
+ */
+ Uint32 getTotalRequests() const
+ {
+ return inUse + queuedRequests;
+ }
+
+ /**
+ * getResourcesAvailable()
+ *
+ * Returns the number of resources available currently
+ */
+ Uint32 getResourcesAvailable() const
+ {
+ assert(inUse <= totalResources);
+ return (totalResources - inUse);
+ }
+
+
+ /* inUse - number resources currently in use */
+ Uint32 inUse;
+
+ /* queuedRequests - number requests waiting 'outside' */
+ Uint32 queuedRequests;
+
+ /* totalResources - the maximum resources in use at one time */
+ Uint32 totalResources;
+ }; /* CountingSemaphore */
+
+#endif
=== modified file 'storage/ndb/test/ndbapi/testLimits.cpp'
--- a/storage/ndb/test/ndbapi/testLimits.cpp 2011-06-30 15:59:25 +0000
+++ b/storage/ndb/test/ndbapi/testLimits.cpp 2012-10-30 00:30:30 +0000
@@ -1,5 +1,4 @@
-/* Copyright (c) 2008 MySQL AB, 2008, 2009 Sun Microsystems, Inc.
- Use is subject to license terms.
+/* Copyright (c) 2008-20012 Oracle and/or its affiliates. All rights reserved
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -1041,6 +1040,329 @@ int testDropSignalFragments(NDBT_Context
return NDBT_OK;
}
+int create100Tables(NDBT_Context* ctx, NDBT_Step* step)
+{
+ Ndb* pNdb = GETNDB(step);
+ const NdbDictionary::Table* pTab= ctx->getTab();
+
+ /* Run as a 'T1' testcase - do nothing for other tables */
+ if (strcmp(pTab->getName(), "T1") != 0)
+ return NDBT_OK;
+
+ for (Uint32 t=0; t < 100; t++)
+ {
+ char tabnameBuff[10];
+ snprintf(tabnameBuff, sizeof(tabnameBuff), "TAB%u", t);
+
+ NdbDictionary::Table tab;
+ tab.setName(tabnameBuff);
+ NdbDictionary::Column pk;
+ pk.setName("PK");
+ pk.setType(NdbDictionary::Column::Varchar);
+ pk.setLength(20);
+ pk.setNullable(false);
+ pk.setPrimaryKey(true);
+ tab.addColumn(pk);
+
+ pNdb->getDictionary()->dropTable(tab.getName());
+ if(pNdb->getDictionary()->createTable(tab) != 0)
+ {
+ ndbout << "Create table failed with error : "
+ << pNdb->getDictionary()->getNdbError().code
+ << " "
+ << pNdb->getDictionary()->getNdbError().message
+ << endl;
+ return NDBT_FAILED;
+ }
+
+ ndbout << "Created table " << tabnameBuff << endl;
+ }
+
+ return NDBT_OK;
+}
+
+int drop100Tables(NDBT_Context* ctx, NDBT_Step* step)
+{
+ Ndb* pNdb = GETNDB(step);
+ const NdbDictionary::Table* pTab= ctx->getTab();
+
+ /* Run as a 'T1' testcase - do nothing for other tables */
+ if (strcmp(pTab->getName(), "T1") != 0)
+ return NDBT_OK;
+
+ for (Uint32 t=0; t < 100; t++)
+ {
+ char tabnameBuff[10];
+ snprintf(tabnameBuff, sizeof(tabnameBuff), "TAB%u", t);
+
+ if (pNdb->getDictionary()->dropTable(tabnameBuff) != 0)
+ {
+ ndbout << "Drop table failed with error : "
+ << pNdb->getDictionary()->getNdbError().code
+ << " "
+ << pNdb->getDictionary()->getNdbError().message
+ << endl;
+ }
+ else
+ {
+ ndbout << "Dropped table " << tabnameBuff << endl;
+ }
+ }
+
+ return NDBT_OK;
+}
+
+int dropTable(NDBT_Context* ctx, NDBT_Step* step, Uint32 num)
+{
+ Ndb* pNdb = GETNDB(step);
+ const NdbDictionary::Table* pTab= ctx->getTab();
+
+ /* Run as a 'T1' testcase - do nothing for other tables */
+ if (strcmp(pTab->getName(), "T1") != 0)
+ return NDBT_OK;
+
+ char tabnameBuff[10];
+ snprintf(tabnameBuff, sizeof(tabnameBuff), "TAB%u", num);
+
+ if (pNdb->getDictionary()->dropTable(tabnameBuff) != 0)
+ {
+ ndbout << "Drop table failed with error : "
+ << pNdb->getDictionary()->getNdbError().code
+ << " "
+ << pNdb->getDictionary()->getNdbError().message
+ << endl;
+ }
+ else
+ {
+ ndbout << "Dropped table " << tabnameBuff << endl;
+ }
+
+ return NDBT_OK;
+}
+
+
+enum Scenarios
+{
+// NORMAL, // Commented to save some time.
+ DROP_TABLE,
+ RESTART_MASTER,
+ RESTART_SLAVE,
+ NUM_SCENARIOS
+};
+
+
+enum Tasks
+{
+ WAIT = 0,
+ DROP_TABLE_REQ = 1,
+ MASTER_RESTART_REQ = 2,
+ SLAVE_RESTART_REQ = 3
+};
+
+int testWorker(NDBT_Context* ctx, NDBT_Step* step)
+{
+ /* Run as a 'T1' testcase - do nothing for other tables */
+ if (strcmp(ctx->getTab()->getName(), "T1") != 0)
+ return NDBT_OK;
+
+ /* Worker step to run in a separate thread for
+ * blocking activities
+ * Generally the blocking of the DIH table definition flush
+ * blocks the completion of the drop table/node restarts,
+ * so this must be done in a separate thread to avoid
+ * deadlocks.
+ */
+
+ while (!ctx->isTestStopped())
+ {
+ ndbout_c("Worker : waiting for request...");
+ ctx->getPropertyWait("DIHWritesRequest", 1);
+
+ if (!ctx->isTestStopped())
+ {
+ Uint32 req = ctx->getProperty("DIHWritesRequestType", (Uint32)0);
+
+ switch ((Tasks) req)
+ {
+ case DROP_TABLE_REQ:
+ {
+ /* Drop table */
+ ndbout_c("Worker : dropping table");
+ if (dropTable(ctx, step, 2) != NDBT_OK)
+ {
+ return NDBT_FAILED;
+ }
+ ndbout_c("Worker : table dropped.");
+ break;
+ }
+ case MASTER_RESTART_REQ:
+ {
+ ndbout_c("Worker : restarting Master");
+
+ NdbRestarter restarter;
+ int master_nodeid = restarter.getMasterNodeId();
+ ndbout_c("Worker : Restarting Master (%d)...", master_nodeid);
+ if (restarter.restartOneDbNode2(master_nodeid,
+ NdbRestarter::NRRF_NOSTART |
+ NdbRestarter::NRRF_FORCE |
+ NdbRestarter::NRRF_ABORT) ||
+ restarter.waitNodesNoStart(&master_nodeid, 1) ||
+ restarter.startAll())
+ {
+ ndbout_c("Worker : Error restarting Master.");
+ return NDBT_FAILED;
+ }
+ ndbout_c("Worker : Waiting for master to recover...");
+ if (restarter.waitNodesStarted(&master_nodeid, 1))
+ {
+ ndbout_c("Worker : Error waiting for Master restart");
+ return NDBT_FAILED;
+ }
+ ndbout_c("Worker : Master recovered.");
+ break;
+ }
+ case SLAVE_RESTART_REQ:
+ {
+ NdbRestarter restarter;
+ int slave_nodeid = restarter.getRandomNotMasterNodeId(rand());
+ ndbout_c("Worker : Restarting non-master (%d)...", slave_nodeid);
+ if (restarter.restartOneDbNode2(slave_nodeid,
+ NdbRestarter::NRRF_NOSTART |
+ NdbRestarter::NRRF_FORCE |
+ NdbRestarter::NRRF_ABORT) ||
+ restarter.waitNodesNoStart(&slave_nodeid, 1) ||
+ restarter.startAll())
+ {
+ ndbout_c("Worker : Error restarting Slave.");
+ return NDBT_FAILED;
+ }
+ ndbout_c("Worker : Waiting for slave to recover...");
+ if (restarter.waitNodesStarted(&slave_nodeid, 1))
+ {
+ ndbout_c("Worker : Error waiting for Slave restart");
+ return NDBT_FAILED;
+ }
+ ndbout_c("Worker : Slave recovered.");
+ break;
+ }
+ default:
+ {
+ break;
+ }
+ }
+ }
+ ctx->setProperty("DIHWritesRequestType", (Uint32) 0);
+ ctx->setProperty("DIHWritesRequest", (Uint32) 2);
+ }
+
+ ndbout_c("Worker, done.");
+ return NDBT_OK;
+}
+
+int testSlowDihFileWrites(NDBT_Context* ctx, NDBT_Step* step)
+{
+ /* Testcase checks behaviour with slow flushing of DIH table definitions
+ * This caused problems in the past by exhausting the DIH page pool
+ * Now there's a concurrent operations limit.
+ * Check that it behaves with many queued ops, parallel drop/node restarts
+ */
+
+ /* Run as a 'T1' testcase - do nothing for other tables */
+ if (strcmp(ctx->getTab()->getName(), "T1") != 0)
+ return NDBT_OK;
+
+ /* 1. Activate slow write error insert
+ * 2. Trigger LCP
+ * 3. Wait some time, periodically producing info on
+ * the internal state
+ * 4. Perform some parallel action (drop table/node restarts)
+ * 5. Wait some time, periodically producing info on
+ * the internal state
+ * 6. Clear the error insert
+ * 7. Wait a little longer
+ * 8. Done.
+ */
+ NdbRestarter restarter;
+
+ for (Uint32 scenario = 0; scenario < NUM_SCENARIOS; scenario++)
+ {
+ ndbout_c("Inserting error 7235");
+ restarter.insertErrorInAllNodes(7235);
+
+ ndbout_c("Triggering LCP");
+ int dumpArg = 7099;
+ restarter.dumpStateAllNodes(&dumpArg, 1);
+
+ const Uint32 periodSeconds = 10;
+ Uint32 waitPeriods = 6;
+ dumpArg = 7032;
+
+ for (Uint32 p=0; p<waitPeriods; p++)
+ {
+ if (p == 3)
+ {
+ switch ((Scenarios) scenario)
+ {
+ case DROP_TABLE:
+ {
+ /* Drop one of the early-created tables */
+ ndbout_c("Requesting DROP TABLE");
+ ctx->setProperty("DIHWritesRequestType", (Uint32) DROP_TABLE_REQ);
+ ctx->setProperty("DIHWritesRequest", (Uint32) 1);
+ break;
+ }
+ case RESTART_MASTER:
+ {
+ ndbout_c("Requesting Master restart");
+ ctx->setProperty("DIHWritesRequestType", (Uint32) MASTER_RESTART_REQ);
+ ctx->setProperty("DIHWritesRequest", (Uint32) 1);
+
+ break;
+ }
+ case RESTART_SLAVE:
+ {
+ ndbout_c("Requesting Slave restart");
+ ctx->setProperty("DIHWritesRequestType", (Uint32) SLAVE_RESTART_REQ);
+ ctx->setProperty("DIHWritesRequest", (Uint32) 1);
+
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ ndbout_c("Dumping DIH page info to ndbd stdout");
+ restarter.dumpStateAllNodes(&dumpArg, 1);
+ NdbSleep_MilliSleep(periodSeconds * 1000);
+ }
+
+ ndbout_c("Clearing error insert...");
+ restarter.insertErrorInAllNodes(0);
+
+ waitPeriods = 2;
+ for (Uint32 p=0; p<waitPeriods; p++)
+ {
+ ndbout_c("Dumping DIH page info to ndbd stdout");
+ restarter.dumpStateAllNodes(&dumpArg, 1);
+ NdbSleep_MilliSleep(periodSeconds * 1000);
+ }
+
+ ndbout_c("Waiting for worker to finish task...");
+ ctx->getPropertyWait("DIHWritesRequest", 2);
+
+ if (ctx->isTestStopped())
+ return NDBT_OK;
+
+ ndbout_c("Done.");
+ }
+
+ /* Finish up */
+ ctx->stopTest();
+
+ return NDBT_OK;
+}
+
NDBT_TESTSUITE(testLimits);
@@ -1063,6 +1385,15 @@ TESTCASE("DropSignalFragments",
INITIALIZER(testDropSignalFragments);
}
+TESTCASE("SlowDihFileWrites",
+ "Test behaviour of slow Dih table file writes")
+{
+ INITIALIZER(create100Tables);
+ STEP(testWorker);
+ STEP(testSlowDihFileWrites);
+ FINALIZER(drop100Tables);
+}
+
NDBT_TESTSUITE_END(testLimits);
int main(int argc, const char** argv){
=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt 2012-10-24 10:05:26 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt 2012-10-30 00:30:30 +0000
@@ -1883,3 +1883,7 @@ max-time : 300
cmd: testDict
args: -n Bug14645319 T1
+max-time : 1200
+cmd: testLimits
+args: -n SlowDihFileWrites T1
+
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.1-telco-7.1 branch (frazer.clement:4633 to 4634) | Frazer Clement | 16 Nov |