#At file:///home/jonas/src/telco-6.2/
2760 Jonas Oreland 2008-12-03
ndb - bug#41214 - an incorrectly handled commit-ack marker
during TC take-over could cause subsequent API-failure handling
to never complete
modified:
storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
storage/ndb/src/kernel/blocks/dbtc/DbtcInit.cpp
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
storage/ndb/test/ndbapi/testNodeRestart.cpp
storage/ndb/test/run-test/daily-devel-tests.txt
=== modified file 'storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp 2008-09-19 21:49:00 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp 2008-12-03 19:43:30 +0000
@@ -1394,7 +1394,7 @@ private:
TcConnectRecord * const regTcPtr);
void sendCompleteLqh(Signal* signal,
TcConnectRecord * const regTcPtr);
- void sendTCKEY_FAILREF(Signal* signal, const ApiConnectRecord *);
+ void sendTCKEY_FAILREF(Signal* signal, ApiConnectRecord *);
void sendTCKEY_FAILCONF(Signal* signal, ApiConnectRecord *);
void routeTCKEY_FAILREFCONF(Signal* signal, const ApiConnectRecord *,
Uint32 gsn, Uint32 len);
=== modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcInit.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcInit.cpp 2008-08-11 10:41:11 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcInit.cpp 2008-12-03 19:43:30 +0000
@@ -297,14 +297,15 @@ Dbtc::Dbtc(Block_context& ctx):
#ifdef VM_TRACE
{
void* tmp[] = { &apiConnectptr,
- &tcConnectptr,
- &cachePtr,
- &attrbufptr,
- &hostptr,
- &timeOutptr,
- &scanFragptr,
- &databufptr,
- &tmpDatabufptr };
+ &tcConnectptr,
+ &cachePtr,
+ &attrbufptr,
+ &hostptr,
+ &timeOutptr,
+ &scanFragptr,
+ &databufptr,
+ &tmpDatabufptr,
+ &tcNodeFailptr };
init_globals_list(tmp, sizeof(tmp)/sizeof(tmp[0]));
}
#endif
=== modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2008-11-13 13:15:56 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2008-12-03 19:43:30 +0000
@@ -4796,7 +4796,6 @@ void Dbtc::copyApi(ApiConnectRecordPtr c
UintR Tlqhkeyconfrec = regApiPtr.p->lqhkeyconfrec;
UintR TgcpPointer = regApiPtr.p->gcpPointer;
UintR TgcpFilesize = cgcpFilesize;
- UintR TcommitAckMarker = regApiPtr.p->commitAckMarker;
NdbNodeBitmask Tnodes = regApiPtr.p->m_transaction_nodes;
GcpRecord *localGcpRecord = gcpRecord;
@@ -4807,7 +4806,7 @@ void Dbtc::copyApi(ApiConnectRecordPtr c
copyPtr.p->transid[0] = Ttransid1;
copyPtr.p->transid[1] = Ttransid2;
copyPtr.p->lqhkeyconfrec = Tlqhkeyconfrec;
- copyPtr.p->commitAckMarker = TcommitAckMarker;
+ copyPtr.p->commitAckMarker = RNIL;
copyPtr.p->m_transaction_nodes = Tnodes;
copyPtr.p->singleUserMode = 0;
@@ -5134,6 +5133,7 @@ void Dbtc::releaseApiConCopy(Signal* sig
regApiPtr->nextApiConnect = TfirstfreeApiConnectCopyOld;
setApiConTimer(apiConnectptr.i, 0, __LINE__);
regApiPtr->apiConnectstate = CS_RESTART;
+ ndbrequire(regApiPtr->commitAckMarker == RNIL);
}//Dbtc::releaseApiConCopy()
/* ========================================================================= */
@@ -7494,6 +7494,10 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* s
if (signal->getSendersBlockRef() != reference())
{
jam();
+
+ tcNodeFailptr.i = 0;
+ ptrAss(tcNodeFailptr, tcFailRecord);
+
/**
* Node should be in queue
*/
@@ -7918,7 +7922,7 @@ void Dbtc::completeTransAtTakeOverDoOne(
}//Dbtc::completeTransAtTakeOverDoOne()
void
-Dbtc::sendTCKEY_FAILREF(Signal* signal, const ApiConnectRecord * regApiPtr){
+Dbtc::sendTCKEY_FAILREF(Signal* signal, ApiConnectRecord * regApiPtr){
jam();
const Uint32 ref = regApiPtr->ndbapiBlockref;
@@ -7941,6 +7945,14 @@ Dbtc::sendTCKEY_FAILREF(Signal* signal,
routeTCKEY_FAILREFCONF(signal, regApiPtr, GSN_TCKEY_FAILREF, 3);
}
}
+
+ const Uint32 marker = regApiPtr->commitAckMarker;
+ if(marker != RNIL)
+ {
+ jam();
+ m_commitAckMarkerHash.release(marker);
+ regApiPtr->commitAckMarker = RNIL;
+ }
}
void
@@ -8168,17 +8180,6 @@ void Dbtc::toAbortHandlingLab(Signal* si
if (apiConnectptr.p->takeOverRec != (Uint8)Z8NIL) {
jam();
sendTCKEY_FAILREF(signal, apiConnectptr.p);
- const Uint32 marker = apiConnectptr.p->commitAckMarker;
- if(marker != RNIL){
- jam();
-
- CommitAckMarkerPtr tmp;
- tmp.i = marker;
- tmp.p = m_commitAckMarkerHash.getPtr(tmp.i);
-
- m_commitAckMarkerHash.release(tmp);
- apiConnectptr.p->commitAckMarker = RNIL;
- }
/*------------------------------------------------------------*/
/* WE HAVE COMPLETED THIS TRANSACTION NOW AND CAN */
@@ -11007,6 +11008,7 @@ void Dbtc::releaseApiConnectFail(Signal*
setApiConTimer(apiConnectptr.i, 0, __LINE__);
apiConnectptr.p->nextApiConnect = cfirstfreeApiConnectFail;
cfirstfreeApiConnectFail = apiConnectptr.i;
+ ndbrequire(apiConnectptr.p->commitAckMarker == RNIL);
}//Dbtc::releaseApiConnectFail()
void Dbtc::releaseKeys()
=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp 2008-08-20 20:29:47 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp 2008-12-03 19:43:30 +0000
@@ -264,6 +264,9 @@ int runRestarter(NDBT_Context* ctx, NDBT
int result = NDBT_OK;
int loops = ctx->getNumLoops();
int sync_threads = ctx->getProperty("SyncThreads", (unsigned)0);
+ int sleep0 = ctx->getProperty("Sleep0", (unsigned)0);
+ int sleep1 = ctx->getProperty("Sleep1", (unsigned)0);
+ int randnode = ctx->getProperty("RandNode", (unsigned)0);
NdbRestarter restarter;
int i = 0;
int lastId = 0;
@@ -282,6 +285,10 @@ int runRestarter(NDBT_Context* ctx, NDBT
while(i<loops && result != NDBT_FAILED && !ctx->isTestStopped()){
int id = lastId % restarter.getNumDbNodes();
+ if (randnode == 1)
+ {
+ id = rand() % restarter.getNumDbNodes();
+ }
int nodeId = restarter.getDbNodeId(id);
ndbout << "Restart node " << nodeId << endl;
if(restarter.restartOneDbNode(nodeId, false, true, true) != 0){
@@ -297,6 +304,9 @@ int runRestarter(NDBT_Context* ctx, NDBT
break;
}
+ if (sleep1)
+ NdbSleep_MilliSleep(sleep1);
+
if (restarter.startNodes(&nodeId, 1))
{
g_err << "Failed to start node" << endl;
@@ -310,6 +320,9 @@ int runRestarter(NDBT_Context* ctx, NDBT
break;
}
+ if (sleep0)
+ NdbSleep_MilliSleep(sleep0);
+
ctx->sync_up_and_wait("PauseThreads", sync_threads);
lastId++;
@@ -2913,6 +2926,65 @@ loop2:
return NDBT_OK;
}
+int
+runHammer(NDBT_Context* ctx, NDBT_Step* step) // load step: runs randomized single-row transactions until the test is stopped
+{
+ int result = NDBT_OK; // never read or modified below; the function always returns NDBT_OK
+ int records = ctx->getNumRecords(); // upper bound for the random row number (assumes records > 0, else the modulo below is undefined)
+ Ndb* pNdb = GETNDB(step);
+ HugoOperations hugoOps(*ctx->getTab());
+ while (!ctx->isTestStopped()) // one transaction per iteration
+ {
+ int r = rand() % records; // row targeted by every operation in this iteration
+ if (hugoOps.startTransaction(pNdb) != 0)
+ goto err; // NOTE(review): a failed start still reaches execute_Rollback/closeTransaction via err - confirm HugoOperations tolerates that
+
+ if ((rand() % 100) < 50) // roughly half the time update, otherwise write
+ {
+ if (hugoOps.pkUpdateRecord(pNdb, r, 1, rand()) != 0)
+ goto err;
+ }
+ else
+ {
+ if (hugoOps.pkWriteRecord(pNdb, r, 1, rand()) != 0)
+ goto err;
+ }
+
+ if (hugoOps.execute_NoCommit(pNdb) != 0) // run the first operation; any non-zero result goes to the rollback path
+ goto err;
+
+ if (hugoOps.pkDeleteRecord(pNdb, r, 1) != 0) // then delete the same row inside the same transaction
+ goto err;
+
+ if (hugoOps.execute_NoCommit(pNdb) != 0)
+ goto err;
+
+ if ((rand() % 100) < 50) // after the delete: roughly half the time insert, otherwise write
+ {
+ if (hugoOps.pkInsertRecord(pNdb, r, 1, rand()) != 0)
+ goto err;
+ }
+ else
+ {
+ if (hugoOps.pkWriteRecord(pNdb, r, 1, rand()) != 0)
+ goto err;
+ }
+
+ if ((rand() % 100) < 90) // roughly 90% of fully built transactions are committed
+ {
+ hugoOps.execute_Commit(pNdb); // return value deliberately not checked here
+ }
+ else
+ {
+ err: // shared rollback path: the remaining ~10% and every failed step above land here
+ hugoOps.execute_Rollback(pNdb); // return value not checked
+ }
+
+ hugoOps.closeTransaction(pNdb); // runs on every iteration, after commit and after rollback alike
+ }
+ return NDBT_OK; // failures inside the loop are never reported to the caller
+}
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -3343,6 +3415,15 @@ TESTCASE("Bug36245", ""){
STEP(runBug36245);
VERIFIER(runClearTable);
}
+TESTCASE("NF_Hammer", ""){ // node-failure test: 25 runHammer load steps run alongside one runRestarter step
+ TC_PROPERTY("Sleep0", 9000); // ms runRestarter sleeps before ctx->sync_up_and_wait("PauseThreads", ...)
+ TC_PROPERTY("Sleep1", 3000); // ms runRestarter sleeps before calling restarter.startNodes()
+ TC_PROPERTY("RandNode", 1); // key must match the "RandNode" property runRestarter reads; 1 selects a rand() node instead of round-robin
+ INITIALIZER(runLoadTable);
+ STEPS(runHammer, 25);
+ STEP(runRestarter);
+ VERIFIER(runClearTable);
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
=== modified file 'storage/ndb/test/run-test/daily-devel-tests.txt'
--- a/storage/ndb/test/run-test/daily-devel-tests.txt 2008-08-30 05:13:47 +0000
+++ b/storage/ndb/test/run-test/daily-devel-tests.txt 2008-12-03 19:43:30 +0000
@@ -133,6 +133,10 @@ max-time: 2500
cmd: testNodeRestart
args: -n FiftyPercentStopAndWait T6 T13
+max-time: 2500
+cmd: testNodeRestart
+args: -n NF_Hammer -r 5 T1
+
#max-time: 500
#cmd: testNodeRestart
#args: -n StopOnError T1
| Thread |
|---|
| • bzr commit into mysql-5.1 branch (jonas:2760) Bug#41214 | Jonas Oreland | 3 Dec |