2764 Jonas Oreland 2008-12-08
ndb - bug#41295 bug#41296 bug#41297
modified:
storage/ndb/src/kernel/blocks/ERROR_codes.txt
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
storage/ndb/test/include/NdbRestarter.hpp
storage/ndb/test/ndbapi/testNodeRestart.cpp
storage/ndb/test/run-test/daily-basic-tests.txt
storage/ndb/test/src/NdbRestarter.cpp
2763 Jonas Oreland 2008-12-03
ndb autotest - remove extra empty lines
modified:
storage/ndb/test/run-test/daily-basic-tests.txt
=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2008-08-11 10:41:11 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2008-12-08 12:35:55 +0000
@@ -6,7 +6,7 @@ Next DBTUP 4029
Next DBLQH 5051
Next DBDICT 6008
Next DBDIH 7215
-Next DBTC 8064
+Next DBTC 8074
Next CMVMI 9000
Next BACKUP 10041
Next DBUTIL 11002
=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2008-08-11 10:41:11 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2008-12-08 12:35:55 +0000
@@ -5188,16 +5188,32 @@ void Dbdih::checkGcpOutstanding(Signal*
GCPPrepareConf::SignalLength, JBB);
}//if
- if (c_GCP_COMMIT_Counter.isWaitingFor(failedNodeId)) {
+ if (c_GCP_COMMIT_Counter.isWaitingFor(failedNodeId))
+ {
jam();
- GCPNodeFinished* conf = (GCPNodeFinished*)signal->getDataPtrSend();
- conf->nodeId = failedNodeId;
- conf->gci_hi = Uint32(m_micro_gcp.m_old_gci >> 32);
- conf->gci_lo = Uint32(m_micro_gcp.m_old_gci);
- conf->failno = cfailurenr;
- sendSignal(reference(), GSN_GCP_NODEFINISH, signal,
- GCPNodeFinished::SignalLength, JBB);
- }//if
+
+ /**
+ * Waiting for GSN_GCP_NODEFINISH
+ * TC-take-over can generate new transactions
+ * that will be in this epoch
+ * re-run GCP_NOMORETRANS to master-TC (self) that will run
+ * take-over
+ */
+ c_GCP_COMMIT_Counter.clearWaitingFor(failedNodeId);
+ if (!c_GCP_COMMIT_Counter.isWaitingFor(getOwnNodeId()))
+ {
+ jam();
+ c_GCP_COMMIT_Counter.setWaitingFor(getOwnNodeId());
+ m_micro_gcp.m_state = MicroGcp::M_GCP_COMMIT;
+ }
+
+ GCPNoMoreTrans* req = (GCPNoMoreTrans*)signal->getDataPtrSend();
+ req->senderData = m_micro_gcp.m_master_ref;
+ req->gci_hi = m_micro_gcp.m_old_gci >> 32;
+ req->gci_lo = m_micro_gcp.m_old_gci & 0xFFFFFFFF;
+ sendSignal(clocaltcblockref, GSN_GCP_NOMORETRANS, signal,
+ GCPNoMoreTrans::SignalLength, JBB);
+ }
if (c_GCP_SAVEREQ_Counter.isWaitingFor(failedNodeId)) {
jam();
@@ -15589,7 +15605,6 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal
SET_ERROR_INSERT_VALUE2(7214, signal->theData[1]);
return;
}
-
}//Dbdih::execDUMP_STATE_ORD()
void
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2008-11-14 11:17:53 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2008-12-08 12:35:55 +0000
@@ -944,7 +944,9 @@ public:
typedef Ptr<GcpRecord> GcpRecordPtr;
struct HostRecord {
- bool inPackedList;
+ Uint8 inPackedList;
+ Uint8 nodestatus;
+ Uint8 _unused[2];
UintR noOfPackedWordsLqh;
UintR packedWordsLqh[30];
UintR noOfPackedWordsTc;
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp 2007-11-19 10:04:24 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp 2008-12-08 12:35:55 +0000
@@ -63,6 +63,10 @@ void Dblqh::initData()
m_backup_ptr = RNIL;
clogFileSize = 16;
cmaxLogFilesInPageZero = 40;
+
+ for (Uint32 i = 0; i < 1024; i++) {
+ ctransidHash[i] = RNIL;
+ }//for
}//Dblqh::initData()
void Dblqh::initRecords()
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2008-11-14 11:17:53 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2008-12-08 12:35:55 +0000
@@ -496,6 +496,14 @@ void Dblqh::execINCL_NODEREQ(Signal* sig
cnodeStatus[i] = ZNODE_UP;
}//if
}//for
+
+ {
+ HostRecordPtr Thostptr;
+ Thostptr.i = nodeId;
+ ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
+ Thostptr.p->nodestatus = ZNODE_UP;
+ }
+
signal->theData[0] = nodeId;
signal->theData[1] = cownref;
sendSignal(retRef, GSN_INCL_NODECONF, signal, 2, JBB);
@@ -712,6 +720,7 @@ void Dblqh::startphase1Lab(Signal* signa
ThostPtr.p->inPackedList = false;
ThostPtr.p->noOfPackedWordsLqh = 0;
ThostPtr.p->noOfPackedWordsTc = 0;
+ ThostPtr.p->nodestatus = ZNODE_DOWN;
}//for
cpackedListIndex = 0;
sendNdbSttorryLab(signal);
@@ -898,6 +907,14 @@ void Dblqh::execREAD_NODESCONF(Signal* s
jam();
cnodeData[ind] = i;
cnodeStatus[ind] = NdbNodeBitmask::get(readNodes->inactiveNodes, i);
+
+ {
+ HostRecordPtr Thostptr;
+ Thostptr.i = i;
+ ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
+ Thostptr.p->nodestatus = cnodeStatus[ind];
+ }
+
//readNodes->getVersionId(i, readNodes->theVersionIds) not used
if (!NodeBitmask::get(readNodes->inactiveNodes, i))
{
@@ -2280,7 +2297,9 @@ void Dblqh::noFreeRecordLab(Signal* sign
const Uint32 reqInfo = lqhKeyReq->requestInfo;
if(errCode == ZNO_FREE_MARKER_RECORDS_ERROR ||
- errCode == ZNODE_SHUTDOWN_IN_PROGESS){
+ errCode == ZNODE_SHUTDOWN_IN_PROGESS ||
+ errCode == ZNODE_FAILURE_ERROR){
+ jam();
releaseTcrec(signal, tcConnectptr);
}
@@ -3503,6 +3522,17 @@ void Dblqh::execLQHKEYREQ(Signal* signal
noFreeRecordLab(signal, lqhKeyReq, ZNODE_SHUTDOWN_IN_PROGESS);
return;
}
+
+ {
+ HostRecordPtr Thostptr;
+ Thostptr.i = refToNode(sig5); // TC-ref
+ ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
+ if (unlikely(Thostptr.p->nodestatus != ZNODE_UP))
+ {
+ noFreeRecordLab(signal, lqhKeyReq, ZNODE_FAILURE_ERROR);
+ return;
+ }
+ }
Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version;
@@ -7468,6 +7498,22 @@ void Dblqh::continueAbortLab(Signal* sig
void Dblqh::continueAfterLogAbortWriteLab(Signal* signal)
{
TcConnectionrec * const regTcPtr = tcConnectptr.p;
+
+ const Uint32 commitAckMarker = regTcPtr->commitAckMarker;
+ if(commitAckMarker != RNIL)
+ {
+ jam();
+#ifdef MARKER_TRACE
+ {
+ CommitAckMarkerPtr tmp;
+ m_commitAckMarkerHash.getPtr(tmp, commitAckMarker);
+ ndbout_c("Ab2 marker[%.8x %.8x]", tmp.p->transid1, tmp.p->transid2);
+ }
+#endif
+ m_commitAckMarkerHash.release(commitAckMarker);
+ regTcPtr->commitAckMarker = RNIL;
+ }
+
if (regTcPtr->operation == ZREAD && regTcPtr->dirtyOp)
{
jam();
@@ -7591,6 +7637,14 @@ void Dblqh::execNODE_FAILREP(Signal* sig
ndbrequire(cnoOfNodes - 1 < MAX_NDB_NODES);
for (i = 0; i < TnoOfNodes; i++) {
const Uint32 nodeId = Tdata[i];
+
+ {
+ HostRecordPtr Thostptr;
+ Thostptr.i = nodeId;
+ ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
+ Thostptr.p->nodestatus = ZNODE_DOWN;
+ }
+
lcpPtr.p->m_EMPTY_LCP_REQ.clear(nodeId);
for (Uint32 j = 0; j < cnoOfNodes; j++) {
@@ -17196,9 +17250,6 @@ void Dblqh::initialiseRecordsLab(Signal*
m_sr_nodes.clear();
m_sr_exec_sr_req.clear();
m_sr_exec_sr_conf.clear();
- for (i = 0; i < 1024; i++) {
- ctransidHash[i] = RNIL;
- }//for
for (i = 0; i < 4; i++) {
cactiveCopy[i] = RNIL;
}//for
@@ -18493,6 +18544,18 @@ void Dblqh::sendLqhTransconf(Signal* sig
signal->theData[0] = ZLQH_TRANS_NEXT;
signal->theData[1] = tcNodeFailptr.i;
sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
+
+ if (0)
+ {
+ ndbout_c("sending LQH_TRANSCONF %u transid: H'%.8x, H'%.8x op: %u state: %u(%u) marker: %u",
+ tcConnectptr.i,
+ tcConnectptr.p->transid[0],
+ tcConnectptr.p->transid[1],
+ tcConnectptr.p->operation,
+ tcConnectptr.p->transactionState,
+ stat,
+ tcConnectptr.p->commitAckMarker);
+ }
}//Dblqh::sendLqhTransconf()
/* --------------------------------------------------------------------------
@@ -18887,7 +18950,10 @@ Dblqh::validate_filter(Signal* signal)
if (start == end)
{
infoEvent("No filter specified, not listing...");
- return false;
+ if (!ERROR_INSERTED(4002))
+ return false;
+ else
+ return true;
}
while(start < end)
@@ -19079,7 +19145,7 @@ Dblqh::match_and_print(Signal* signal, P
char buf[100];
BaseString::snprintf(buf, sizeof(buf),
"OP[%u]: Tab: %d frag: %d TC: %u API: %d(0x%x)"
- "transid: 0x%x 0x%x op: %s state: %s",
+ "transid: H'%.8x H'%.8x op: %s state: %s",
tcRec.i,
tcRec.p->tableref,
tcRec.p->fragmentid,
@@ -19090,7 +19156,10 @@ Dblqh::match_and_print(Signal* signal, P
op,
state);
- infoEvent(buf);
+ if (!ERROR_INSERTED(4002))
+ infoEvent(buf);
+ else
+ ndbout_c(buf);
memcpy(signal->theData, temp, 4*len);
return true;
@@ -19115,7 +19184,7 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
CommitAckMarkerIterator iter;
for(m_commitAckMarkerHash.first(iter); iter.curr.i != RNIL;
m_commitAckMarkerHash.next(iter)){
- infoEvent("CommitAckMarker: i = %d (0x%x, 0x%x)"
+ infoEvent("CommitAckMarker: i = %d (H'%.8x, H'%.8x)"
" ApiRef: 0x%x apiOprec: 0x%x TcNodeId: %d",
iter.curr.i,
iter.curr.p->transid1,
@@ -19636,7 +19705,9 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
else
{
jam();
- infoEvent("End of operation dump");
+ infoEvent("End of operation dump");
+ if (ERROR_INSERTED(4002))
+ ndbrequire(false);
}
return;
@@ -19677,7 +19748,9 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
else
{
jam();
- infoEvent("End of operation dump");
+ infoEvent("End of operation dump");
+ if (ERROR_INSERTED(4002))
+ ndbrequire(false);
}
return;
@@ -19743,6 +19816,50 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
RSS_AP_SNAPSHOT_CHECK(c_fragment_pool);
return;
}
+
+ if (arg == 4002)
+ {
+ bool ops = false;
+ for (Uint32 i = 0; i<1024; i++)
+ {
+ if (ctransidHash[i] != RNIL)
+ {
+ jam();
+ ops = true;
+ break;
+ }
+ }
+
+ bool markers = m_commitAckMarkerPool.getNoOfFree() !=
+ m_commitAckMarkerPool.getSize();
+ if (unlikely(ops || markers))
+ {
+
+ if (markers)
+ {
+ ndbout_c("LQH: m_commitAckMarkerPool: %d free size: %d",
+ m_commitAckMarkerPool.getNoOfFree(),
+ m_commitAckMarkerPool.getSize());
+
+ CommitAckMarkerIterator iter;
+ for(m_commitAckMarkerHash.first(iter); iter.curr.i != RNIL;
+ m_commitAckMarkerHash.next(iter))
+ {
+ ndbout_c("CommitAckMarker: i = %d (H'%.8x, H'%.8x)"
+ " ApiRef: 0x%x apiOprec: 0x%x TcNodeId: %d",
+ iter.curr.i,
+ iter.curr.p->transid1,
+ iter.curr.p->transid2,
+ iter.curr.p->apiRef,
+ iter.curr.p->apiOprec,
+ iter.curr.p->tcNodeId);
+ }
+ }
+ SET_ERROR_INSERT_VALUE(4002);
+ signal->theData[0] = 2350;
+ EXECUTE_DIRECT(DBLQH, GSN_DUMP_STATE_ORD, signal, 1);
+ }
+ }
}//Dblqh::execDUMP_STATE_ORD()
/* **************************************************************** */
=== modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2008-12-03 19:44:54 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2008-12-08 12:35:55 +0000
@@ -3302,6 +3302,8 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
const Uint32 noOfLqhs = regTcPtr->noOfNodes;
if(commitAckMarker != RNIL){
jam();
+ ndbassert(regApiPtr->commitAckMarker == commitAckMarker);
+
LqhKeyReq::setMarkerFlag(Tdata10, 1);
CommitAckMarker * tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
@@ -3594,6 +3596,7 @@ void Dbtc::releaseTcCon()
UintR TconcurrentOp = c_counters.cconcurrentOp;
UintR TtcConnectptrIndex = tcConnectptr.i;
+ ndbrequire(regTcPtr->commitAckMarker == RNIL);
regTcPtr->tcConnectstate = OS_CONNECTED;
regTcPtr->nextTcConnect = TfirstfreeTcConnect;
regTcPtr->apiConnect = RNIL;
@@ -3786,6 +3789,7 @@ void Dbtc::execLQHKEYCONF(Signal* signal
regTcPtr->lastLqhCon = tlastLqhConnect;
regTcPtr->lastLqhNodeId = refToNode(tlastLqhBlockref);
regTcPtr->noFiredTriggers = noFired;
+ regTcPtr->commitAckMarker = RNIL;
UintR Ttckeyrec = (UintR)regApiPtr.p->tckeyrec;
UintR TclientData = regTcPtr->clientData;
@@ -4519,7 +4523,7 @@ void Dbtc::commit020Lab(Signal* signal)
if (localTcConnectptr.i != RNIL) {
Tcount = Tcount + 1;
- if (Tcount < 16 && !ERROR_INSERTED(8057)) {
+ if (Tcount < 16 && !ERROR_INSERTED(8057) && !ERROR_INSERTED(8073)) {
ptrCheckGuard(localTcConnectptr,
TtcConnectFilesize, localTcConnectRecord);
jam();
@@ -4530,6 +4534,14 @@ void Dbtc::commit020Lab(Signal* signal)
CLEAR_ERROR_INSERT_VALUE;
return;
}//if
+
+ if (ERROR_INSERTED(8073))
+ {
+ execSEND_PACKED(signal);
+ signal->theData[0] = 9999;
+ sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 100, 1);
+ return;
+ }
signal->theData[0] = TcContinueB::ZSEND_COMMIT_LOOP;
signal->theData[1] = apiConnectptr.i;
signal->theData[2] = localTcConnectptr.i;
@@ -5372,8 +5384,10 @@ void Dbtc::clearCommitAckMarker(ApiConne
if (regApiPtr->commitAckMarker == RNIL)
ndbassert(commitAckMarker == RNIL);
if (commitAckMarker != RNIL)
- ndbassert(regApiPtr->commitAckMarker != RNIL);
- if(commitAckMarker != RNIL){
+ ndbassert(regApiPtr->commitAckMarker == commitAckMarker);
+
+ if(commitAckMarker != RNIL)
+ {
jam();
m_commitAckMarkerHash.release(commitAckMarker);
regTcPtr->commitAckMarker = RNIL;
@@ -7965,7 +7979,7 @@ Dbtc::sendTCKEY_FAILCONF(Signal* signal,
const Uint32 nodeId = refToNode(ref);
if(ref != 0)
{
- jam()
+ jam();
failConf->apiConnectPtr = regApiPtr->ndbapiConnect | (marker != RNIL);
failConf->transId1 = regApiPtr->transid[0];
failConf->transId2 = regApiPtr->transid[1];
@@ -8800,6 +8814,7 @@ void Dbtc::updateApiStateFail(Signal* si
tmp.p->noOfLqhs = 1;
tmp.p->lqhNodeId[0] = tnodeid;
tmp.p->apiConnectPtr = apiConnectptr.i;
+
#if defined VM_TRACE || defined ERROR_INSERT
{
CommitAckMarkerPtr check;
@@ -8814,10 +8829,23 @@ void Dbtc::updateApiStateFail(Signal* si
tmp.i = marker;
tmp.p = m_commitAckMarkerHash.getPtr(marker);
+ ndbassert(tmp.p->transid1 == ttransid1);
+ ndbassert(tmp.p->transid2 == ttransid2);
+
const Uint32 noOfLqhs = tmp.p->noOfLqhs;
+ for (Uint32 i = 0; i<noOfLqhs && i < MAX_REPLICAS; i++)
+ {
+ if (tmp.p->lqhNodeId[i] == tnodeid)
+ {
+ jam();
+ goto found;
+ }
+ }
ndbrequire(noOfLqhs < MAX_REPLICAS);
tmp.p->lqhNodeId[noOfLqhs] = tnodeid;
tmp.p->noOfLqhs = (noOfLqhs + 1);
+ found:
+ (void)1;
}
}
@@ -10809,6 +10837,7 @@ void Dbtc::initialiseTcConnect(Signal* s
tcConnectptr.p->apiConnect = RNIL;
tcConnectptr.p->noOfNodes = 0;
tcConnectptr.p->nextTcConnect = tcConnectptr.i + 1;
+ tcConnectptr.p->commitAckMarker = RNIL;
}//for
tcConnectptr.i = titcTmp - 1;
ptrAss(tcConnectptr, tcConnectRecord);
@@ -10825,6 +10854,7 @@ void Dbtc::initialiseTcConnect(Signal* s
tcConnectptr.p->apiConnect = RNIL;
tcConnectptr.p->noOfNodes = 0;
tcConnectptr.p->nextTcConnect = tcConnectptr.i + 1;
+ tcConnectptr.p->commitAckMarker = RNIL;
}//for
tcConnectptr.i = ctcConnectFilesize - 1;
ptrAss(tcConnectptr, tcConnectRecord);
@@ -10918,6 +10948,15 @@ void Dbtc::releaseAbortResources(Signal*
releaseTcCon();
tcConnectptr.i = rarTcConnectptr.i;
}//while
+
+ Uint32 marker = apiConnectptr.p->commitAckMarker;
+ if (marker != RNIL)
+ {
+ jam();
+ m_commitAckMarkerHash.release(marker);
+ apiConnectptr.p->commitAckMarker = RNIL;
+ }
+
apiConnectptr.p->firstTcConnect = RNIL;
apiConnectptr.p->lastTcConnect = RNIL;
apiConnectptr.p->m_transaction_nodes.clear();
@@ -11620,6 +11659,12 @@ Dbtc::execDUMP_STATE_ORD(Signal* signal)
}
return;
}
+
+ if (arg == 4002)
+ {
+ ndbrequire(m_commitAckMarkerPool.getNoOfFree() ==
+ m_commitAckMarkerPool.getSize());
+ }
}//Dbtc::execDUMP_STATE_ORD()
bool
=== modified file 'storage/ndb/test/include/NdbRestarter.hpp'
--- a/storage/ndb/test/include/NdbRestarter.hpp 2008-02-21 13:57:42 +0000
+++ b/storage/ndb/test/include/NdbRestarter.hpp 2008-12-08 12:35:55 +0000
@@ -69,6 +69,7 @@ public:
int waitNodesNoStart(const int * _nodes, int _num_nodes,
unsigned int _timeout = 120);
+ int checkClusterAlive(const int * deadnodes, int num_nodes);
int getNumDbNodes();
int insertErrorInNode(int _nodeId, int error);
=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp 2008-12-03 19:44:54 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp 2008-12-08 12:35:55 +0000
@@ -2929,7 +2929,6 @@ loop2:
int
runHammer(NDBT_Context* ctx, NDBT_Step* step)
{
- int result = NDBT_OK;
int records = ctx->getNumRecords();
Ndb* pNdb = GETNDB(step);
HugoOperations hugoOps(*ctx->getTab());
@@ -2937,7 +2936,7 @@ runHammer(NDBT_Context* ctx, NDBT_Step*
{
int r = rand() % records;
if (hugoOps.startTransaction(pNdb) != 0)
- goto err;
+ continue;
if ((rand() % 100) < 50)
{
@@ -2985,6 +2984,139 @@ runHammer(NDBT_Context* ctx, NDBT_Step*
return NDBT_OK;
}
+int
+runMixedLoad(NDBT_Context* ctx, NDBT_Step* step)
+{
+ int res = 0;
+ int records = ctx->getNumRecords();
+ Ndb* pNdb = GETNDB(step);
+ HugoOperations hugoOps(*ctx->getTab());
+ unsigned id = (unsigned)rand();
+ while (!ctx->isTestStopped())
+ {
+ if (ctx->getProperty("Pause", (Uint32)0))
+ {
+ ndbout_c("thread %u stopped", id);
+ ctx->sync_down("WaitThreads");
+ while (ctx->getProperty("Pause", (Uint32)0) && !ctx->isTestStopped())
+ NdbSleep_MilliSleep(15);
+
+ if (ctx->isTestStopped())
+ break;
+ ndbout_c("thread %u continue", id);
+ }
+
+ if ((res = hugoOps.startTransaction(pNdb)) != 0)
+ {
+ if (res == 4009)
+ return NDBT_FAILED;
+ continue;
+ }
+
+ for (int i = 0; i < 10; i++)
+ {
+ int r = rand() % records;
+ if ((rand() % 100) < 50)
+ {
+ if (hugoOps.pkUpdateRecord(pNdb, r, 1, rand()) != 0)
+ goto err;
+ }
+ else
+ {
+ if (hugoOps.pkWriteRecord(pNdb, r, 1, rand()) != 0)
+ goto err;
+ }
+ }
+
+ if ((rand() % 100) < 90)
+ {
+ res = hugoOps.execute_Commit(pNdb);
+ }
+ else
+ {
+ err:
+ res = hugoOps.execute_Rollback(pNdb);
+ }
+
+ hugoOps.closeTransaction(pNdb);
+
+ if (res == 4009)
+ {
+ return NDBT_FAILED;
+ }
+ }
+ return NDBT_OK;
+}
+
+int
+runBug41295(NDBT_Context* ctx, NDBT_Step* step)
+{
+ NdbRestarter res;
+
+ if (res.getNumDbNodes() < 2)
+ {
+ ctx->stopTest();
+ return NDBT_OK;
+ }
+
+
+ int leak = 4002;
+ const int cases = 1;
+ int loops = ctx->getNumLoops();
+ if (loops <= cases)
+ loops = cases + 1;
+
+ for (int i = 0; i<loops; i++)
+ {
+ int master = res.getMasterNodeId();
+ int next = res.getNextMasterNodeId(master);
+
+ int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+ if (res.dumpStateOneNode(next, val2, 2))
+ return NDBT_FAILED;
+
+ ndbout_c("stopping %u, err 8073", next);
+ res.insertErrorInNode(next, 8073);
+ ndbout_c("waiting for %u", next);
+ res.waitNodesNoStart(&next, 1);
+
+ ndbout_c("pausing all threads");
+ ctx->setProperty("Pause", 1);
+ ctx->sync_up_and_wait("WaitThreads", ctx->getProperty("Threads", 1));
+ ndbout_c("all threads paused");
+ NdbSleep_MilliSleep(5000);
+ res.dumpStateAllNodes(&leak, 1);
+ NdbSleep_MilliSleep(1000);
+ if (res.checkClusterAlive(&next, 1))
+ {
+ return NDBT_FAILED;
+ }
+ ndbout_c("restarting threads");
+ ctx->setProperty("Pause", (Uint32)0);
+
+ ndbout_c("starting %u", next);
+ res.startNodes(&next, 1);
+ ndbout_c("waiting for cluster started");
+ if (res.waitClusterStarted())
+ {
+ return NDBT_FAILED;
+ }
+
+ ndbout_c("pausing all threads");
+ ctx->setProperty("Pause", 1);
+ ctx->sync_up_and_wait("WaitThreads", ctx->getProperty("Threads", 1));
+ ndbout_c("all threads paused");
+ NdbSleep_MilliSleep(5000);
+ res.dumpStateAllNodes(&leak, 1);
+ NdbSleep_MilliSleep(1000);
+ ndbout_c("restarting threads");
+ ctx->setProperty("Pause", (Uint32)0);
+ }
+
+ ctx->stopTest();
+ return NDBT_OK;
+}
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -3424,6 +3556,14 @@ TESTCASE("NF_Hammer", ""){
STEP(runRestarter);
VERIFIER(runClearTable);
}
+TESTCASE("Bug41295", "")
+{
+ TC_PROPERTY("Threads", 25);
+ INITIALIZER(runLoadTable);
+ STEPS(runMixedLoad, 25);
+ STEP(runBug41295);
+ FINALIZER(runClearTable);
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt 2008-12-03 20:11:52 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt 2008-12-08 12:35:55 +0000
@@ -1172,3 +1172,7 @@ cmd: test_event
args -r 5000 -n Bug30780 T1
#EOF 2008-08-11
+max-time: 1200
+cmd: testNodeRestart
+args -n Bug41295 T1
+
=== modified file 'storage/ndb/test/src/NdbRestarter.cpp'
--- a/storage/ndb/test/src/NdbRestarter.cpp 2008-03-26 14:34:39 +0000
+++ b/storage/ndb/test/src/NdbRestarter.cpp 2008-12-08 12:35:55 +0000
@@ -22,6 +22,7 @@
#include <random.h>
#include <kernel/ndb_limits.h>
#include <ndb_version.h>
+#include <NodeBitmask.hpp>
#define MGMERR(h) \
ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
@@ -541,7 +542,7 @@ NdbRestarter::getStatus(){
}
}
const int err = ndb_mgm_get_latest_error(handle);
- ndbout << "status==NULL, retries="<<retries<<endl;
+ ndbout << "status==NULL, retries="<<retries<< " err=" << err << endl;
MGMERR(handle);
retries++;
continue;
@@ -802,5 +803,26 @@ NdbRestarter::setReconnect(bool val){
m_reconnect= val;
}
+int
+NdbRestarter::checkClusterAlive(const int * deadnodes, int num_nodes)
+{
+ if (getStatus() != 0)
+ return -1;
+
+ NdbNodeBitmask mask;
+ for (int i = 0; i<num_nodes; i++)
+ mask.set(deadnodes[i]);
+
+ for (size_t n = 0; n < ndbNodes.size(); n++)
+ {
+ if (mask.get(ndbNodes[n].node_id))
+ continue;
+
+ if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED)
+ return ndbNodes[n].node_id;
+ }
+
+ return 0;
+}
template class Vector<ndb_mgm_node_state>;
| Thread | Author | Date |
|---|---|---|
| • bzr push into mysql-5.1 branch (jonas:2763 to 2764) Bug#41295 Bug#41296 Bug#41297 | Jonas Oreland | 8 Dec |