#At file:///home/jonas/src/telco-6.2/
2802 Jonas Oreland 2009-01-29
ndb - bug#42422 - fix in node-failure during sp1
modified:
storage/ndb/src/kernel/blocks/ERROR_codes.txt
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
storage/ndb/test/ndbapi/testNodeRestart.cpp
storage/ndb/test/run-test/daily-basic-tests.txt
=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2008-12-08 12:35:55 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2009-01-29 10:56:52 +0000
@@ -1,4 +1,4 @@
-Next QMGR 937
+Next QMGR 938
Next NDBCNTR 1002
Next NDBFS 2000
Next DBACC 3002
=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2008-11-13 13:15:56 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2009-01-29 10:56:52 +0000
@@ -1010,6 +1010,13 @@ void Qmgr::execCM_REGCONF(Signal* signal
c_start.m_gsn = GSN_CM_NODEINFOREQ;
c_start.m_nodes = c_clusterNodes;
+ if (ERROR_INSERTED(937))
+ {
+ CLEAR_ERROR_INSERT_VALUE;
+ signal->theData[0] = 9999;
+ sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
+ }
+
return;
}//Qmgr::execCM_REGCONF()
@@ -2847,7 +2854,13 @@ void Qmgr::node_failed(Signal* signal, U
jam();
return;
case ZSTARTING:
- c_start.reset();
+ /**
+ * bug#42422
+ * Force "real" failure handling
+ */
+ failedNodePtr.p->phase = ZRUNNING;
+ failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE);
+ return;
// Fall-through
default:
jam();
@@ -3410,6 +3423,8 @@ void Qmgr::execPREP_FAILREQ(Signal* sign
NodeRecPtr myNodePtr;
jamEntry();
+ c_start.reset();
+
if (check_multi_node_shutdown(signal))
{
jam();
=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp 2008-12-15 19:35:37 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp 2009-01-29 10:56:52 +0000
@@ -281,7 +281,7 @@ int runRestarter(NDBT_Context* ctx, NDBT
return NDBT_FAILED;
}
- loops *= restarter.getNumDbNodes();
+ loops *= (restarter.getNumDbNodes() > 4 ? 4 : restarter.getNumDbNodes());
while(i<loops && result != NDBT_FAILED && !ctx->isTestStopped()){
int id = lastId % restarter.getNumDbNodes();
@@ -2506,7 +2506,10 @@ runMNF(NDBT_Context* ctx, NDBT_Step* ste
{
for (int i = 0; i<cnt; i++)
{
- res.insertErrorInNode(nodes[i], 7180);
+ if (res.getNextMasterNodeId(master) == nodes[i])
+ res.insertErrorInNode(nodes[i], 7180);
+ else
+ res.insertErrorInNode(nodes[i], 7205);
}
int lcp = 7099;
@@ -3157,6 +3160,65 @@ runBug41469(NDBT_Context* ctx, NDBT_Step
return NDBT_OK;
}
+// Test for bug#42422: restart a node with error insert 937 armed, then verify the cluster starts again.
+int
+runBug42422(NDBT_Context* ctx, NDBT_Step* step)
+{
+  NdbRestarter res;
+  // With fewer than 4 data nodes the scenario is not run; the test stops and reports OK.
+  if (res.getNumDbNodes() < 4)
+  {
+    ctx->stopTest();
+    return NDBT_OK;
+  }
+  // Run exactly getNumLoops() iterations; a bare "--loops" skipped the body entirely for "-l 1".
+  int loops = ctx->getNumLoops();
+  while (--loops >= 0)
+  {
+    int master = res.getMasterNodeId();
+    ndbout_c("master: %u", master);
+    int nodeId = res.getRandomNodeSameNodeGroup(master, rand());
+    ndbout_c("target: %u", nodeId);
+    int node2 = res.getRandomNodeOtherNodeGroup(nodeId, rand());
+    ndbout_c("node 2: %u", node2);
+    // Restart the target (same node group as the master) with abort + nostart, then wait for not-started.
+    res.restartOneDbNode(nodeId,
+                         /** initial */ false,
+                         /** nostart */ true,
+                         /** abort */ true);
+    res.waitNodesNoStart(&nodeId, 1);
+    // DUMP 9000 with node2 as argument -- NOTE(review): presumably blocks signals from node2; confirm.
+    int dump[] = { 9000, 0 };
+    dump[1] = node2;
+
+    if (res.dumpStateOneNode(nodeId, dump, 2))
+      return NDBT_FAILED;
+    // NOTE(review): presumably makes an error insert restart the node into not-started state; confirm.
+    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+    if (res.dumpStateOneNode(nodeId, val2, 2))
+      return NDBT_FAILED;
+    // Error 937 is checked in Qmgr::execCM_REGCONF, which then sends a delayed NDB_TAMPER 9999 to CMVMI.
+    res.insertErrorInNode(nodeId, 937);
+    ndbout_c("%u : starting %u", __LINE__, nodeId);
+    res.startNodes(&nodeId, 1);
+    NdbSleep_SecSleep(3);
+    ndbout_c("%u : waiting for %u to not get not-started", __LINE__, nodeId);
+    res.waitNodesNoStart(&nodeId, 1);
+    // Second start has no error insert armed here; the whole cluster must come up.
+    ndbout_c("%u : starting %u", __LINE__, nodeId);
+    res.startNodes(&nodeId, 1);
+
+    ndbout_c("%u : waiting for cluster started", __LINE__);
+    if (res.waitClusterStarted())
+    {
+      return NDBT_FAILED;
+    }
+  }
+
+  ctx->stopTest();
+  return NDBT_OK;
+}
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -3610,6 +3672,9 @@ TESTCASE("Bug41469", ""){
STEP(runScanUpdateUntilStopped);
FINALIZER(runClearTable);
}
+TESTCASE("Bug42422", ""){
+ INITIALIZER(runBug42422); // only an initializer is registered: no STEP or FINALIZER for this case
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt 2008-12-16 17:12:00 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt 2009-01-29 10:56:52 +0000
@@ -1180,3 +1180,7 @@ max-time: 1200
cmd: testNodeRestart
args: -n Bug41295 T1
+max-time: 1200
+cmd: testNodeRestart
+args: -n Bug42422 -l 1 T1
+
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-6.2 branch (jonas:2802) Bug#42422 | Jonas Oreland | 29 Jan |