List: Commits « Previous Message | Next Message »
From: Jonas Oreland  Date: January 29 2009 10:57am
Subject: bzr commit into mysql-5.1-telco-6.2 branch (jonas:2802) Bug#42422
View as plain text  
#At file:///home/jonas/src/telco-6.2/

 2802 Jonas Oreland	2009-01-29
      ndb - bug#42422 - fix in node-failure during sp1
modified:
  storage/ndb/src/kernel/blocks/ERROR_codes.txt
  storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
  storage/ndb/test/ndbapi/testNodeRestart.cpp
  storage/ndb/test/run-test/daily-basic-tests.txt

=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2008-12-08 12:35:55 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2009-01-29 10:56:52 +0000
@@ -1,4 +1,4 @@
-Next QMGR 937
+Next QMGR 938
 Next NDBCNTR 1002
 Next NDBFS 2000
 Next DBACC 3002

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2008-11-13 13:15:56 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2009-01-29 10:56:52 +0000
@@ -1010,6 +1010,13 @@ void Qmgr::execCM_REGCONF(Signal* signal
   c_start.m_gsn = GSN_CM_NODEINFOREQ;
   c_start.m_nodes = c_clusterNodes;
 
+  if (ERROR_INSERTED(937))
+  {
+    CLEAR_ERROR_INSERT_VALUE;
+    signal->theData[0] = 9999;
+    sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
+  }
+
   return;
 }//Qmgr::execCM_REGCONF()
 
@@ -2847,7 +2854,13 @@ void Qmgr::node_failed(Signal* signal, U
     jam();
     return;
   case ZSTARTING:
-    c_start.reset();
+    /**
+     * bug#42422
+     *   Force "real" failure handling
+     */
+    failedNodePtr.p->phase = ZRUNNING;
+    failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE);
+    return;
     // Fall-through
   default:
     jam();
@@ -3410,6 +3423,8 @@ void Qmgr::execPREP_FAILREQ(Signal* sign
   NodeRecPtr myNodePtr;
   jamEntry();
   
+  c_start.reset();
+  
   if (check_multi_node_shutdown(signal))
   {
     jam();

=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp	2008-12-15 19:35:37 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp	2009-01-29 10:56:52 +0000
@@ -281,7 +281,7 @@ int runRestarter(NDBT_Context* ctx, NDBT
     return NDBT_FAILED;
   }
   
-  loops *= restarter.getNumDbNodes();
+  loops *= (restarter.getNumDbNodes() > 4 ? 4 : restarter.getNumDbNodes());
   while(i<loops && result != NDBT_FAILED && !ctx->isTestStopped()){
 
     int id = lastId % restarter.getNumDbNodes();
@@ -2506,7 +2506,10 @@ runMNF(NDBT_Context* ctx, NDBT_Step* ste
     {
       for (int i = 0; i<cnt; i++)
       {
-        res.insertErrorInNode(nodes[i], 7180);
+        if (res.getNextMasterNodeId(master) == nodes[i])
+          res.insertErrorInNode(nodes[i], 7180);
+        else
+          res.insertErrorInNode(nodes[i], 7205);
       }
 
       int lcp = 7099;
@@ -3157,6 +3160,65 @@ runBug41469(NDBT_Context* ctx, NDBT_Step
   return NDBT_OK;
 }
 
+/**
+ * Test for bug#42422: start a node in the master's node group with error
+ * insert 937 set, wait for not-started again, then start it once more.
+ */
+int
+runBug42422(NDBT_Context* ctx, NDBT_Step* step)
+{
+  NdbRestarter res;
+  if (res.getNumDbNodes() < 4)
+  {
+    ctx->stopTest();
+    return NDBT_OK;
+  }
+
+  // Post-decrement: "-l 1" must run one round ("while (--loops)" ran none).
+  int loops = ctx->getNumLoops();
+  while (loops-- > 0)
+  {
+    int master = res.getMasterNodeId();
+    ndbout_c("master: %u", master);
+    int nodeId = res.getRandomNodeSameNodeGroup(master, rand());
+    ndbout_c("target: %u", nodeId);
+    int node2 = res.getRandomNodeOtherNodeGroup(nodeId, rand());
+    ndbout_c("node 2: %u", node2);
+
+    res.restartOneDbNode(nodeId,
+                         /** initial */ false,
+                         /** nostart */ true,
+                         /** abort   */ true);
+    res.waitNodesNoStart(&nodeId, 1);
+
+    int dump[] = { 9000, 0 };
+    dump[1] = node2;
+    if (res.dumpStateOneNode(nodeId, dump, 2))
+      return NDBT_FAILED;
+    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+    if (res.dumpStateOneNode(nodeId, val2, 2))
+      return NDBT_FAILED;
+    res.insertErrorInNode(nodeId, 937);
+    ndbout_c("%u : starting %u", __LINE__, nodeId);
+    res.startNodes(&nodeId, 1);
+    NdbSleep_SecSleep(3);
+    ndbout_c("%u : waiting for %u to not get not-started", __LINE__, nodeId);
+    res.waitNodesNoStart(&nodeId, 1);
+    // Second start: this time the whole cluster has to come up.
+    ndbout_c("%u : starting %u", __LINE__, nodeId);
+    res.startNodes(&nodeId, 1);
+
+    ndbout_c("%u : waiting for cluster started", __LINE__);
+    if (res.waitClusterStarted())
+    {
+      return NDBT_FAILED;
+    }
+  }
+
+  ctx->stopTest();
+  return NDBT_OK;
+}
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -3610,6 +3672,9 @@ TESTCASE("Bug41469", ""){
   STEP(runScanUpdateUntilStopped);
   FINALIZER(runClearTable);
 }
+TESTCASE("Bug42422", ""){
+  INITIALIZER(runBug42422); // only an initializer is registered, no STEPs
+}
 NDBT_TESTSUITE_END(testNodeRestart);
 
 int main(int argc, const char** argv){

=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt	2008-12-16 17:12:00 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt	2009-01-29 10:56:52 +0000
@@ -1180,3 +1180,7 @@ max-time: 1200
 cmd: testNodeRestart
 args: -n Bug41295 T1
 
+max-time: 1200
+cmd: testNodeRestart
+args: -n Bug42422 -l 1 T1
+

Thread
bzr commit into mysql-5.1-telco-6.2 branch (jonas:2802) Bug#42422 - Jonas Oreland - 29 Jan