List: Commits  « Previous Message | Next Message »
From: jonas  Date: January 3 2007 5:17am
Subject: bk commit into 5.0 tree (jonas:1.2281) BUG#25364
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-01-03 06:17:34+01:00, jonas@stripped +6 -0
  ndb - bug#25364
    on master node failure during qmgr-commitreq
      make sure to remove all committed failed nodes from failed/prepfailed arrays

  ndb/include/kernel/signaldata/DumpStateOrd.hpp@stripped, 2007-01-03 06:17:32+01:00, jonas@stripped +1 -0
    new error code

  ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2007-01-03 06:17:32+01:00, jonas@stripped +3 -0
    new error code

  ndb/src/kernel/blocks/qmgr/Qmgr.hpp@stripped, 2007-01-03 06:17:32+01:00, jonas@stripped +4 -0
    extra error insert variable

  ndb/src/kernel/blocks/qmgr/QmgrMain.cpp@stripped, 2007-01-03 06:17:32+01:00, jonas@stripped +59 -13
    make sure to remove all committed failed nodes from failed/prepfailed arrays

  ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2007-01-03 06:17:32+01:00, jonas@stripped +43 -0
    testcase

  ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-01-03 06:17:32+01:00, jonas@stripped +4 -0
    testcase

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/50-work

--- 1.45/ndb/test/run-test/daily-basic-tests.txt	2007-01-03 06:17:38 +01:00
+++ 1.46/ndb/test/run-test/daily-basic-tests.txt	2007-01-03 06:17:38 +01:00
@@ -469,6 +469,10 @@
 cmd: testNodeRestart
 args: -n Bug24717 T1
 
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug25364 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench

--- 1.8/ndb/include/kernel/signaldata/DumpStateOrd.hpp	2007-01-03 06:17:38 +01:00
+++ 1.9/ndb/include/kernel/signaldata/DumpStateOrd.hpp	2007-01-03 06:17:38 +01:00
@@ -68,6 +68,7 @@
     // 100-105 TUP and ACC  
     // 200-240 UTIL
     // 300-305 TRIX
+    QmgrErr935 = 935,
     NdbfsDumpFileStat = 400,
     NdbfsDumpAllFiles = 401,
     NdbfsDumpOpenFiles = 402,

--- 1.24/ndb/src/kernel/blocks/ERROR_codes.txt	2007-01-03 06:17:38 +01:00
+++ 1.25/ndb/src/kernel/blocks/ERROR_codes.txt	2007-01-03 06:17:38 +01:00
@@ -21,6 +21,9 @@
 
 910: Crash new president after node crash
 
+935 : Crash master on node failure (delayed) 
+      and skip sending GSN_COMMIT_FAILREQ to specified node
+
 ERROR CODES FOR TESTING NODE FAILURE, GLOBAL CHECKPOINT HANDLING:
 -----------------------------------------------------------------
 

--- 1.11/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2007-01-03 06:17:38 +01:00
+++ 1.12/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2007-01-03 06:17:38 +01:00
@@ -426,6 +426,10 @@
   
   StopReq c_stopReq;
   bool check_multi_node_shutdown(Signal* signal);
+
+#ifdef ERROR_INSERT
+  Uint32 c_error_insert_extra;
+#endif
 };
 
 #endif

--- 1.34/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2007-01-03 06:17:38 +01:00
+++ 1.35/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2007-01-03 06:17:38 +01:00
@@ -3110,6 +3110,18 @@
   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
     jam();
     ptrAss(nodePtr, nodeRec);
+
+#ifdef ERROR_INSERT    
+    if (ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
+    {
+      ndbout_c("skipping node %d", c_error_insert_extra);
+      CLEAR_ERROR_INSERT_VALUE;
+      signal->theData[0] = 9999;
+      sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
+      continue;
+    }
+#endif
+
     if (nodePtr.p->phase == ZRUNNING) {
       jam();
       nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
@@ -3180,6 +3192,33 @@
   return;
 }//Qmgr::execPREP_FAILREF()
 
+static /* Remove from dst[0..dstcnt) every node id found in src[0..srccnt). */
+Uint32 /* Survivors are packed at the front of dst, order kept; returns their count. */
+clear_nodes(Uint32 dstcnt, Uint16 dst[], Uint32 srccnt, const Uint16 src[])
+{
+  if (srccnt == 0)
+    return dstcnt; /* empty removal list: dst is left untouched */
+  
+  Uint32 pos = 0; /* next slot in dst for a surviving id (pos <= i always) */
+  for (Uint32 i = 0; i<dstcnt; i++)
+  {
+    Uint32 node = dst[i];
+    for (Uint32 j = 0; j<srccnt; j++)
+    {
+      if (node == src[j]) /* match against the removal list */
+      {
+	node = RNIL; /* sentinel marking this id as removed */
+	break;
+      }
+    }
+    if (node != RNIL)
+    {
+      dst[pos++] = node; /* keep: compact in place */
+    }
+  }
+  return pos;
+}
+
 /*---------------------------------------------------------------------------*/
 /*    THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE.  */
 /*---------------------------------------------------------------------------*/
@@ -3267,19 +3306,18 @@
 		   NodeFailRep::SignalLength, JBB);
       }//if
     }//for
-    if (cpresident != getOwnNodeId()) {
-      jam();
-      cnoFailedNodes = cnoCommitFailedNodes - cnoFailedNodes;
-      if (cnoFailedNodes > 0) {
-        jam();
-        guard0 = cnoFailedNodes - 1;
-        arrGuard(guard0 + cnoCommitFailedNodes, MAX_NDB_NODES);
-        for (Tj = 0; Tj <= guard0; Tj++) {
-          jam();
-          cfailedNodes[Tj] = cfailedNodes[Tj + cnoCommitFailedNodes];
-        }//for
-      }//if
-    }//if
+
+    /**
+     * Remove committed nodes from failed/prepared
+     */
+    cnoFailedNodes = clear_nodes(cnoFailedNodes, 
+				 cfailedNodes, 
+				 cnoCommitFailedNodes, 
+				 ccommitFailedNodes);
+    cnoPrepFailedNodes = clear_nodes(cnoPrepFailedNodes, 
+				     cprepFailedNodes,
+				     cnoCommitFailedNodes,
+				     ccommitFailedNodes);
     cnoCommitFailedNodes = 0;
   }//if
   /**-----------------------------------------------------------------------
@@ -4658,6 +4696,14 @@
   default:
     ;
   }//switch
+
+#ifdef ERROR_INSERT
+  if (signal->theData[0] == 935 && signal->getLength() == 2)
+  {
+    SET_ERROR_INSERT_VALUE(935);
+    c_error_insert_extra = signal->theData[1];
+  }
+#endif
 }//Qmgr::execDUMP_STATE_ORD()
 
 void Qmgr::execSET_VAR_REQ(Signal* signal) 

--- 1.24/ndb/test/ndbapi/testNodeRestart.cpp	2007-01-03 06:17:38 +01:00
+++ 1.25/ndb/test/ndbapi/testNodeRestart.cpp	2007-01-03 06:17:38 +01:00
@@ -955,6 +955,46 @@
   return NDBT_OK;
 }
 
+int runBug25364(NDBT_Context* ctx, NDBT_Step* step){ /* Test step for bug#25364; returns NDBT_OK or NDBT_FAILED. */
+  int result = NDBT_OK; /* NOTE(review): never read in this function */
+  NdbRestarter restarter;
+  Ndb* pNdb = GETNDB(step); /* NOTE(review): never used in this function */
+  int loops = ctx->getNumLoops();
+  /* With fewer than 4 DB nodes the scenario is not run and the step reports OK. */
+  if (restarter.getNumDbNodes() < 4)
+    return NDBT_OK;
+  /* Dump-state order sent to the master in every iteration; meaning of the argument 1 is not visible here - TODO confirm. */
+  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+
+  for (; loops; loops --)
+  {
+    int master = restarter.getMasterNodeId();
+    int victim = restarter.getRandomNodeOtherNodeGroup(master, rand()); /* presumably a node outside the master's node group - verify helper */
+    int second = restarter.getRandomNodeSameNodeGroup(victim, rand()); /* presumably a node sharing victim's node group - verify helper */
+    /* Arm error insert 935 on the master with victim as extra value (ERROR_codes.txt: crash master on node failure, skip GSN_COMMIT_FAILREQ to that node). */
+    int dump[] = { 935, victim } ;
+    if (restarter.dumpStateOneNode(master, dump, 2))
+      return NDBT_FAILED;
+  
+    if (restarter.dumpStateOneNode(master, val2, 2))
+      return NDBT_FAILED;
+  /* Restart `second`; the three boolean flags are defined by NdbRestarter and not visible here - TODO confirm their meaning. */
+    if (restarter.restartOneDbNode(second, false, true, true))
+      return NDBT_FAILED;
+  /* Both the master and `second` are then waited on, started again, and waited on until started. */
+    int nodes[2] = { master, second };
+    if (restarter.waitNodesNoStart(nodes, 2))
+      return NDBT_FAILED;
+
+    restarter.startNodes(nodes, 2); /* NOTE(review): return value is not checked here */
+
+    if (restarter.waitNodesStarted(nodes, 2))
+      return NDBT_FAILED;
+  }
+  
+  return NDBT_OK;
+}
+
 
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
@@ -1270,6 +1310,9 @@
 }
 TESTCASE("Bug24717", ""){
   INITIALIZER(runBug24717);
+}
+TESTCASE("Bug25364", ""){
+  INITIALIZER(runBug25364);
 }
 NDBT_TESTSUITE_END(testNodeRestart);
 
Thread
bk commit into 5.0 tree (jonas:1.2281) BUG#25364 | jonas | 3 Jan