List:Commits« Previous MessageNext Message »
From:Jonas Oreland Date:October 30 2009 2:10pm
Subject:bzr commit into mysql-5.1-telco-7.0 branch (jonas:3179)
View as plain text  
#At file:///home/jonas/src/telco-6.4/ based on revid:jonas@stripped

 3179 Jonas Oreland	2009-10-30 [merge]
      merge 63 to 70

    modified:
      storage/ndb/src/kernel/blocks/ERROR_codes.txt
      storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
      storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
      storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
      storage/ndb/test/ndbapi/testSystemRestart.cpp
      storage/ndb/test/run-test/daily-basic-tests.txt
=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2009-10-27 10:13:15 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2009-10-30 14:10:30 +0000
@@ -3,7 +3,7 @@ Next NDBCNTR 1002
 Next NDBFS 2000
 Next DBACC 3002
 Next DBTUP 4031
-Next DBLQH 5054
+Next DBLQH 5055
 Next DBDICT 6025
 Next DBDIH 7221
 Next DBTC 8083

=== modified file 'storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2009-10-16 06:28:24 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2009-10-30 14:10:30 +0000
@@ -1087,11 +1087,12 @@ private:
                     Uint32 tfstStopGci,
                     Uint32& tfstStartGci,
                     Uint32& tfstLcp);
-  void newCrashedReplica(Uint32 nodeId, ReplicaRecordPtr ncrReplicaPtr);
+  void newCrashedReplica(ReplicaRecordPtr ncrReplicaPtr);
   void packCrashedReplicas(ReplicaRecordPtr pcrReplicaPtr);
   void releaseReplicas(Uint32 * replicaPtr);
-  void removeOldCrashedReplicas(ReplicaRecordPtr rocReplicaPtr);
+  void removeOldCrashedReplicas(Uint32, Uint32, ReplicaRecordPtr rocReplicaPtr);
   void removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr);
+  void mergeCrashedReplicas(ReplicaRecordPtr pcrReplicaPtr);
   void seizeReplicaRec(ReplicaRecordPtr& replicaPtr);
 
 //------------------------------------

=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2009-10-27 11:19:08 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2009-10-30 14:10:30 +0000
@@ -83,6 +83,8 @@ extern EventLogger * g_eventLogger;
 
 #define SYSFILE ((Sysfile *)&sysfileData[0])
 #define MAX_CRASHED_REPLICAS 8
+#define ZINIT_CREATE_GCI Uint32(0)
+#define ZINIT_REPLICA_LAST_GCI Uint32(-1)
 
 #define RETURN_IF_NODE_NOT_ALIVE(node) \
   if (!checkNodeAlive((node))) { \
@@ -3972,7 +3974,7 @@ Dbdih::execUPDATE_TOCONF(Signal* signal)
     CRASH_INSERTION(7154);
     
     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_CREATE_FRAG_STORED;
-    sendCreateFragReq(signal, 0, CreateFragReq::STORED, takeOverPtr.i);
+    sendCreateFragReq(signal, ZINIT_CREATE_GCI, CreateFragReq::STORED, takeOverPtr.i);
     return;
   case TakeOverRecord::TO_UPDATE_AFTER_STORED:
     jam();
@@ -10643,7 +10645,8 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPt
 
   const Uint32 newestRestorableGCI = SYSFILE->newestRestorableGCI;
   
-  for(Uint32 i = 0; i<tabPtr.p->totalfragments; i++){
+  for(Uint32 i = 0; i<tabPtr.p->totalfragments; i++)
+  {
     FragmentstorePtr fragPtr;
     getFragstore(tabPtr.p, i, fragPtr);
     
@@ -10668,7 +10671,9 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPt
       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
 
       const Uint32 noCrashedReplicas = replicaPtr.p->noCrashedReplicas;
-      if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
+
+      if (nodePtr.p->nodeStatus == NodeRecord::ALIVE)
+      {
 	jam();
 	switch (nodePtr.p->activeStatus) {
 	case Sysfile::NS_Active:
@@ -10679,33 +10684,17 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPt
 	  /* THE NODE IS ALIVE AND KICKING AND ACTIVE, LET'S USE IT.         */
 	  /* --------------------------------------------------------------- */
 	  arrGuardErr(noCrashedReplicas, MAX_CRASHED_REPLICAS, NDBD_EXIT_MAX_CRASHED_REPLICAS);
-	  Uint32 lastGci = replicaPtr.p->replicaLastGci[noCrashedReplicas];
-	  if(lastGci >= newestRestorableGCI){
-	    jam();
-	    /** -------------------------------------------------------------
-	     * THE REPLICA WAS ALIVE AT THE SYSTEM FAILURE. WE WILL SET THE 
-	     * LAST REPLICA GCI TO MINUS ONE SINCE IT HASN'T FAILED YET IN THE
-	     * NEW SYSTEM.                                                    
-	     *-------------------------------------------------------------- */
-	    replicaPtr.p->replicaLastGci[noCrashedReplicas] = (Uint32)-1;
-	  } else {
-	    jam();
-	    /*--------------------------------------------------------------
-	     * SINCE IT WAS NOT ALIVE AT THE TIME OF THE SYSTEM CRASH THIS IS 
-	     * A COMPLETELY NEW REPLICA. WE WILL SET THE CREATE GCI TO BE THE 
-	     * NEXT GCI TO BE EXECUTED.                                       
-	     *--------_----------------------------------------------------- */
-            if (noCrashedReplicas + 1 == MAX_CRASHED_REPLICAS)
-            {
-              jam();
-              packCrashedReplicas(replicaPtr);
-            }
-	    const Uint32 nextCrashed = replicaPtr.p->noCrashedReplicas + 1;
-	    replicaPtr.p->noCrashedReplicas = nextCrashed;
-	    arrGuardErr(nextCrashed, MAX_CRASHED_REPLICAS, NDBD_EXIT_MAX_CRASHED_REPLICAS);
-	    replicaPtr.p->createGci[nextCrashed] = newestRestorableGCI + 1;
-	    replicaPtr.p->replicaLastGci[nextCrashed] = (Uint32)-1;
-	  }//if
+
+          // Create new crashed replica
+          newCrashedReplica(replicaPtr);
+
+          // Create a new redo-interval
+          Uint32 nextCrashed = replicaPtr.p->noCrashedReplicas;
+          replicaPtr.p->createGci[nextCrashed] = newestRestorableGCI + 1;
+          replicaPtr.p->replicaLastGci[nextCrashed] = ZINIT_REPLICA_LAST_GCI;
+
+          // merge
+          mergeCrashedReplicas(replicaPtr);
 
 	  resetReplicaLcp(replicaPtr.p, newestRestorableGCI);
 
@@ -12735,7 +12724,7 @@ Dbdih::reportLcpCompletion(const LcpFrag
   replicaPtr.p->lcpIdStarted = lcpId;
   replicaPtr.p->lcpOngoingFlag = false;
   
-  removeOldCrashedReplicas(replicaPtr);
+  removeOldCrashedReplicas(tableId, fragId, replicaPtr);
   replicaPtr.p->lcpId[lcpNo] = lcpId;
   replicaPtr.p->lcpStatus[lcpNo] = ZVALID;
   replicaPtr.p->maxGciStarted[lcpNo] = maxGciStarted;
@@ -13675,8 +13664,8 @@ void Dbdih::allocStoredReplica(Fragments
   newReplicaPtr.p->noCrashedReplicas = 0;
   newReplicaPtr.p->initialGci = (Uint32)(m_micro_gcp.m_current_gci >> 32);
   for (i = 0; i < MAX_CRASHED_REPLICAS; i++) {
-    newReplicaPtr.p->replicaLastGci[i] = (Uint32)-1;
-    newReplicaPtr.p->createGci[i] = 0;
+    newReplicaPtr.p->replicaLastGci[i] = ZINIT_REPLICA_LAST_GCI;
+    newReplicaPtr.p->createGci[i] = ZINIT_CREATE_GCI;
   }//for
   newReplicaPtr.p->createGci[0] = (Uint32)(m_micro_gcp.m_current_gci >> 32);
   newReplicaPtr.p->nextLcp = 0;
@@ -14023,30 +14012,43 @@ void Dbdih::findMinGci(ReplicaRecordPtr 
                        Uint32& keepGci,
                        Uint32& oldestRestorableGci)
 {
-  for (Uint32 i = 0; i < MAX_LCP_STORED; i++) {
-    jam();
-    if ((fmgReplicaPtr.p->lcpStatus[i] == ZVALID) &&
-        ((fmgReplicaPtr.p->lcpId[i] + MAX_LCP_STORED) <= (SYSFILE->latestLCP_ID + 1))) {
-      jam();
-      /*--------------------------------------------------------------------*/
-      // We invalidate the checkpoint we are preparing to overwrite. 
-      // The LCP id is still the old lcp id, 
-      // this is the reason of comparing with lcpId + 1.
-      /*---------------------------------------------------------------------*/
-      fmgReplicaPtr.p->lcpStatus[i] = ZINVALID;
-    }//if
-  }//for
   keepGci = (Uint32)-1;
   oldestRestorableGci = 0;
-  Uint32 lastLcpNo = prevLcpNo(fmgReplicaPtr.p->nextLcp);
-  if (fmgReplicaPtr.p->lcpStatus[lastLcpNo] == ZVALID)
+
+  for (Uint32 i = 0; i < MAX_LCP_STORED; i++)
   {
     jam();
-    keepGci = fmgReplicaPtr.p->maxGciCompleted[lastLcpNo];
-    oldestRestorableGci = fmgReplicaPtr.p->maxGciStarted[lastLcpNo];
-    ndbassert(fmgReplicaPtr.p->maxGciStarted[lastLcpNo] <c_newest_restorable_gci);
-  } 
-  else 
+    if (fmgReplicaPtr.p->lcpStatus[i] == ZVALID)
+    {
+      if ((fmgReplicaPtr.p->lcpId[i] + MAX_LCP_STORED) <= (SYSFILE->latestLCP_ID + 1))
+      {
+        jam();
+        /*-----------------------------------------------------------------*/
+        // We invalidate the checkpoint we are preparing to overwrite.
+        // The LCP id is still the old lcp id,
+        // this is the reason of comparing with lcpId + 1.
+        /*-----------------------------------------------------------------*/
+        fmgReplicaPtr.p->lcpStatus[i] = ZINVALID;
+      }
+      else
+      {
+        jam();
+        if (fmgReplicaPtr.p->maxGciCompleted[i] < keepGci)
+        {
+          jam();
+          keepGci = fmgReplicaPtr.p->maxGciCompleted[i];
+        }
+
+        if (fmgReplicaPtr.p->maxGciStarted[i] > oldestRestorableGci)
+        {
+          jam();
+          oldestRestorableGci = fmgReplicaPtr.p->maxGciStarted[i];
+        }
+      }
+    }
+  }
+
+  if (oldestRestorableGci == 0 && keepGci == Uint32(-1))
   {
     jam();
     if (fmgReplicaPtr.p->createGci[0] == fmgReplicaPtr.p->initialGci)
@@ -14056,6 +14058,10 @@ void Dbdih::findMinGci(ReplicaRecordPtr 
       //oldestRestorableGci = fmgReplicaPtr.p->createGci[0];
     }
   }
+  else
+  {
+    ndbassert(oldestRestorableGci < c_newest_restorable_gci);
+  }
   return;
 }//Dbdih::findMinGci()
 
@@ -15031,7 +15037,7 @@ void
 /*************************************************************************/
 /*       A NEW CRASHED REPLICA IS ADDED BY A NODE FAILURE.               */
 /*************************************************************************/
-void Dbdih::newCrashedReplica(Uint32 nodeId, ReplicaRecordPtr ncrReplicaPtr) 
+void Dbdih::newCrashedReplica(ReplicaRecordPtr ncrReplicaPtr)
 {
   /*----------------------------------------------------------------------*/
   /*       SET THE REPLICA_LAST_GCI OF THE CRASHED REPLICA TO LAST GCI    */
@@ -15041,6 +15047,7 @@ void Dbdih::newCrashedReplica(Uint32 nod
   /*       THAT THE NEW REPLICA IS NOT STARTED YET AND REPLICA_LAST_GCI IS*/
   /*       SET TO -1 TO INDICATE THAT IT IS NOT DEAD YET.                 */
   /*----------------------------------------------------------------------*/
+  Uint32 nodeId = ncrReplicaPtr.p->procNode;
   Uint32 lastGCI = SYSFILE->lastCompletedGCI[nodeId];
   if (ncrReplicaPtr.p->noCrashedReplicas + 1 == MAX_CRASHED_REPLICAS)
   {
@@ -15048,14 +15055,48 @@ void Dbdih::newCrashedReplica(Uint32 nod
     packCrashedReplicas(ncrReplicaPtr);
   }
   
+  Uint32 noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas;
   arrGuardErr(ncrReplicaPtr.p->noCrashedReplicas + 1, MAX_CRASHED_REPLICAS,
               NDBD_EXIT_MAX_CRASHED_REPLICAS);
-  ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] = 
-    lastGCI;
-  ncrReplicaPtr.p->noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas + 1;
-  ncrReplicaPtr.p->createGci[ncrReplicaPtr.p->noCrashedReplicas] = 0;
-  ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] = 
-    (Uint32)-1;
+
+  if (noCrashedReplicas > 0 &&
+      ncrReplicaPtr.p->replicaLastGci[noCrashedReplicas - 1] == lastGCI)
+  {
+    jam();
+    /**
+     * Don't add another redo-interval that already exists;
+     *  instead initialize a new one
+     */
+    ncrReplicaPtr.p->createGci[ncrReplicaPtr.p->noCrashedReplicas] =
+      ZINIT_CREATE_GCI;
+    ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
+      ZINIT_REPLICA_LAST_GCI;
+  }
+  else if (ncrReplicaPtr.p->createGci[noCrashedReplicas] <= lastGCI)
+  {
+    jam();
+    ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
+      lastGCI;
+    ncrReplicaPtr.p->noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas + 1;
+    ncrReplicaPtr.p->createGci[ncrReplicaPtr.p->noCrashedReplicas] =
+      ZINIT_CREATE_GCI;
+    ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
+      ZINIT_REPLICA_LAST_GCI;
+  }
+  else
+  {
+    /**
+     * This can happen if createGci is set
+     *   (during sendCreateFragReq(COMMIT_STORED))
+     *   but SYSFILE->lastCompletedGCI[nodeId] has not been updated
+     *   as the node has not yet completed its first LCP, causing it to return
+     *   GCP_SAVEREF (which makes SYSFILE->lastCompletedGCI[nodeId] be left
+     *   untouched)
+     *
+     * I.e. a crash during node-restart
+     */
+    ncrReplicaPtr.p->createGci[noCrashedReplicas] = ZINIT_CREATE_GCI;
+  }
   
 }//Dbdih::newCrashedReplica()
 
@@ -15125,8 +15166,37 @@ void Dbdih::packCrashedReplicas(ReplicaR
     replicaPtr.p->replicaLastGci[i] = replicaPtr.p->replicaLastGci[i + 1];
   }//for
   replicaPtr.p->noCrashedReplicas--;
+  replicaPtr.p->createGci[replicaPtr.p->noCrashedReplicas + 1] =
+    ZINIT_CREATE_GCI;
+  replicaPtr.p->replicaLastGci[replicaPtr.p->noCrashedReplicas + 1] =
+    ZINIT_REPLICA_LAST_GCI;
 }//Dbdih::packCrashedReplicas()
 
+void
+Dbdih::mergeCrashedReplicas(ReplicaRecordPtr replicaPtr)
+{
+  /**
+   * Merge adjacent redo-intervals, newest slot first: interval i is folded into i-1 when it starts exactly one GCI after i-1 ends
+   */
+  for (Uint32 i = replicaPtr.p->noCrashedReplicas; i > 0; i--)
+  {
+    jam();
+    if (replicaPtr.p->createGci[i] == 1 + replicaPtr.p->replicaLastGci[i-1]) // NOTE(review): if replicaLastGci[i-1] is still ZINIT_REPLICA_LAST_GCI (Uint32(-1)) the sum presumably wraps to 0 == ZINIT_CREATE_GCI - confirm callers never reach here in that state
+    {
+      jam();
+      replicaPtr.p->replicaLastGci[i-1] = replicaPtr.p->replicaLastGci[i]; // i-1 now ends where i ended
+      replicaPtr.p->createGci[i] = ZINIT_CREATE_GCI; // reset the vacated slot i to its initial values
+      replicaPtr.p->replicaLastGci[i] = ZINIT_REPLICA_LAST_GCI;
+      replicaPtr.p->noCrashedReplicas--; // one interval fewer after the merge
+    }
+    else
+    {
+      jam();
+      break; // stop at the first pair that is not contiguous; older pairs are not examined
+    }
+  }
+}
+
 void Dbdih::prepareReplicas(FragmentstorePtr fragPtr)
 {
   ReplicaRecordPtr prReplicaPtr;
@@ -15218,10 +15288,8 @@ void Dbdih::readReplica(RWFragment* rf, 
   for(i = noCrashedReplicas; i<MAX_CRASHED_REPLICAS; i++){
     readReplicaPtr.p->createGci[i] = readPageWord(rf);
     readReplicaPtr.p->replicaLastGci[i] = readPageWord(rf);
-    // They are not initialized...
-    readReplicaPtr.p->createGci[i] = 0;
-    readReplicaPtr.p->replicaLastGci[i] = ~0;
   }
+
   /* ---------------------------------------------------------------------- */
   /*       IF THE LAST COMPLETED LOCAL CHECKPOINT IS VALID AND LARGER THAN  */
   /*       THE LAST COMPLETED CHECKPOINT THEN WE WILL INVALIDATE THIS LOCAL */
@@ -15261,13 +15329,6 @@ void Dbdih::readReplica(RWFragment* rf, 
    */
   //removeOldCrashedReplicas(readReplicaPtr);
   
-  /* --------------------------------------------------------------------- */
-  // We set the last GCI of the replica that was alive before the node
-  // crashed last time. We set it to the last GCI which the node participated in.
-  /* --------------------------------------------------------------------- */
-  ndbrequire(readReplicaPtr.p->noCrashedReplicas < MAX_CRASHED_REPLICAS);
-  readReplicaPtr.p->replicaLastGci[readReplicaPtr.p->noCrashedReplicas] = 
-    SYSFILE->lastCompletedGCI[readReplicaPtr.p->procNode];
   /* ---------------------------------------------------------------------- */
   /*       FIND PROCESSOR RECORD                                            */
   /* ---------------------------------------------------------------------- */
@@ -15422,7 +15483,7 @@ void Dbdih::removeNodeFromStored(Uint32 
   if (!temporary)
   {
     jam();
-    newCrashedReplica(nodeId, replicatePtr);
+    newCrashedReplica(replicatePtr);
   }
   else
   {
@@ -15436,8 +15497,10 @@ void Dbdih::removeNodeFromStored(Uint32 
 /*************************************************************************/
 /*       REMOVE ANY OLD CRASHED REPLICAS THAT ARE NOT RESTORABLE ANY MORE*/
 /*************************************************************************/
-void Dbdih::removeOldCrashedReplicas(ReplicaRecordPtr rocReplicaPtr) 
+void Dbdih::removeOldCrashedReplicas(Uint32 tab, Uint32 frag,
+                                     ReplicaRecordPtr rocReplicaPtr)
 {
+  mergeCrashedReplicas(rocReplicaPtr);
   while (rocReplicaPtr.p->noCrashedReplicas > 0) {
     jam();
     /* --------------------------------------------------------------------- */
@@ -15454,15 +15517,30 @@ void Dbdih::removeOldCrashedReplicas(Rep
       break;
     }//if
   }//while
-  if (rocReplicaPtr.p->createGci[0] < SYSFILE->keepGCI){
+
+  while (rocReplicaPtr.p->createGci[0] < SYSFILE->keepGCI)
+  {
     jam();
     /* --------------------------------------------------------------------- */
     /*       MOVE FORWARD THE CREATE GCI TO A GCI THAT CAN BE USED. WE HAVE  */
     /*       NO CERTAINTY IN FINDING ANY LOG RECORDS FROM OLDER GCI'S.       */
     /* --------------------------------------------------------------------- */
     rocReplicaPtr.p->createGci[0] = SYSFILE->keepGCI;
-  }//if
-}//Dbdih::removeOldCrashedReplicas()
+
+    if (rocReplicaPtr.p->noCrashedReplicas)
+    {
+      /**
+       * a REDO interval which runs from e.g. 78 to 14 is not useful
+       *   but rather harmful, remove it...
+       */
+      if (rocReplicaPtr.p->createGci[0] > rocReplicaPtr.p->replicaLastGci[0])
+      {
+        jam();
+        packCrashedReplicas(rocReplicaPtr);
+      }
+    }
+  }
+}
 
 void Dbdih::removeOldStoredReplica(FragmentstorePtr fragPtr,
                                    ReplicaRecordPtr replicatePtr) 
@@ -15533,9 +15611,9 @@ void Dbdih::removeTooNewCrashedReplicas(
         SYSFILE->newestRestorableGCI){
       jam();
       rtnReplicaPtr.p->createGci[rtnReplicaPtr.p->noCrashedReplicas - 1] = 
-	(Uint32)-1;
+	ZINIT_CREATE_GCI;
       rtnReplicaPtr.p->replicaLastGci[rtnReplicaPtr.p->noCrashedReplicas - 1] = 
-	(Uint32)-1;
+	ZINIT_REPLICA_LAST_GCI;
       rtnReplicaPtr.p->noCrashedReplicas--;
     } else {
       break;

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2009-10-26 14:25:11 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2009-10-30 14:10:30 +0000
@@ -12342,6 +12342,7 @@ void Dblqh::execLCP_FRAG_ORD(Signal* sig
   if (lcpFragOrd->lastFragmentFlag)
   {
     jam();
+    CRASH_INSERTION(5054);
     if (lcpPtr.p->lcpState == LcpRecord::LCP_IDLE) {
       jam();
       /* ----------------------------------------------------------

=== modified file 'storage/ndb/test/ndbapi/testSystemRestart.cpp'
--- a/storage/ndb/test/ndbapi/testSystemRestart.cpp	2009-10-27 10:13:15 +0000
+++ b/storage/ndb/test/ndbapi/testSystemRestart.cpp	2009-10-30 14:10:30 +0000
@@ -2193,6 +2193,80 @@ loop:
   return NDBT_OK;
 }
 
+int
+runBug48436(NDBT_Context* ctx, NDBT_Step* step)
+{ // Test step for Bug48436: per loop, runs 5 randomly chosen restart scenarios on two nodes, then restarts the whole cluster
+  NdbRestarter res;
+  Uint32 loops = ctx->getNumLoops();
+  const Uint32 nodeCount = res.getNumDbNodes();
+  if(nodeCount < 2)
+  {
+    return NDBT_OK; // fewer than 2 data nodes: nothing is exercised, reported as OK
+  }
+
+  for (Uint32 l = 0; l<loops; l++)
+  {
+    int nodes[2];
+    nodes[0] = res.getNode(NdbRestarter::NS_RANDOM);
+    nodes[1] = res.getRandomNodeSameNodeGroup(nodes[0], rand());
+    int val = 7099; // dump code sent to all nodes in cases 0, 1 and 7; NOTE(review): presumably forces an LCP to start - confirm
+    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+
+    ndbout_c("nodes %u %u", nodes[0], nodes[1]); // NOTE(review): %u is used with int arguments
+
+    for (Uint32 j = 0; j<5; j++)
+    {
+      int c = (rand()) % 10; // scenario selector in 0..9
+      ndbout_c("case: %u", c);
+      switch(c){
+      case 0:
+      case 1:
+        res.dumpStateAllNodes(&val, 1); // no break: falls through into the restart sequence of cases 2-5
+      case 2:
+      case 3:
+      case 4:
+      case 5:
+        res.restartOneDbNode(nodes[0], false, true, true); // NOTE(review): flags presumably (initial, nostart, abort) - confirm against NdbRestarter
+        res.waitNodesNoStart(nodes+0,1);
+        res.dumpStateOneNode(nodes[0], val2, 2);
+        res.insertErrorInNode(nodes[0], 5054); // crash during restart: 5054 is the CRASH_INSERTION in Dblqh::execLCP_FRAG_ORD
+        res.startAll();
+        sleep(3);
+        res.waitNodesNoStart(nodes+0,1); // wait for nodes[0] to be back in no-start after the inserted crash
+        res.startAll();
+        break;
+      case 6: // restart of nodes[0] without any error insert
+        res.restartOneDbNode(nodes[0], false, true, true);
+        res.waitNodesNoStart(nodes+0, 1);
+        res.startAll();
+        break;
+      case 7:
+        res.dumpStateAllNodes(&val, 1); // no break: falls through into case 8
+      case 8: // same crash-during-restart sequence as cases 2-5, but on nodes[1]
+        res.restartOneDbNode(nodes[1], false, true, true);
+        res.waitNodesNoStart(nodes+1,1);
+        res.dumpStateOneNode(nodes[1], val2, 2);
+        res.insertErrorInNode(nodes[1], 5054); // crash during restart
+        res.startAll();
+        sleep(3);
+        res.waitNodesNoStart(nodes+1,1);
+        res.startAll();
+        break;
+      case 9: // restart of all nodes; last case, so no break is needed
+        res.restartAll(false, true, true);
+        res.waitClusterNoStart();
+        res.startAll();
+      }
+      res.waitClusterStarted(); // NOTE(review): return value ignored, as for every NdbRestarter call in this function
+    }
+    res.restartAll(false, true, true); // each outer loop ends with a restart of the whole cluster
+    res.waitClusterNoStart();
+    res.startAll();
+    res.waitClusterStarted();
+  }
+
+  return NDBT_OK; // always OK once the loops finish; no failure path returns anything else
+}
 
 NDBT_TESTSUITE(testSystemRestart);
 TESTCASE("SR1", 
@@ -2531,6 +2605,10 @@ TESTCASE("Bug46412", "")
 {
   INITIALIZER(runBug46412);
 }
+TESTCASE("Bug48436", "")
+{
+  INITIALIZER(runBug48436);
+}
 NDBT_TESTSUITE_END(testSystemRestart);
 
 int main(int argc, const char** argv){

=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt	2009-10-26 20:51:04 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt	2009-10-30 14:10:30 +0000
@@ -1223,6 +1223,10 @@ max-time: 1500
 cmd: testSystemRestart
 args: -n Bug41915 D2
 
+max-time: 1500
+cmd: testSystemRestart
+args: -n Bug48436 T1
+
 max-time: 300
 cmd: test_event
 args: -n Bug31701 T1


Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20091030141030-q6lm3hh915nbfezt.bundle
Thread
bzr commit into mysql-5.1-telco-7.0 branch (jonas:3179)Jonas Oreland30 Oct