List:Commits« Previous MessageNext Message »
From:jonas Date:November 30 2007 12:00pm
Subject:bk commit into 5.1 tree (jonas:1.2696)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-11-30 12:00:31+01:00, jonas@stripped +13 -0
  ndb - TO (take-over) during SR (system restart)
    is now a config option:
    wait-to-sr = true - wait for all nodes to do TO before running the last start phases
      (old behaviour)
    wait-to-sr = false - allow the cluster to start, and let TO nodes start (in parallel)
      afterwards
  
          

  storage/ndb/include/kernel/signaldata/CntrStart.hpp@stripped, 2007-11-30 12:00:28+01:00,
jonas@stripped +19 -0
    Create signal data-struct for CntrWaitRep

  storage/ndb/include/kernel/signaldata/StartRec.hpp@stripped, 2007-11-30 12:00:28+01:00,
jonas@stripped +3 -1
    Add ndb-node bitmask for nodes actually doing StartRecReq *now*

  storage/ndb/src/common/debugger/signaldata/StartRec.cpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +15 -5
    Add ndb-node bitmask for nodes actually doing StartRecReq *now*

  storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +4 -0
    Add bitmasks of node(s) that participate in / have been excluded from SR
      (due to TO)

  storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +1 -0
    Add new config (not in config.ini yet)

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +114 -135
    Fix TO during SR

  storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +7 -2
    Add enum for different values of cstartRecReq

  storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +41 -12
    Add enum for different values of cstartRecReq

  storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +7 -10
    1) Create signal data-struct for CntrWaitRep
    2) Add new WaitRep "data", used when a node is forced to do TO
    3) When that happens, let Cntr send START_COPYREQ
       (would be nice if it always did that, + START_(PERM/ME)_REQ)

  storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +3 -0
    Add signal receptors

  storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +135 -22
    Handle TO during SR
    - note need to keep "fake" dihStartType...

  storage/ndb/src/kernel/blocks/suma/Suma.cpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +32 -27
    Fix SUMA to do READ_NODES req late enough so that TO during SR works correctly

  storage/ndb/src/kernel/blocks/suma/Suma.hpp@stripped, 2007-11-30 12:00:29+01:00,
jonas@stripped +2 -0
    Fix SUMA to do READ_NODES req late enough so that TO during SR works correctly

diff -Nrup a/storage/ndb/include/kernel/signaldata/CntrStart.hpp
b/storage/ndb/include/kernel/signaldata/CntrStart.hpp
--- a/storage/ndb/include/kernel/signaldata/CntrStart.hpp	2006-12-31 01:06:41 +01:00
+++ b/storage/ndb/include/kernel/signaldata/CntrStart.hpp	2007-11-30 12:00:28 +01:00
@@ -81,4 +81,23 @@ private:
   Uint32 startingNodes[NdbNodeBitmask::Size];
 };
 
+struct CntrWaitRep
+{
+  Uint32 nodeId;
+  Uint32 waitPoint;
+
+  enum WaitPos
+  {
+    ZWAITPOINT_4_1  = 1
+    ,ZWAITPOINT_4_2 = 2
+    ,ZWAITPOINT_5_1 = 3
+    ,ZWAITPOINT_5_2 = 4
+    ,ZWAITPOINT_6_1 = 5
+    ,ZWAITPOINT_6_2 = 6
+    ,ZWAITPOINT_7_1 = 7
+    ,ZWAITPOINT_7_2 = 8
+    ,ZWAITPOINT_4_2_TO = 9 // We are forced to TO (during SR)
+  };
+};
+
 #endif
diff -Nrup a/storage/ndb/include/kernel/signaldata/StartRec.hpp
b/storage/ndb/include/kernel/signaldata/StartRec.hpp
--- a/storage/ndb/include/kernel/signaldata/StartRec.hpp	2007-11-23 08:15:12 +01:00
+++ b/storage/ndb/include/kernel/signaldata/StartRec.hpp	2007-11-30 12:00:28 +01:00
@@ -17,6 +17,7 @@
 #define START_REC_HPP
 
 #include "SignalData.hpp"
+#include <NodeBitmask.hpp>
 
 class StartRecReq {
   /**
@@ -30,7 +31,7 @@ class StartRecReq {
 
   friend bool printSTART_REC_REQ(FILE *, const Uint32 *, Uint32, Uint16);  
 public:
-  STATIC_CONST( SignalLength = 6 );
+  STATIC_CONST( SignalLength = 6 + NdbNodeBitmask::Size);
 private:
   
   Uint32 receivingNodeId;
@@ -39,6 +40,7 @@ private:
   Uint32 lastCompletedGci;
   Uint32 newestGci;
   Uint32 senderData;
+  Uint32 sr_nodes[NdbNodeBitmask::Size];
 };
 
 class StartRecConf {
diff -Nrup a/storage/ndb/src/common/debugger/signaldata/StartRec.cpp
b/storage/ndb/src/common/debugger/signaldata/StartRec.cpp
--- a/storage/ndb/src/common/debugger/signaldata/StartRec.cpp	2006-12-23 20:20:11 +01:00
+++ b/storage/ndb/src/common/debugger/signaldata/StartRec.cpp	2007-11-30 12:00:29 +01:00
@@ -30,10 +30,19 @@ printSTART_REC_REQ(FILE * output, 
 	  refToNode(sig->senderRef),
 	  refToBlock(sig->senderRef));
   
-  fprintf(output, " keepGci: %d lastCompletedGci: %d newestGci: %d\n",
+  fprintf(output, 
+          " keepGci: %d lastCompletedGci: %d newestGci: %d senderData: %x\n",
 	  sig->keepGci, 
 	  sig->lastCompletedGci,
-	  sig->newestGci);
+	  sig->newestGci,
+          sig->senderData);
+
+  NdbNodeBitmask mask;
+  mask.assign(NdbNodeBitmask::Size, sig->sr_nodes);
+  
+  char buf[100];
+  fprintf(output,
+          " sr_nodes: %s", mask.getText(buf));
 
   return true;
 }
@@ -45,9 +54,10 @@ printSTART_REC_CONF(FILE * output, 
 		    Uint16 recBlockNo){
   StartRecConf * sig = (StartRecConf *) theData;
 
-  fprintf(output, " startingNodeId: %d\n",
-	  sig->startingNodeId);
-
+  fprintf(output, " startingNodeId: %d senderData: %x\n",
+	  sig->startingNodeId,
+          sig->senderData);  // printed in hex, same form as printSTART_REC_REQ
+  
   return true;
 }
 
diff -Nrup a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
--- a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2007-11-23 16:08:44 +01:00
+++ b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2007-11-30 12:00:29 +01:00
@@ -1731,6 +1731,10 @@ private:
 #endif
 
   bool check_enable_micro_gcp(Signal* signal, bool broadcast);
+
+  bool c_sr_wait_to;
+  NdbNodeBitmask m_sr_nodes;
+  NdbNodeBitmask m_to_nodes;
 };
 
 #if (DIH_CDATA_SIZE < _SYSFILE_SIZE32)
diff -Nrup a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2007-11-23 08:15:12 +01:00
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2007-11-30 12:00:29 +01:00
@@ -66,6 +66,7 @@ void Dbdih::initData() 
   c_blockCommitNo  = 1;
   cntrlblockref    = RNIL;
   c_set_initial_start_flag = FALSE;
+  c_sr_wait_to = false;
 }//Dbdih::initData()
 
 void Dbdih::initRecords() 
diff -Nrup a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-11-26 14:22:15 +01:00
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-11-30 12:00:29 +01:00
@@ -241,6 +241,13 @@ void Dbdih::sendSTART_INFOREQ(Signal* si
 
 void Dbdih::sendSTART_RECREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
 {
+  if (!m_sr_nodes.get(nodeId))
+  {
+    jam();
+    c_START_RECREQ_Counter.clearWaitingFor(nodeId);
+    return;
+  }
+  
   StartRecReq * const req = (StartRecReq*)&signal->theData[0];
   BlockReference ref = calcLqhBlockRef(nodeId);
   req->receivingNodeId = nodeId;
@@ -249,6 +256,7 @@ void Dbdih::sendSTART_RECREQ(Signal* sig
   req->lastCompletedGci = SYSFILE->lastCompletedGCI[nodeId];
   req->newestGci = SYSFILE->newestRestorableGCI;
   req->senderData = extra;
+  m_sr_nodes.copyto(NdbNodeBitmask::Size, req->sr_nodes);
   sendSignal(ref, GSN_START_RECREQ, signal, StartRecReq::SignalLength, JBB);
 
   signal->theData[0] = NDB_LE_StartREDOLog;
@@ -1470,10 +1478,6 @@ void Dbdih::execNDB_STTOR(Signal* signal
       return;
     case NodeState::ST_SYSTEM_RESTART:
       jam();
-      if (isMaster()) {
-	jam();
-	systemRestartTakeOverLab(signal);
-      }
       ndbsttorry10Lab(signal, __LINE__);
       return;
     case NodeState::ST_INITIAL_NODE_RESTART:
@@ -1527,7 +1531,6 @@ void Dbdih::execNDB_STTOR(Signal* signal
       jam();
       {
         StartCopyReq* req = (StartCopyReq*)signal->getDataPtrSend();
-        req->startingNodeId = getOwnNodeId();
         req->senderRef = reference();
         req->senderData = RNIL;
         req->flags = StartCopyReq::WAIT_LCP;
@@ -1709,6 +1712,26 @@ void Dbdih::ndbStartReqLab(Signal* signa
   
   ndbrequire(isMaster());
   copyGciLab(signal, CopyGCIReq::RESTART); // We have already read the file!
+
+  /**
+   * Keep bitmap of nodes that can be restored...
+   *   and nodes that need take-over
+   *   
+   */
+  m_sr_nodes.clear();
+  m_to_nodes.clear();
+
+  // Start with assumption that all can restore
+  {
+    NodeRecordPtr specNodePtr;
+    specNodePtr.i = cfirstAliveNode;
+    do {
+      jam();
+      m_sr_nodes.set(specNodePtr.i);
+      ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord);            
+      specNodePtr.i = specNodePtr.p->nextNode;
+    } while (specNodePtr.i != RNIL);
+  }
 }//Dbdih::ndbStartReqLab()
 
 void Dbdih::execREAD_NODESCONF(Signal* signal) 
@@ -1974,10 +1997,44 @@ void Dbdih::execSTART_MECONF(Signal* sig
 void Dbdih::execSTART_COPYCONF(Signal* signal) 
 {
   jamEntry();
-  Uint32 nodeId = signal->theData[0];
-  ndbrequire(nodeId == cownNodeId);
-  CRASH_INSERTION(7132);
-  ndbsttorry10Lab(signal, __LINE__);
+  
+  StartCopyConf* conf = (StartCopyConf*)signal->getDataPtr();
+  Uint32 nodeId = conf->startingNodeId;
+  Uint32 senderData = conf->senderData;
+
+  if (senderData == RNIL)
+  {
+    /**
+     * This is NR
+     */
+    jam();
+    ndbrequire(nodeId == cownNodeId);
+    CRASH_INSERTION(7132);
+    ndbsttorry10Lab(signal, __LINE__);
+  }
+  else
+  {
+    /**
+     * This is TO during SR...waiting for all nodes
+     */
+    infoEvent("Take-over of %u complete", nodeId);
+
+    ndbrequire(senderData == getOwnNodeId());
+    ndbrequire(m_to_nodes.get(nodeId));
+    m_to_nodes.clear(nodeId);
+    m_sr_nodes.set(nodeId);
+    if (!m_to_nodes.isclear())
+    {
+      jam();
+      return;
+    }
+
+    signal->theData[0] = reference();
+    m_sr_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1);
+    sendSignal(cntrlblockref, GSN_NDB_STARTCONF, signal, 
+               1 + NdbNodeBitmask::Size, JBB);
+    return;
+  }
   return;
 }//Dbdih::execSTART_COPYCONF()
 
@@ -2603,114 +2660,6 @@ void Dbdih::execINCL_NODEREQ(Signal* sig
 // both the master and the slaves.
 /* ------------------------------------------------------------------------- */
 
-/*****************************************************************************/
-/***********     TAKE OVER DECISION  MODULE                      *************/
-/*****************************************************************************/
-// This module contains the subroutines that take the decision whether to take
-// over a node now or not.
-/* ------------------------------------------------------------------------- */
-/*                       MASTER LOGIC FOR SYSTEM RESTART                     */
-/* ------------------------------------------------------------------------- */
-// WE ONLY COME HERE IF WE ARE THE MASTER AND WE ARE PERFORMING A SYSTEM
-// RESTART. WE ALSO COME HERE DURING THIS SYSTEM RESTART ONE TIME PER NODE
-// THAT NEEDS TAKE OVER.
-/*---------------------------------------------------------------------------*/
-// WE CHECK IF ANY NODE NEEDS TO BE TAKEN OVER AND THE TAKE OVER HAS NOT YET
-// BEEN STARTED OR COMPLETED.
-/*---------------------------------------------------------------------------*/
-void
-Dbdih::systemRestartTakeOverLab(Signal* signal) 
-{
-  NodeRecordPtr nodePtr;
-  StartCopyReq req;
-  req.senderData = RNIL;
-  req.senderRef = reference();
-  req.flags = 0;
-
-  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
-    jam();
-    ptrAss(nodePtr, nodeRecord);
-    switch (nodePtr.p->activeStatus) {
-    case Sysfile::NS_Active:
-    case Sysfile::NS_ActiveMissed_1:
-      jam();
-      break;
-      /*---------------------------------------------------------------------*/
-      // WE HAVE NOT REACHED A STATE YET WHERE THIS NODE NEEDS TO BE TAKEN OVER
-      /*---------------------------------------------------------------------*/
-    case Sysfile::NS_ActiveMissed_2:
-    case Sysfile::NS_NotActive_NotTakenOver:
-      jam();
-      /*---------------------------------------------------------------------*/
-      // THIS NODE IS IN TROUBLE. 
-      // WE MUST SUCCEED WITH A LOCAL CHECKPOINT WITH THIS NODE TO REMOVE THE 
-      // DANGER. IF THE NODE IS NOT ALIVE THEN THIS WILL NOT BE
-      // POSSIBLE AND WE CAN START THE TAKE OVER IMMEDIATELY IF WE HAVE ANY 
-      // NODES THAT CAN PERFORM A TAKE OVER.
-      /*---------------------------------------------------------------------*/
-      if (nodePtr.p->nodeStatus != NodeRecord::ALIVE) {
-        jam();
-        Uint32 ThotSpareNode = findHotSpare();
-        if (ThotSpareNode != RNIL) {
-          jam();
-          startTakeOver(signal, ThotSpareNode, nodePtr.i, &req);
-        }//if
-      } else if(nodePtr.p->activeStatus == Sysfile::NS_NotActive_NotTakenOver){
-        jam();
-	/*-------------------------------------------------------------------*/
-	// NOT ACTIVE NODES THAT HAVE NOT YET BEEN TAKEN OVER NEEDS TAKE OVER
-	// IMMEDIATELY. IF WE ARE ALIVE WE TAKE OVER OUR OWN NODE.
-	/*-------------------------------------------------------------------*/
-	infoEvent("Take over of node %d started", 
-		  nodePtr.i);
-	startTakeOver(signal, nodePtr.i, nodePtr.i, &req);
-      }//if
-      break;
-    case Sysfile::NS_TakeOver:
-      /**-------------------------------------------------------------------
-       * WE MUST HAVE FAILED IN THE MIDDLE OF THE TAKE OVER PROCESS. 
-       * WE WILL CONCLUDE THE TAKE OVER PROCESS NOW.
-       *-------------------------------------------------------------------*/
-      if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
-        jam();
-        Uint32 takeOverNode = Sysfile::getTakeOverNode(nodePtr.i, 
-						       SYSFILE->takeOver);
-	if(takeOverNode == 0){
-	  jam();
-	  warningEvent("Bug in take-over code restarting");
-	  takeOverNode = nodePtr.i;
-	}
-        startTakeOver(signal, nodePtr.i, takeOverNode, &req);
-      } else {
-        jam();
-	/**-------------------------------------------------------------------
-	 * We are not currently taking over, change our active status.
-	 *-------------------------------------------------------------------*/
-        nodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
-        setNodeRestartInfoBits();
-      }//if
-      break;
-    case Sysfile::NS_HotSpare:
-      jam();
-      break;
-      /*---------------------------------------------------------------------*/
-      // WE NEED NOT TAKE OVER NODES THAT ARE HOT SPARE.
-      /*---------------------------------------------------------------------*/
-    case Sysfile::NS_NotDefined:
-      jam();
-      break;
-      /*---------------------------------------------------------------------*/
-      // WE NEED NOT TAKE OVER NODES THAT DO NOT EVEN EXIST IN THE CLUSTER.
-      /*---------------------------------------------------------------------*/
-    default:
-      ndbrequire(false);
-      break;
-    }//switch
-  }//for
-  /*-------------------------------------------------------------------------*/
-  /* NO TAKE OVER HAS BEEN INITIATED.                                        */
-  /*-------------------------------------------------------------------------*/
-}//Dbdih::systemRestartTakeOverLab()
 
 void Dbdih::changeNodeGroups(Uint32 startNode, Uint32 nodeTakenOver)
 {
@@ -3467,6 +3416,7 @@ done:
 void
 Dbdih::nr_run_redo(Signal* signal, TakeOverRecordPtr takeOverPtr)
 {
+  m_sr_nodes.set(takeOverPtr.p->toStartingNode);
   takeOverPtr.p->toCurrentTabref = 0;
   takeOverPtr.p->toCurrentFragid = 0;
   takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_RUN_REDO;
@@ -9214,6 +9164,8 @@ Dbdih::resetReplicaSr(TabRecordPtr tabPt
 			" for table %d fragment: %d",
 			nodePtr.i, tabPtr.i, i);
 	      
+              m_sr_nodes.clear(nodePtr.i);
+              m_to_nodes.set(nodePtr.i);
 	      setNodeActiveStatus(nodePtr.i, 
 				  Sysfile::NS_NotActive_NotTakenOver);
 	    }
@@ -9829,19 +9781,55 @@ void Dbdih::execSTART_RECCONF(Signal* si
     Ptr<TakeOverRecord> takeOverPtr;
     c_takeOverPool.getPtr(takeOverPtr, senderData);
     sendStartTo(signal, takeOverPtr);
+    return;
   }
-  else
+
+  /* --------------------------------------------------------------------- */
+  // This was the system restart case. We set the state indicating that the
+  // node has completed restoration of all fragments.
+  /* --------------------------------------------------------------------- */
+  receiveLoopMacro(START_RECREQ, senderNodeId);
+  
+  /**
+   * Remove each node that has to TO from LCP/LQH
+   */
+  Uint32 i = 0;
+  while ((i = m_to_nodes.find(i + 1)) != NdbNodeBitmask::NotFound)
   {
-    /* --------------------------------------------------------------------- */
-    // This was the system restart case. We set the state indicating that the
-    // node has completed restoration of all fragments.
-    /* --------------------------------------------------------------------- */
-    receiveLoopMacro(START_RECREQ, senderNodeId);
+    jam();
+    NodeRecordPtr nodePtr;
+    nodePtr.i = i;
+    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
+    nodePtr.p->copyCompleted = 0;
+  }
 
-    signal->theData[0] = reference();
-    sendSignal(cntrlblockref, GSN_NDB_STARTCONF, signal, 1, JBB);
+  if (!m_to_nodes.isclear() && c_sr_wait_to)
+  {
+    jam();
+
+    StartCopyReq* req = (StartCopyReq*)signal->getDataPtrSend();
+    req->senderRef = reference();
+    req->senderData = getOwnNodeId();
+    req->flags = 0; // Note dont wait for LCP
+
+    i = 0;
+    while ((i = m_to_nodes.find(i + 1)) != NdbNodeBitmask::NotFound)
+    {
+      jam();
+      req->startingNodeId = i;
+      sendSignal(calcDihBlockRef(i), GSN_START_COPYREQ, signal, 
+                 StartCopyReq::SignalLength, JBB);
+    }
+
+    char buf[100];
+    infoEvent("Starting take-over of %s", m_to_nodes.getText(buf));    
     return;
-  }//if
+  }
+  
+  signal->theData[0] = reference();
+  m_sr_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1);
+  sendSignal(cntrlblockref, GSN_NDB_STARTCONF, signal, 
+             1 + NdbNodeBitmask::Size, JBB);
 }//Dbdih::execSTART_RECCONF()
 
 void Dbdih::copyNodeLab(Signal* signal, Uint32 tableId) 
@@ -10464,11 +10452,6 @@ Dbdih::startLcpMutex_locked(Signal* sign
   req->participatingLQH = c_lcpState.m_participatingLQH;
   req->participatingDIH = c_lcpState.m_participatingDIH;
   sendLoopMacro(START_LCP_REQ, sendSTART_LCP_REQ, RNIL);
-  
-  char buf0[100], buf1[100];
-  infoEvent("SLR: %s %s", 
-            c_lcpState.m_participatingDIH.getText(buf0),
-            c_lcpState.m_participatingLQH.getText(buf1));
 }
 
 void
@@ -12054,10 +12037,6 @@ void Dbdih::checkKeepGci(TabRecordPtr ta
         jam();
         c_lcpState.oldestRestorableGci = oldestRestorableGci;
       }//if
-    }
-    else
-    {
-      ndbout_c("dont consicider LCP for node %u", ckgReplicaPtr.p->procNode);
     }
     ckgReplicaPtr.i = ckgReplicaPtr.p->nextReplica;
   }//while
diff -Nrup a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
--- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2007-11-23 08:22:01 +01:00
+++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2007-11-30 12:00:29 +01:00
@@ -2774,9 +2774,14 @@ private:
 /*RECEPTION OF THIS SIGNAL INDICATES THAT ALL FRAGMENTS THAT THIS NODE       */
 /*SHOULD START HAVE BEEN RECEIVED.                                           */
 /* ------------------------------------------------------------------------- */
-  Uint8 cstartRecReq;
+  enum { 
+    SRR_INITIAL                = 0
+    ,SRR_START_REC_REQ_ARRIVED = 1
+    ,SRR_REDO_COMPLETE         = 2
+    ,SRR_FIRST_LCP_DONE        = 3
+  } cstartRecReq;
   Uint32 cstartRecReqData;
-
+  
 /* ------------------------------------------------------------------------- */
 /*THIS VARIABLE KEEPS TRACK OF HOW MANY FRAGMENTS THAT PARTICIPATE IN        */
 /*EXECUTING THE LOG. IF ZERO WE DON'T NEED TO EXECUTE THE LOG AT ALL.        */
diff -Nrup a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-11-23 16:08:44 +01:00
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2007-11-30 12:00:29 +01:00
@@ -446,7 +446,7 @@ void Dblqh::execCONTINUEB(Signal* signal
     else
     {
       jam();
-      cstartRecReq = 2;
+      cstartRecReq = SRR_REDO_COMPLETE;
       ndbrequire(c_lcp_complete_fragments.isEmpty());
       StartRecConf * conf = (StartRecConf*)signal->getDataPtrSend();
       conf->startingNodeId = getOwnNodeId();
@@ -11999,11 +11999,11 @@ void Dblqh::sendLCP_COMPLETE_REP(Signal*
     sendEMPTY_LCP_CONF(signal, true);
   }
 
-  if (getNodeState().getNodeRestartInProgress() && cstartRecReq != 3)
+  if (cstartRecReq < SRR_FIRST_LCP_DONE)
   {
     jam();
-    ndbrequire(cstartRecReq == 2);
-    cstartRecReq = 3;
+    ndbrequire(cstartRecReq == SRR_REDO_COMPLETE);
+    cstartRecReq = SRR_FIRST_LCP_DONE;
   }
   return;
   
@@ -12248,8 +12248,11 @@ void Dblqh::execGCP_SAVEREQ(Signal* sign
     return;
   }
 
-  if (getNodeState().getNodeRestartInProgress() && cstartRecReq < 2)
+  if (cstartRecReq < SRR_REDO_COMPLETE)
   {
+    /**
+     * REDO running is not complete
+     */
     GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
     saveRef->dihPtr = dihPtr;
     saveRef->nodeId = getOwnNodeId();
@@ -12301,8 +12304,11 @@ void Dblqh::execGCP_SAVEREQ(Signal* sign
     cnewestGci = gci;
   }//if
 
-  if(getNodeState().getNodeRestartInProgress() && cstartRecReq < 3)
+  if(cstartRecReq < SRR_FIRST_LCP_DONE)
   {
+    /**
+     * First LCP has not been done
+     */
     GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
     saveRef->dihPtr = dihPtr;
     saveRef->nodeId = getOwnNodeId();
@@ -14387,7 +14393,8 @@ void Dblqh::execRESTORE_LCP_CONF(Signal*
     return;
   }
 
-  if (c_lcp_restoring_fragments.isEmpty() && cstartRecReq == 1)
+  if (c_lcp_restoring_fragments.isEmpty() && 
+      cstartRecReq == SRR_START_REC_REQ_ARRIVED)
   {
     jam();
     /* ----------------------------------------------------------------
@@ -14429,7 +14436,29 @@ void Dblqh::execSTART_RECREQ(Signal* sig
   ndbrequire(req->receivingNodeId == cownNodeid);
 
   cnewestCompletedGci = cnewestGci;
-  cstartRecReq = 1;
+  cstartRecReq = SRR_START_REC_REQ_ARRIVED; // StartRecReq has arrived
+  
+  if (signal->getLength() == StartRecReq::SignalLength)
+  {
+    jam();
+    NdbNodeBitmask tmp;
+    tmp.assign(NdbNodeBitmask::Size, req->sr_nodes);
+    if (!tmp.equal(m_sr_nodes))
+    {
+      char buf0[100], buf1[100];
+      ndbout_c("execSTART_RECREQ chaning srnodes from %s to %s",
+               m_sr_nodes.getText(buf0),
+               tmp.getText(buf1));
+      
+    }
+    m_sr_nodes.assign(NdbNodeBitmask::Size, req->sr_nodes);
+  }
+  else
+  {
+    jam();
+    cstartRecReqData = RNIL;
+  }
+  
   for (logPartPtr.i = 0; logPartPtr.i < 4; logPartPtr.i++) {
     ptrAss(logPartPtr, logPartRecord);
     logPartPtr.p->logPartNewestCompletedGCI = cnewestCompletedGci;
@@ -14497,7 +14526,7 @@ void Dblqh::execSTART_RECCONF(Signal* si
   if(cstartType == NodeState::ST_INITIAL_NODE_RESTART)
   {
     jam();
-    cstartRecReq = 2;
+    cstartRecReq = SRR_REDO_COMPLETE; // REDO complete
 
     StartRecConf * conf = (StartRecConf*)signal->getDataPtrSend();
     conf->startingNodeId = getOwnNodeId();
@@ -16259,7 +16288,7 @@ void Dblqh::srFourthComp(Signal* signal)
     
     ndbrequire(cinitialStartOngoing == ZTRUE);
     cinitialStartOngoing = ZFALSE;
-
+    cstartRecReq = SRR_REDO_COMPLETE;
     checkStartCompletedLab(signal);
     return;
   } else if ((cstartType == NodeState::ST_NODE_RESTART) ||
@@ -16278,7 +16307,7 @@ void Dblqh::srFourthComp(Signal* signal)
 	return;
       }
     }
-    cstartRecReq = 2;
+    cstartRecReq = SRR_REDO_COMPLETE; // REDO complete
     StartRecConf * conf = (StartRecConf*)signal->getDataPtrSend();
     conf->startingNodeId = getOwnNodeId();
     conf->senderData = cstartRecReqData;
@@ -17172,7 +17201,7 @@ void Dblqh::initialiseRecordsLab(Signal*
     cnoActiveCopy = 0;
     ccurrentGcprec = RNIL;
     caddNodeState = ZFALSE;
-    cstartRecReq = 0;
+    cstartRecReq = SRR_INITIAL; // Initial
     cnewestGci = 0;
     cnewestCompletedGci = 0;
     crestartOldestGci = 0;
diff -Nrup a/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp
b/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp
--- a/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp	2007-11-23 08:15:12 +01:00
+++ b/storage/ndb/src/kernel/blocks/ndbcntr/Ndbcntr.hpp	2007-11-30 12:00:29 +01:00
@@ -60,14 +60,6 @@
 #define ZSTART_PHASE_8 8
 #define ZSTART_PHASE_9 9
 #define ZSTART_PHASE_END 255
-#define ZWAITPOINT_4_1 1
-#define ZWAITPOINT_4_2 2
-#define ZWAITPOINT_5_1 3
-#define ZWAITPOINT_5_2 4
-#define ZWAITPOINT_6_1 5
-#define ZWAITPOINT_6_2 6
-#define ZWAITPOINT_7_1 7
-#define ZWAITPOINT_7_2 8
 #define ZSYSTAB_VERSION 1
 #endif
 
@@ -87,9 +79,10 @@ public:
     
     void reset();
     NdbNodeBitmask m_starting;
-    NdbNodeBitmask m_waiting; // == (m_withLog | m_withoutLog)
+    NdbNodeBitmask m_waiting; // == (m_withLog | m_withoutLog | m_waitTO)
     NdbNodeBitmask m_withLog;
     NdbNodeBitmask m_withoutLog;
+    NdbNodeBitmask m_waitTO;
     Uint32 m_lastGci;
     Uint32 m_lastGciNodeId;
 
@@ -258,12 +251,15 @@ private:
   void ph7ALab(Signal* signal);
   void ph8ALab(Signal* signal);
 
-
   void waitpoint41Lab(Signal* signal);
   void waitpoint51Lab(Signal* signal);
   void waitpoint52Lab(Signal* signal);
   void waitpoint61Lab(Signal* signal);
   void waitpoint71Lab(Signal* signal);
+  void waitpoint42To(Signal* signal);
+
+  void execSTART_COPYREF(Signal*);
+  void execSTART_COPYCONF(Signal*);
 
   void updateNodeState(Signal* signal, const NodeState & newState) const ;
   void getNodeGroup(Signal* signal);
@@ -308,6 +304,7 @@ private:
   Uint16 cnoStartNodes;
   UintR cnoWaitrep;
   NodeState::StartType ctypeOfStart;
+  NodeState::StartType cdihStartType;
   Uint16 cdynamicNodeId;
 
   Uint32 c_fsRemoveCount;
diff -Nrup a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp
b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp
--- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp	2007-11-23 08:15:13 +01:00
+++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrInit.cpp	2007-11-30 12:00:29 +01:00
@@ -106,6 +106,9 @@ Ndbcntr::Ndbcntr(Block_context& ctx):
   addRecSignal(GSN_READ_CONFIG_CONF, &Ndbcntr::execREAD_CONFIG_CONF);
 
   addRecSignal(GSN_FSREMOVECONF, &Ndbcntr::execFSREMOVECONF);
+
+  addRecSignal(GSN_START_COPYREF, &Ndbcntr::execSTART_COPYREF);
+  addRecSignal(GSN_START_COPYCONF, &Ndbcntr::execSTART_COPYCONF);
   
   initData();
   ctypeOfStart = NodeState::ST_ILLEGAL_TYPE;
diff -Nrup a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
--- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2007-11-23 08:15:13 +01:00
+++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2007-11-30 12:00:29 +01:00
@@ -50,6 +50,8 @@
 #include <NdbOut.hpp>
 #include <NdbTick.h>
 
+#include <signaldata/TakeOver.hpp>
+
 // used during shutdown for reporting current startphase
 // accessed from Emulator.cpp, NdbShutdown()
 Uint32 g_currentStartPhase;
@@ -461,6 +463,7 @@ void Ndbcntr::execDIH_RESTARTCONF(Signal
   //cmasterDihId = signal->theData[0];
   c_start.m_lastGci = signal->theData[1];
   ctypeOfStart = NodeState::ST_SYSTEM_RESTART;
+  cdihStartType = ctypeOfStart;
   ph2ALab(signal);
   return;
 }//Ndbcntr::execDIH_RESTARTCONF()
@@ -472,6 +475,7 @@ void Ndbcntr::execDIH_RESTARTREF(Signal*
 {
   jamEntry();
   ctypeOfStart = NodeState::ST_INITIAL_START;
+  cdihStartType = ctypeOfStart;
   ph2ALab(signal);
   return;
 }//Ndbcntr::execDIH_RESTARTREF()
@@ -582,6 +586,7 @@ Ndbcntr::StartRecord::reset(){
   m_waiting.clear();
   m_withLog.clear();
   m_withoutLog.clear();
+  m_waitTO.clear();
   m_lastGci = m_lastGciNodeId = 0;
   m_startPartialTimeout = ~0;
   m_startPartitionedTimeout = ~0;
@@ -597,6 +602,7 @@ Ndbcntr::execCNTR_START_CONF(Signal * si
 
   cnoStartNodes = conf->noStartNodes;
   ctypeOfStart = (NodeState::StartType)conf->startType;
+  cdihStartType = ctypeOfStart;
   c_start.m_lastGci = conf->startGci;
   cmasterNodeId = conf->masterNodeId;
   NdbNodeBitmask tmp; 
@@ -640,6 +646,9 @@ Ndbcntr::execCNTR_START_REP(Signal* sign
   for(Uint32 i = 0; i<ALL_BLOCKS_SZ; i++){
     sendSignal(ALL_BLOCKS[i].Ref, GSN_NODE_START_REP, signal, 1, JBB);
   }
+
+  signal->theData[0] = nodeId;
+  execSTART_PERMREP(signal);
 }
 
 void
@@ -772,12 +781,42 @@ void
 Ndbcntr::startWaitingNodes(Signal * signal){
 
 #if ! PARALLELL_NR
+  if (!c_start.m_waitTO.isclear())
+  {
+    jam();
+
+    {
+      char buf[100];
+      ndbout_c("starting (TO) %s", c_start.m_waitTO.getText(buf));
+    }
+
+    /**
+     * TO during SR
+     *   this can run in parallel (nowadays :-)
+     */
+    NodeReceiverGroup rg(NDBCNTR, c_start.m_waitTO);
+    c_start.m_starting.bitOR(c_start.m_waitTO);
+    c_start.m_waiting.bitANDC(c_start.m_waitTO);
+    c_start.m_waitTO.clear();
+
+    /**
+     * They are stuck in CntrWaitRep::ZWAITPOINT_4_1
+     *   have all meta data ok...but needs START_COPYREQ
+     */
+    CntrWaitRep* rep = (CntrWaitRep*)signal->getDataPtrSend();
+    rep->nodeId = getOwnNodeId();
+    rep->waitPoint = CntrWaitRep::ZWAITPOINT_4_2_TO;
+    sendSignal(rg, GSN_CNTR_WAITREP, signal, 2, JBB);
+    return;
+  }
+
   const Uint32 nodeId = c_start.m_waiting.find(0);
   const Uint32 Tref = calcNdbCntrBlockRef(nodeId);
   ndbrequire(nodeId != c_start.m_waiting.NotFound);
 
   NodeState::StartType nrType = NodeState::ST_NODE_RESTART;
-  if(c_start.m_withoutLog.get(nodeId)){
+  if(c_start.m_withoutLog.get(nodeId))
+  {
     jam();
     nrType = NodeState::ST_INITIAL_NODE_RESTART;
   }
@@ -1066,13 +1105,52 @@ void Ndbcntr::waitpoint41Lab(Signal* sig
 /* RECEIVE A WAIT REPORT FROM THE MASTER*/
 /*--------------------------------------*/
     signal->theData[0] = getOwnNodeId();
-    signal->theData[1] = ZWAITPOINT_4_1;
+    signal->theData[1] = CntrWaitRep::ZWAITPOINT_4_1;
     sendSignal(calcNdbCntrBlockRef(cmasterNodeId), 
 	       GSN_CNTR_WAITREP, signal, 2, JBB);
   }//if
   return;
 }//Ndbcntr::waitpoint41Lab()
 
+void
+Ndbcntr::waitpoint42To(Signal* signal)
+{
+  jam();
+  
+  /**
+   * This is a ugly hack
+   * To "easy" enable TO during SR
+   *   a better solution would be to move "all" start handling 
+   *   from DIH to cntr...which knows what's going on
+   */
+  cdihStartType = NodeState::ST_SYSTEM_RESTART;
+  ctypeOfStart = NodeState::ST_NODE_RESTART;
+
+  /**
+   * We were forced to perform TO
+   */
+  StartCopyReq* req = (StartCopyReq*)signal->getDataPtrSend();
+  req->senderRef = reference();
+  req->senderData = RNIL;
+  req->flags = StartCopyReq::WAIT_LCP;
+  req->startingNodeId = getOwnNodeId();
+  sendSignal(DBDIH_REF, GSN_START_COPYREQ, signal, 
+             StartCopyReq::SignalLength, JBB);
+}
+
+void
+Ndbcntr::execSTART_COPYREF(Signal* signal)
+{
+
+}
+
+void
+Ndbcntr::execSTART_COPYCONF(Signal* signal)
+{
+  sendSttorry(signal);  
+}
+
+
 /*******************************/
 /*  NDB_STARTCONF              */
 /*******************************/
@@ -1080,9 +1158,35 @@ void Ndbcntr::execNDB_STARTCONF(Signal* 
 {
   jamEntry();
 
+  NdbNodeBitmask tmp;
+  if (signal->getLength() >= 1 + NdbNodeBitmask::Size)
+  {
+    jam();
+    tmp.assign(NdbNodeBitmask::Size, signal->theData+1);
+    if (!c_start.m_starting.equal(tmp))
+    {
+      /**
+       * Some nodes have been "excluded" from SR
+       */
+      char buf0[100], buf1[100];
+      ndbout_c("execNDB_STARTCONF: changing from %s to %s",
+               c_start.m_starting.getText(buf0),
+               tmp.getText(buf1));
+      
+      NdbNodeBitmask waiting = c_start.m_starting;
+      waiting.bitANDC(tmp);
+
+      c_start.m_waiting.bitOR(waiting);
+      c_start.m_waitTO.bitOR(waiting);
+      
+      c_start.m_starting.assign(tmp);
+      cnoStartNodes = c_start.m_starting.count();
+    }
+  }
+
   NodeReceiverGroup rg(NDBCNTR, c_start.m_starting);
   signal->theData[0] = getOwnNodeId();
-  signal->theData[1] = ZWAITPOINT_4_2;
+  signal->theData[1] = CntrWaitRep::ZWAITPOINT_4_2;
   sendSignal(rg, GSN_CNTR_WAITREP, signal, 2, JBB);
   return;
 }//Ndbcntr::execNDB_STARTCONF()
@@ -1164,7 +1268,7 @@ void Ndbcntr::ph5ALab(Signal* signal) 
     req->senderRef = reference();
     req->nodeId = getOwnNodeId();
     req->internalStartPhase = cinternalStartphase;
-    req->typeOfStart = ctypeOfStart;
+    req->typeOfStart = cdihStartType;
     req->masterNodeId = cmasterNodeId;
     
     //#define TRACE_STTOR
@@ -1186,7 +1290,7 @@ void Ndbcntr::ph5ALab(Signal* signal) 
     /* WHEN THE MASTER HAS FINISHED HIS WORK*/
     /*--------------------------------------*/
     signal->theData[0] = getOwnNodeId();
-    signal->theData[1] = ZWAITPOINT_5_2;
+    signal->theData[1] = CntrWaitRep::ZWAITPOINT_5_2;
     sendSignal(calcNdbCntrBlockRef(cmasterNodeId), 
 	       GSN_CNTR_WAITREP, signal, 2, JBB);
     return;
@@ -1218,7 +1322,7 @@ void Ndbcntr::waitpoint52Lab(Signal* sig
     req->senderRef = reference();
     req->nodeId = getOwnNodeId();
     req->internalStartPhase = cinternalStartphase;
-    req->typeOfStart = ctypeOfStart;
+    req->typeOfStart = cdihStartType;
     req->masterNodeId = cmasterNodeId;
 #ifdef TRACE_STTOR
     ndbout_c("sending NDB_STTOR(%d) to DIH", cinternalStartphase);
@@ -1244,7 +1348,7 @@ void Ndbcntr::ph6ALab(Signal* signal) 
   NodeReceiverGroup rg(NDBCNTR, c_start.m_starting);
   rg.m_nodes.clear(getOwnNodeId());
   signal->theData[0] = getOwnNodeId();
-  signal->theData[1] = ZWAITPOINT_5_1;
+  signal->theData[1] = CntrWaitRep::ZWAITPOINT_5_1;
   sendSignal(rg, GSN_CNTR_WAITREP, signal, 2, JBB);
 
   waitpoint51Lab(signal);
@@ -1293,14 +1397,14 @@ void Ndbcntr::waitpoint61Lab(Signal* sig
       NodeReceiverGroup rg(NDBCNTR, c_start.m_starting);
       rg.m_nodes.clear(getOwnNodeId());
       signal->theData[0] = getOwnNodeId();
-      signal->theData[1] = ZWAITPOINT_6_2;
+      signal->theData[1] = CntrWaitRep::ZWAITPOINT_6_2;
       sendSignal(rg, GSN_CNTR_WAITREP, signal, 2, JBB);
       sendSttorry(signal);
     }
   } else {
     jam();
     signal->theData[0] = getOwnNodeId();
-    signal->theData[1] = ZWAITPOINT_6_1;
+    signal->theData[1] = CntrWaitRep::ZWAITPOINT_6_1;
     sendSignal(calcNdbCntrBlockRef(cmasterNodeId), GSN_CNTR_WAITREP, signal, 2, JBB);
   }
 }
@@ -1339,14 +1443,14 @@ void Ndbcntr::waitpoint71Lab(Signal* sig
       NodeReceiverGroup rg(NDBCNTR, c_start.m_starting);
       rg.m_nodes.clear(getOwnNodeId());
       signal->theData[0] = getOwnNodeId();
-      signal->theData[1] = ZWAITPOINT_7_2;
+      signal->theData[1] = CntrWaitRep::ZWAITPOINT_7_2;
       sendSignal(rg, GSN_CNTR_WAITREP, signal, 2, JBB);
       sendSttorry(signal);
     }
   } else {
     jam();
     signal->theData[0] = getOwnNodeId();
-    signal->theData[1] = ZWAITPOINT_7_1;
+    signal->theData[1] = CntrWaitRep::ZWAITPOINT_7_1;
     sendSignal(calcNdbCntrBlockRef(cmasterNodeId), GSN_CNTR_WAITREP, signal, 2, JBB);
   }
 }
@@ -1375,43 +1479,47 @@ void Ndbcntr::ph8ALab(Signal* signal)
 /*******************************/
 void Ndbcntr::execCNTR_WAITREP(Signal* signal) 
 {
-  Uint16 twaitPoint;
-
   jamEntry();
-  twaitPoint = signal->theData[1];
+  CntrWaitRep* rep = (CntrWaitRep*)signal->getDataPtr();
+
+  Uint32 twaitPoint = rep->waitPoint;
   switch (twaitPoint) {
-  case ZWAITPOINT_4_1:
+  case CntrWaitRep::ZWAITPOINT_4_1:
     jam();
     waitpoint41Lab(signal);
     break;
-  case ZWAITPOINT_4_2:
+  case CntrWaitRep::ZWAITPOINT_4_2:
     jam();
     sendSttorry(signal);
     break;
-  case ZWAITPOINT_5_1:
+  case CntrWaitRep::ZWAITPOINT_5_1:
     jam();
     waitpoint51Lab(signal);
     break;
-  case ZWAITPOINT_5_2:
+  case CntrWaitRep::ZWAITPOINT_5_2:
     jam();
     waitpoint52Lab(signal);
     break;
-  case ZWAITPOINT_6_1:
+  case CntrWaitRep::ZWAITPOINT_6_1:
     jam();
     waitpoint61Lab(signal);
     break;
-  case ZWAITPOINT_6_2:
+  case CntrWaitRep::ZWAITPOINT_6_2:
     jam();
     sendSttorry(signal);
     break;
-  case ZWAITPOINT_7_1:
+  case CntrWaitRep::ZWAITPOINT_7_1:
     jam();
     waitpoint71Lab(signal);
     break;
-  case ZWAITPOINT_7_2:
+  case CntrWaitRep::ZWAITPOINT_7_2:
     jam();
     sendSttorry(signal);
     break;
+  case CntrWaitRep::ZWAITPOINT_4_2_TO:
+    jam();
+    waitpoint42To(signal);
+    break;
   default:
     jam();
     systemErrorLab(signal, __LINE__);
@@ -1464,6 +1572,7 @@ void Ndbcntr::execNODE_FAILREP(Signal* s
   c_start.m_waiting.bitANDC(allFailed);
   c_start.m_withLog.bitANDC(allFailed);
   c_start.m_withoutLog.bitANDC(allFailed);
+  c_start.m_waitTO.bitANDC(allFailed);
   c_clusterNodes.bitANDC(allFailed);
   c_startedNodes.bitANDC(allFailed);
 
@@ -2022,6 +2131,8 @@ void Ndbcntr::sendNdbSttor(Signal* signa
 	   cinternalStartphase, 
 	   getBlockName( refToBlock(ndbBlocksPtr.p->blockref)));
 #endif
+  if (refToBlock(ndbBlocksPtr.p->blockref) == DBDIH)
+    req->typeOfStart = cdihStartType;
   sendSignal(ndbBlocksPtr.p->blockref, GSN_NDB_STTOR, signal, 22, JBB);
   cndbBlocksCount++;
 }//Ndbcntr::sendNdbSttor()
@@ -2925,6 +3036,8 @@ void Ndbcntr::Missra::sendNextSTTOR(Sign
 		 ref,
 		 currentBlockIndex);
 #endif
+        if (refToBlock(ref) == DBDIH)
+          signal->theData[7] = cntr.cdihStartType;
 	
 	cntr.sendSignal(ref, GSN_STTOR, signal, 8, JBB);
 	
diff -Nrup a/storage/ndb/src/kernel/blocks/suma/Suma.cpp
b/storage/ndb/src/kernel/blocks/suma/Suma.cpp
--- a/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2007-11-14 13:22:53 +01:00
+++ b/storage/ndb/src/kernel/blocks/suma/Suma.cpp	2007-11-30 12:00:29 +01:00
@@ -241,46 +241,38 @@ Suma::execSTTOR(Signal* signal) {
   jamEntry();                            
 
   DBUG_ENTER("Suma::execSTTOR");
-  const Uint32 startphase  = signal->theData[1];
-  const Uint32 typeOfStart = signal->theData[7];
+  m_startphase  = signal->theData[1];
+  m_typeOfStart = signal->theData[7];
 
   DBUG_PRINT("info",("startphase = %u, typeOfStart = %u",
-		     startphase, typeOfStart));
+		     m_startphase, m_typeOfStart));
 
-  if(startphase == 3)
+  if(m_startphase == 3)
   {
     jam();
     ndbrequire((m_tup = (Dbtup*)globalData.getBlock(DBTUP)) != 0);
-    signal->theData[0] = reference();
-    sendSignal(NDBCNTR_REF, GSN_READ_NODESREQ, signal, 1, JBB);
-    DBUG_VOID_RETURN;
   }
 
-  if(startphase == 5)
+  if(m_startphase == 5)
   {
+    jam();
+
     if (ERROR_INSERTED(13029)) /* Hold startphase 5 */
     {
       sendSignalWithDelay(SUMA_REF, GSN_STTOR, signal,
                           30, signal->getLength());
       DBUG_VOID_RETURN;
     }
-
-    c_startup.m_restart_server_node_id = 0;    
-    getNodeGroupMembers(signal);
-    if (typeOfStart == NodeState::ST_NODE_RESTART ||
-	typeOfStart == NodeState::ST_INITIAL_NODE_RESTART)
-    {
-      jam();
-      
-      send_start_me_req(signal);
-      return;
-    }
+    
+    signal->theData[0] = reference();
+    sendSignal(NDBCNTR_REF, GSN_READ_NODESREQ, signal, 1, JBB);
+    DBUG_VOID_RETURN;
   }
   
-  if(startphase == 7)
+  if(m_startphase == 7)
   {
-    if (typeOfStart != NodeState::ST_NODE_RESTART &&
-	typeOfStart != NodeState::ST_INITIAL_NODE_RESTART)
+    if (m_typeOfStart != NodeState::ST_NODE_RESTART &&
+	m_typeOfStart != NodeState::ST_INITIAL_NODE_RESTART)
     {
       for( Uint32 i = 0; i < c_no_of_buckets; i++)
       {
@@ -310,7 +302,7 @@ Suma::execSTTOR(Signal* signal) {
     else
       m_gcp_complete_rep_count = 0; // I contribute 1 gcp complete rep
     
-    if(typeOfStart == NodeState::ST_INITIAL_START &&
+    if(m_typeOfStart == NodeState::ST_INITIAL_START &&
        c_masterNodeId == getOwnNodeId())
     {
       jam();
@@ -325,7 +317,7 @@ Suma::execSTTOR(Signal* signal) {
     }
   }//if
   
-  if(startphase == 100)
+  if(m_startphase == 100)
   {
     /**
      * Allow API's to connect
@@ -334,10 +326,10 @@ Suma::execSTTOR(Signal* signal) {
     return;
   }
 
-  if(startphase == 101)
+  if(m_startphase == 101)
   {
-    if (typeOfStart == NodeState::ST_NODE_RESTART ||
-	typeOfStart == NodeState::ST_INITIAL_NODE_RESTART)
+    if (m_typeOfStart == NodeState::ST_NODE_RESTART ||
+	m_typeOfStart == NodeState::ST_INITIAL_NODE_RESTART)
     {
       /**
        * Handover code here
@@ -463,6 +455,17 @@ Suma::execREAD_NODESCONF(Signal* signal)
   
   c_masterNodeId = conf->masterNodeId;
   
+  c_startup.m_restart_server_node_id = 0;    
+  getNodeGroupMembers(signal);
+  if (m_typeOfStart == NodeState::ST_NODE_RESTART ||
+      m_typeOfStart == NodeState::ST_INITIAL_NODE_RESTART)
+  {
+    jam();
+    
+    send_start_me_req(signal);
+    return;
+  }
+
   sendSTTORRY(signal);
 }
 
@@ -4580,6 +4583,8 @@ Suma::execSUMA_HANDOVER_REQ(Signal* sign
   Uint32 start_gci = (gci > new_gci ? gci : new_gci);
   // mark all active buckets really belonging to restarting SUMA
 
+  c_alive_nodes.set(nodeId);
+  
   Bucket_mask tmp;
   for( Uint32 i = 0; i < c_no_of_buckets; i++) 
   {
diff -Nrup a/storage/ndb/src/kernel/blocks/suma/Suma.hpp
b/storage/ndb/src/kernel/blocks/suma/Suma.hpp
--- a/storage/ndb/src/kernel/blocks/suma/Suma.hpp	2007-09-05 15:19:57 +02:00
+++ b/storage/ndb/src/kernel/blocks/suma/Suma.hpp	2007-11-30 12:00:29 +01:00
@@ -638,6 +638,8 @@ private:
   Uint64 m_gcp_monitor;
 #endif
 
+  Uint32 m_startphase;
+  Uint32 m_typeOfStart;
 };
 
 #endif
Thread
bk commit into 5.1 tree (jonas:1.2696)jonas30 Nov