MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:tomas Date:April 4 2006 9:30am
Subject:bk commit into 5.0 tree (tomas:1.2135)
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2135 06/04/04 11:30:40 tomas@stripped +1 -0
  Merge tulin@stripped:/home/bk/mysql-5.0
  into  poseidon.ndb.mysql.com:/home/tomas/mysql-5.0

  ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
    1.91 06/04/04 11:30:30 tomas@stripped +0 -0
    Auto merged

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	tomas
# Host:	poseidon.ndb.mysql.com
# Root:	/home/tomas/mysql-5.0/RESYNC

--- 1.90/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2006-02-13 15:47:37 +01:00
+++ 1.91/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2006-04-04 11:30:30 +02:00
@@ -266,6 +266,10 @@
     jam();
     checkScanActiveInFailedLqh(signal, Tdata0, Tdata1);
     return;
+  case TcContinueB::ZNF_CHECK_TRANSACTIONS:
+    jam();
+    nodeFailCheckTransactions(signal, Tdata0, Tdata1);
+    return;
   case TcContinueB::CHECK_WAIT_DROP_TAB_FAILED_LQH:
     jam();
     checkWaitDropTabFailedLqh(signal, Tdata0, Tdata1);
@@ -303,8 +307,8 @@
   hostptr.i = signal->theData[1];
   ptrCheckGuard(hostptr, chostFilesize, hostRecord);
   hostptr.p->hostStatus = HS_ALIVE;
-  hostptr.p->takeOverStatus = TOS_IDLE;
   signal->theData[0] = cownref;
+  c_alive_nodes.set(hostptr.i);
   sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB);
 }
 
@@ -503,6 +507,7 @@
      * Finished
      */
     jam();
+    checkNodeFailComplete(signal, nodeId, HostRecord::NF_CHECK_DROP_TAB);
     return;
   }
   
@@ -868,8 +873,6 @@
       hostptr.i = i;
       ptrCheckGuard(hostptr, chostFilesize, hostRecord);
 
-      hostptr.p->takeOverStatus = TOS_IDLE;
-      
       if (NodeBitmask::get(readNodes->inactiveNodes, i)) {
         jam();
         hostptr.p->hostStatus = HS_DEAD;
@@ -877,6 +880,7 @@
         jam();
         con_lineNodes++;
         hostptr.p->hostStatus = HS_ALIVE;
+	c_alive_nodes.set(i);
       }//if
     }//if
   }//for
@@ -2378,6 +2382,7 @@
   regApiPtr->commitAckMarker = RNIL;
   regApiPtr->buddyPtr = RNIL;
   regApiPtr->currSavePointId = 0;
+  regApiPtr->m_transaction_nodes.clear();
   // Trigger data
   releaseFiredTriggerData(&regApiPtr->theFiredTriggers),
   // Index data
@@ -2986,6 +2991,10 @@
   signal->theData[0] = TdihConnectptr;
   signal->theData[1] = Ttableref;
   signal->theData[2] = TdistrHashValue;
+  signal->theData[3] = 0;
+  signal->theData[4] = 0;
+  signal->theData[5] = 0;
+  signal->theData[6] = 0;
 
   /*-------------------------------------------------------------*/
   /* FOR EFFICIENCY REASONS WE AVOID THE SIGNAL SENDING HERE AND */
@@ -3165,6 +3174,7 @@
   TcConnectRecord * const regTcPtr = tcConnectptr.p;
   ApiConnectRecord * const regApiPtr = apiConnectptr.p;
   CacheRecord * const regCachePtr = cachePtr.p;
+  UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
 #ifdef ERROR_INSERT
   if (ERROR_INSERTED(8002)) {
     systemErrorLab(signal, __LINE__);
@@ -3202,6 +3212,9 @@
   LqhKeyReq::setScanTakeOverFlag(tslrAttrLen, regCachePtr->scanTakeOverInd);
 
   Tdata10 = 0;
+  sig0 = regCachePtr->opSimple;
+  sig1 = regTcPtr->operation;
+  bool simpleRead = (sig1 == ZREAD && sig0 == ZTRUE);
   LqhKeyReq::setKeyLen(Tdata10, regCachePtr->keylen);
   LqhKeyReq::setLastReplicaNo(Tdata10, regTcPtr->lastReplicaNo);
   LqhKeyReq::setLockType(Tdata10, regCachePtr->opLock);
@@ -3211,8 +3224,8 @@
   LqhKeyReq::setApplicationAddressFlag(Tdata10, 1);
   LqhKeyReq::setDirtyFlag(Tdata10, regTcPtr->dirtyOp);
   LqhKeyReq::setInterpretedFlag(Tdata10, regCachePtr->opExec);
-  LqhKeyReq::setSimpleFlag(Tdata10, regCachePtr->opSimple);
-  LqhKeyReq::setOperation(Tdata10, regTcPtr->operation);
+  LqhKeyReq::setSimpleFlag(Tdata10, sig0);
+  LqhKeyReq::setOperation(Tdata10, sig1);
   /* ----------------------------------------------------------------------- 
    * Sequential Number of first LQH = 0, bit 22-23                           
    * IF ATTRIBUTE INFORMATION IS SENT IN TCKEYREQ,
@@ -3225,18 +3238,16 @@
    * ----------------------------------------------------------------------- */
   //LqhKeyReq::setAPIVersion(Tdata10, regCachePtr->apiVersionNo);
   Uint32 commitAckMarker = regTcPtr->commitAckMarker;
+  const Uint32 noOfLqhs = regTcPtr->noOfNodes;
   if(commitAckMarker != RNIL){
     jam();
-    
     LqhKeyReq::setMarkerFlag(Tdata10, 1);
 
-    CommitAckMarker * tmp;
-    tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
+    CommitAckMarker * tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
     
     /**
      * Populate LQH array
      */
-    const Uint32 noOfLqhs = regTcPtr->noOfNodes;
     tmp->noOfLqhs = noOfLqhs;
     for(Uint32 i = 0; i<noOfLqhs; i++){
       tmp->lqhNodeId[i] = regTcPtr->tcNodedata[i];
@@ -3247,7 +3258,6 @@
   /* NO READ LENGTH SENT FROM TC. SEQUENTIAL NUMBER IS 1 AND IT    */
   /* IS SENT TO A PRIMARY NODE.                                    */
   /* ************************************************************> */
-  UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
 
   LqhKeyReq * const lqhKeyReq = (LqhKeyReq *)signal->getDataPtrSend();
 
@@ -3271,6 +3281,14 @@
   sig5 = regTcPtr->clientData;
   sig6 = regCachePtr->scanInfo;
 
+  if (! simpleRead)
+  {
+    regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[0]);
+    regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[1]);
+    regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[2]);
+    regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[3]);  
+  }
+  
   lqhKeyReq->tableSchemaVersion = sig0;
   lqhKeyReq->fragmentData = sig1;
   lqhKeyReq->transId1 = sig2;
@@ -4655,6 +4673,7 @@
   UintR TgcpPointer = regTmpApiPtr->gcpPointer;
   UintR TgcpFilesize = cgcpFilesize;
   UintR TcommitAckMarker = regTmpApiPtr->commitAckMarker;
+  NdbNodeBitmask Tnodes = regTmpApiPtr->m_transaction_nodes;
   GcpRecord *localGcpRecord = gcpRecord;
 
   regApiPtr->ndbapiBlockref = regTmpApiPtr->ndbapiBlockref;
@@ -4665,6 +4684,7 @@
   regApiPtr->transid[1] = Ttransid2;
   regApiPtr->lqhkeyconfrec = Tlqhkeyconfrec;
   regApiPtr->commitAckMarker = TcommitAckMarker;
+  regApiPtr->m_transaction_nodes = Tnodes;
 
   gcpPtr.i = TgcpPointer;
   ptrCheckGuard(gcpPtr, TgcpFilesize, localGcpRecord);
@@ -4675,6 +4695,7 @@
   regTmpApiPtr->commitAckMarker = RNIL;
   regTmpApiPtr->firstTcConnect = RNIL;
   regTmpApiPtr->lastTcConnect = RNIL;
+  regTmpApiPtr->m_transaction_nodes.clear();
   releaseAllSeizedIndexOperations(regTmpApiPtr);
 }//Dbtc::copyApi()
 
@@ -4933,7 +4954,7 @@
   TcConnectRecordPtr localTcConnectptr;
   UintR TtcConnectFilesize = ctcConnectFilesize;
   TcConnectRecord *localTcConnectRecord = tcConnectRecord;
-
+  apiConnectptr.p->m_transaction_nodes.clear();
   localTcConnectptr.i = apiConnectptr.p->firstTcConnect;
   do {
     jam();
@@ -5338,7 +5359,8 @@
       break;
     case CS_ABORTING:
       jam();
-      errorCode = ZABORTINPROGRESS;
+      errorCode = regApiPtr->returncode ? 
+	regApiPtr->returncode : ZABORTINPROGRESS;
       break;
     case CS_START_SCAN:
       jam();
@@ -5877,9 +5899,9 @@
 
   if (transP->firstTcConnect == RNIL) {
     jam();
-    /*-----------------------------------------------------------------------*/
-    /*    WE HAVE NO PARTICIPANTS IN THE TRANSACTION.                        */
-    /*-----------------------------------------------------------------------*/
+    /*--------------------------------------------------------------------*/
+    /* WE HAVE NO PARTICIPANTS IN THE TRANSACTION.                        */
+    /*--------------------------------------------------------------------*/
     releaseAbortResources(signal);
     return;
   }//if
@@ -6156,10 +6178,12 @@
     if (api_timer != 0) {
       time_out_value= time_out_param + (api_con_ptr & mask_value);
       time_passed= tc_timer - api_timer;
-      if (time_passed > time_out_value) {
+      if (time_passed > time_out_value) 
+      {
         jam();
-        timeOutFoundLab(signal, api_con_ptr);
-        return;
+        timeOutFoundLab(signal, api_con_ptr, ZTIME_OUT_ERROR);
+	api_con_ptr++;
+	break;
       }
     }
   }
@@ -6179,10 +6203,8 @@
   return;
 }//Dbtc::timeOutLoopStartLab()
 
-void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr) 
+void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode) 
 {
-  sendContinueTimeOutControl(signal, TapiConPtr + 1);
-  
   apiConnectptr.i = TapiConPtr;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
   /*------------------------------------------------------------------*/
@@ -6195,7 +6217,8 @@
 	<< "Time-out in state = " << apiConnectptr.p->apiConnectstate
 	<< " apiConnectptr.i = " << apiConnectptr.i 
 	<< " - exec: " << apiConnectptr.p->m_exec_flag
-	<< " - place: " << c_apiConTimer_line[apiConnectptr.i]);
+	<< " - place: " << c_apiConTimer_line[apiConnectptr.i]
+	<< " code: " << errCode);
   switch (apiConnectptr.p->apiConnectstate) {
   case CS_STARTED:
     if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){
@@ -6212,7 +6235,7 @@
       }//if
     }
     apiConnectptr.p->returnsignal = RS_TCROLLBACKREP;      
-    apiConnectptr.p->returncode = ZTIME_OUT_ERROR;
+    apiConnectptr.p->returncode = errCode;
     abort010Lab(signal);
     return;
   case CS_RECEIVING:
@@ -6225,7 +6248,7 @@
     /*       START ABORTING THE TRANSACTION. ALSO START CHECKING THE    */
     /*       REMAINING TRANSACTIONS.                                    */
     /*------------------------------------------------------------------*/
-    terrorCode = ZTIME_OUT_ERROR;
+    terrorCode = errCode;
     abortErrorLab(signal);
     return;
   case CS_COMMITTING:
@@ -6432,6 +6455,7 @@
     return;
   }
   
+  bool found = false;
   OperationState tmp[16];
   
   Uint32 TloopCount = 0;
@@ -6439,7 +6463,31 @@
     jam();
     if (tcConnectptr.i == RNIL) {
       jam();
-      if (Tcheck == 0) {
+
+#ifdef VM_TRACE
+      ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d",
+	       found, Tcheck, apiConnectptr.p->counter);
+#endif
+      if (found || apiConnectptr.p->counter)
+      {
+	jam();
+	/**
+	 * We sent atleast one ABORT/ABORTED
+	 *   or ZABORT_TIMEOUT_BREAK is in job buffer
+	 *   wait for reception...
+	 */
+	return;
+      }
+      
+      if (Tcheck == 1)
+      {
+	jam();
+	releaseAbortResources(signal);
+	return;
+      }
+      
+      if (Tcheck == 0)
+      {
         jam();
 	/*------------------------------------------------------------------
 	 * All nodes had already reported ABORTED for all tcConnect records.
@@ -6448,9 +6496,11 @@
 	 *------------------------------------------------------------------*/
 	char buf[96]; buf[0] = 0;
 	char buf2[96];
-	BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:",
-		 __LINE__, apiConnectptr.i);
-	for(Uint32 i = 0; i<TloopCount; i++){
+	BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:",
+			     __LINE__, apiConnectptr.i,
+			     apiConnectptr.p->counter);
+	for(Uint32 i = 0; i<TloopCount; i++)
+	{
 	  BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]);
 	  BaseString::snprintf(buf, sizeof(buf), buf2);
 	}
@@ -6458,7 +6508,9 @@
 	ndbout_c(buf);
 	ndbrequire(false);
 	releaseAbortResources(signal);
+	return;
       }
+      
       return;
     }//if
     TloopCount++;
@@ -6473,7 +6525,16 @@
       signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK;
       signal->theData[1] = tcConnectptr.i;
       signal->theData[2] = apiConnectptr.i;      
-      sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+      if (ERROR_INSERTED(8050))
+      {
+	ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)", 
+		 Tcheck, apiConnectptr.p->counter);
+	sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3);
+      }
+      else
+      {
+	sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+      }
       return;
     }//if
     ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
@@ -6496,7 +6557,7 @@
         jam();
         if (tcConnectptr.p->tcNodedata[Ti] != 0) {
           TloopCount += 31;
-          Tcheck = 1;
+	  found = true;
           hostptr.i = tcConnectptr.p->tcNodedata[Ti];
           ptrCheckGuard(hostptr, chostFilesize, hostRecord);
           if (hostptr.p->hostStatus == HS_ALIVE) {
@@ -6869,58 +6930,44 @@
   const Uint32 tnewMasterId = nodeFail->masterNodeId;
   
   arrGuard(tnoOfNodes, MAX_NDB_NODES);
+  Uint32 i;
   int index = 0;
-  for (unsigned i = 1; i< MAX_NDB_NODES; i++) {
-    if(NodeBitmask::get(nodeFail->theNodes, i)){
+  for (i = 1; i< MAX_NDB_NODES; i++) 
+  {
+    if(NodeBitmask::get(nodeFail->theNodes, i))
+    {
       cdata[index] = i;
       index++;
     }//if
   }//for
 
+  cmasterNodeId = tnewMasterId;
+  
   tcNodeFailptr.i = 0;
   ptrAss(tcNodeFailptr, tcFailRecord);
-  Uint32 tindex;
-  for (tindex = 0; tindex < tnoOfNodes; tindex++) {
+  for (i = 0; i < tnoOfNodes; i++) 
+  {
     jam();
-    hostptr.i = cdata[tindex];
+    hostptr.i = cdata[i];
     ptrCheckGuard(hostptr, chostFilesize, hostRecord);
+    
     /*------------------------------------------------------------*/
     /*       SET STATUS OF THE FAILED NODE TO DEAD SINCE IT HAS   */
     /*       FAILED.                                              */
     /*------------------------------------------------------------*/
     hostptr.p->hostStatus = HS_DEAD;
+    hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS;
+    c_alive_nodes.clear(hostptr.i);
 
-    if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
-      jam();
-      /*------------------------------------------------------------*/
-      /*       A VERY UNUSUAL SITUATION. THE TAKE OVER WAS COMPLETED*/
-      /*       EVEN BEFORE WE HEARD ABOUT THE NODE FAILURE REPORT.  */
-      /*       HOWEVER UNUSUAL THIS SITUATION IS POSSIBLE.          */
-      /*------------------------------------------------------------*/
-      /*       RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE    */
-      /*       REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
-      /*       USED THEM IS COMPLETED.                              */
-      /*------------------------------------------------------------*/
-      {
-	NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
-	nfRep->blockNo      = DBTC;
-	nfRep->nodeId       = cownNodeid;
-	nfRep->failedNodeId = hostptr.i;
-      }
-      sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, 
-		 NFCompleteRep::SignalLength, JBB);
-    } else {
-      ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE);
-      hostptr.p->takeOverStatus = TOS_NODE_FAILED;
-    }//if
-    
-    if (tcNodeFailptr.p->failStatus == FS_LISTENING) {
+    if (tcNodeFailptr.p->failStatus == FS_LISTENING) 
+    {
       jam();
       /*------------------------------------------------------------*/
       /*       THE CURRENT TAKE OVER CAN BE AFFECTED BY THIS NODE   */
       /*       FAILURE.                                             */
       /*------------------------------------------------------------*/
-      if (hostptr.p->lqhTransStatus == LTS_ACTIVE) {
+      if (hostptr.p->lqhTransStatus == LTS_ACTIVE) 
+      {
 	jam();
 	/*------------------------------------------------------------*/
 	/*       WE WERE WAITING FOR THE FAILED NODE IN THE TAKE OVER */
@@ -6932,86 +6979,46 @@
       }//if
     }//if
     
-  }//for
-
-  const bool masterFailed = (cmasterNodeId != tnewMasterId);
-  cmasterNodeId = tnewMasterId;
-
-  if(getOwnNodeId() == cmasterNodeId && masterFailed){
-    /**
-     * Master has failed and I'm the new master
-     */
-    jam();
-    
-    for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+    if (getOwnNodeId() != tnewMasterId)
+    {
       jam();
-      ptrAss(hostptr, hostRecord);
-      if (hostptr.p->hostStatus != HS_ALIVE) {
-	jam();
-	if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
-	  jam();
-	  /*------------------------------------------------------------*/
-	  /*       SEND TAKE OVER CONFIRMATION TO ALL ALIVE NODES IF    */
-	  /*       TAKE OVER IS COMPLETED. THIS IS PERFORMED TO ENSURE  */
-	  /*       THAT ALL NODES AGREE ON THE IDLE STATE OF THE TAKE   */
-	  /*       OVER. THIS MIGHT BE MISSED IN AN ERROR SITUATION IF  */
-	  /*       MASTER FAILS AFTER SENDING CONFIRMATION TO NEW       */
-	  /*       MASTER BUT FAILING BEFORE SENDING TO ANOTHER NODE    */
-	  /*       WHICH WAS NOT MASTER. IF THIS NODE LATER BECOMES     */
-	  /*       MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */
-	  /*       CRASHED NODE HAVE ALREADY RECOVERED.                 */
-	  /*------------------------------------------------------------*/
-	  for(tmpHostptr.i = 1; tmpHostptr.i < MAX_NDB_NODES;tmpHostptr.i++) {
-	    jam();
-	    ptrAss(tmpHostptr, hostRecord);
-	    if (tmpHostptr.p->hostStatus == HS_ALIVE) {
-	      jam();
-	      tblockref = calcTcBlockRef(tmpHostptr.i);
-	      signal->theData[0] = hostptr.i;
-	      sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
-	    }//if
-	  }//for
-	}//if
-      }//if
-    }//for
-  }
-
-  if(getOwnNodeId() == cmasterNodeId){
-    jam();
-    for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+      /**
+       * Only master does takeover currently
+       */
+      hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER;
+    }
+    else
+    {
       jam();
-      ptrAss(hostptr, hostRecord);
-      if (hostptr.p->hostStatus != HS_ALIVE) {
-        jam();
-        if (hostptr.p->takeOverStatus == TOS_NODE_FAILED) {
-          jam();
-	  /*------------------------------------------------------------*/
-	  /*       CONCLUDE ALL ACTIVITIES THE FAILED TC DID CONTROL    */
-	  /*       SINCE WE ARE THE MASTER. THIS COULD HAVE BEEN STARTED*/
-	  /*       BY A PREVIOUS MASTER BUT HAVE NOT BEEN CONCLUDED YET.*/
-	  /*------------------------------------------------------------*/
-          hostptr.p->takeOverStatus = TOS_ACTIVE;
-          signal->theData[0] = hostptr.i;
-          sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
-        }//if
-      }//if
-    }//for
-  }//if
-  for (tindex = 0; tindex < tnoOfNodes; tindex++) {
-    jam();
-    hostptr.i = cdata[tindex];
-    ptrCheckGuard(hostptr, chostFilesize, hostRecord);
-    /*------------------------------------------------------------*/
-    /*       LOOP THROUGH AND ABORT ALL SCANS THAT WHERE          */
-    /*       CONTROLLED BY THIS TC AND ACTIVE IN THE FAILED       */
-    /*       NODE'S LQH                                           */
-    /*------------------------------------------------------------*/
+      signal->theData[0] = hostptr.i;
+      sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
+    }
+
     checkScanActiveInFailedLqh(signal, 0, hostptr.i);
     checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid
-  }//for
-
+    nodeFailCheckTransactions(signal, 0, hostptr.i);
+  }
 }//Dbtc::execNODE_FAILREP()
 
+void
+Dbtc::checkNodeFailComplete(Signal* signal, 
+			    Uint32 failedNodeId,
+			    Uint32 bit)
+{
+  hostptr.i = failedNodeId;
+  ptrCheckGuard(hostptr, chostFilesize, hostRecord);
+  hostptr.p->m_nf_bits &= ~bit;
+  if (hostptr.p->m_nf_bits == 0)
+  {
+    NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
+    nfRep->blockNo      = DBTC;
+    nfRep->nodeId       = cownNodeid;
+    nfRep->failedNodeId = hostptr.i;
+    sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, 
+	       NFCompleteRep::SignalLength, JBB);
+  }
+}
+
 void Dbtc::checkScanActiveInFailedLqh(Signal* signal, 
 				      Uint32 scanPtrI, 
 				      Uint32 failedNodeId){
@@ -7053,8 +7060,44 @@
     sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
     return;
   }//for
+
+  checkNodeFailComplete(signal, failedNodeId, HostRecord::NF_CHECK_SCAN);
+}
+
+void
+Dbtc::nodeFailCheckTransactions(Signal* signal, 
+				Uint32 transPtrI, 
+				Uint32 failedNodeId)
+{
+  jam();
+  Ptr<ApiConnectRecord> transPtr;
+  for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++)
+  {
+    ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord); 
+    if (transPtr.p->m_transaction_nodes.get(failedNodeId))
+    {
+      jam();
+      // Force timeout regardless of state      
+      Uint32 save = c_appl_timeout_value;
+      c_appl_timeout_value = 1;
+      setApiConTimer(transPtr.i, 0, __LINE__);
+      timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT);
+      c_appl_timeout_value = save;
+    }
+    
+    // Send CONTINUEB to continue later
+    signal->theData[0] = TcContinueB::ZNF_CHECK_TRANSACTIONS;
+    signal->theData[1] = transPtr.i + 1; // Check next
+    signal->theData[2] = failedNodeId;
+    sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+    return;
+  }
+
+  checkNodeFailComplete(signal, failedNodeId, 
+			HostRecord::NF_CHECK_TRANSACTION);
 }
 
+
 void
 Dbtc::checkScanFragList(Signal* signal,
 			Uint32 failedNodeId,
@@ -7070,54 +7113,14 @@
   tfailedNodeId = signal->theData[0];
   hostptr.i = tfailedNodeId;
   ptrCheckGuard(hostptr, chostFilesize, hostRecord);
-  switch (hostptr.p->takeOverStatus) {
-  case TOS_IDLE:
-    jam();
-    /*------------------------------------------------------------*/
-    /*       THIS MESSAGE ARRIVED EVEN BEFORE THE NODE_FAILREP    */
-    /*       MESSAGE. THIS IS POSSIBLE IN EXTREME SITUATIONS.     */
-    /*       WE SET THE STATE TO TAKE_OVER_COMPLETED AND WAIT     */
-    /*       FOR THE NODE_FAILREP MESSAGE.                        */
-    /*------------------------------------------------------------*/
-    hostptr.p->takeOverStatus = TOS_COMPLETED;
-    break;
-  case TOS_NODE_FAILED:
-  case TOS_ACTIVE:
-    jam();
-    /*------------------------------------------------------------*/
-    /*       WE ARE NOT MASTER AND THE TAKE OVER IS ACTIVE OR WE  */
-    /*       ARE MASTER AND THE TAKE OVER IS ACTIVE. IN BOTH      */
-    /*       WE SET THE STATE TO TAKE_OVER_COMPLETED.             */
-    /*------------------------------------------------------------*/
-    /*       RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE    */
-    /*       REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
-    /*       USED THEM IS COMPLETED.                              */
-    /*------------------------------------------------------------*/
-    hostptr.p->takeOverStatus = TOS_COMPLETED;
-    {
-      NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
-      nfRep->blockNo      = DBTC;
-      nfRep->nodeId       = cownNodeid;
-      nfRep->failedNodeId = hostptr.i;
-    }
-    sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal, 
-               NFCompleteRep::SignalLength, JBB);
-    break;
-  case TOS_COMPLETED:
-    jam();
-    /*------------------------------------------------------------*/
-    /*       WE HAVE ALREADY RECEIVED THE CONF SIGNAL. IT IS MOST */
-    /*       LIKELY SENT FROM A NEW MASTER WHICH WASN'T SURE IF   */
-    /*       THIS NODE HEARD THE CONF SIGNAL FROM THE OLD MASTER. */
-    /*       WE SIMPLY IGNORE THE MESSAGE.                        */
-    /*------------------------------------------------------------*/
-    /*empty*/;
-    break;
-  default:
+
+  if (signal->getSendersBlockRef() != reference())
+  {
     jam();
-    systemErrorLab(signal, __LINE__);
     return;
-  }//switch
+  }
+  
+  checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER);
 }//Dbtc::execTAKE_OVERTCCONF()
 
 void Dbtc::execTAKE_OVERTCREQ(Signal* signal) 
@@ -7357,16 +7360,10 @@
     /*       TO REPORT THE COMPLETION OF THE TAKE OVER TO ALL     */
     /*       NODES THAT ARE ALIVE.                                */
     /*------------------------------------------------------------*/
-    for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
-      jam();
-      ptrAss(hostptr, hostRecord);
-      if (hostptr.p->hostStatus == HS_ALIVE) {
-        jam();
-        tblockref = calcTcBlockRef(hostptr.i);
-        signal->theData[0] = tcNodeFailptr.p->takeOverNode;
-        sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
-      }//if
-    }//for
+    NodeReceiverGroup rg(DBTC, c_alive_nodes);
+    signal->theData[0] = tcNodeFailptr.p->takeOverNode;
+    sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
+    
     if (tcNodeFailptr.p->queueIndex > 0) {
       jam();
       /*------------------------------------------------------------*/
@@ -8048,6 +8045,7 @@
   apiConnectptr.p->ndbapiBlockref = 0;
   apiConnectptr.p->ndbapiConnect = 0;
   apiConnectptr.p->buddyPtr = RNIL;
+  apiConnectptr.p->m_transaction_nodes.clear();
   setApiConTimer(apiConnectptr.i, 0, __LINE__);
   switch(ttransStatus){
   case LqhTransConf::Committed:
@@ -9875,6 +9873,7 @@
     apiConnectptr.p->executingIndexOp = RNIL;
     apiConnectptr.p->buddyPtr = RNIL;
     apiConnectptr.p->currSavePointId = 0;
+    apiConnectptr.p->m_transaction_nodes.clear();
   }//for
   apiConnectptr.i = tiacTmp - 1;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9902,6 +9901,7 @@
       apiConnectptr.p->executingIndexOp = RNIL;
       apiConnectptr.p->buddyPtr = RNIL;
       apiConnectptr.p->currSavePointId = 0;
+      apiConnectptr.p->m_transaction_nodes.clear();
     }//for
   apiConnectptr.i = (2 * tiacTmp) - 1;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9929,6 +9929,7 @@
     apiConnectptr.p->executingIndexOp = RNIL;
     apiConnectptr.p->buddyPtr = RNIL;
     apiConnectptr.p->currSavePointId = 0;
+    apiConnectptr.p->m_transaction_nodes.clear();
   }//for
   apiConnectptr.i = (3 * tiacTmp) - 1;
   ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9989,13 +9990,13 @@
     ptrAss(hostptr, hostRecord);
     hostptr.p->hostStatus = HS_DEAD;
     hostptr.p->inPackedList = false;
-    hostptr.p->takeOverStatus = TOS_NOT_DEFINED;
     hostptr.p->lqhTransStatus = LTS_IDLE;
     hostptr.p->noOfWordsTCKEYCONF = 0;
     hostptr.p->noOfWordsTCINDXCONF = 0;
     hostptr.p->noOfPackedWordsLqh = 0;
     hostptr.p->hostLqhBlockRef = calcLqhBlockRef(hostptr.i);
   }//for
+  c_alive_nodes.clear();
 }//Dbtc::inithost()
 
 void Dbtc::initialiseRecordsLab(Signal* signal, UintR Tdata0, 
@@ -10248,6 +10249,7 @@
   }//while
   apiConnectptr.p->firstTcConnect = RNIL;
   apiConnectptr.p->lastTcConnect = RNIL;
+  apiConnectptr.p->m_transaction_nodes.clear();
 
   // MASV let state be CS_ABORTING until all 
   // signals in the "air" have been received. Reset to CS_CONNECTED
@@ -10321,6 +10323,7 @@
   cfirstfreeApiConnect = TlocalApiConnectptr.i;
   setApiConTimer(TlocalApiConnectptr.i, 0, __LINE__);
   TlocalApiConnectptr.p->apiConnectstate = CS_DISCONNECTED;
+  ndbassert(TlocalApiConnectptr.p->m_transaction_nodes.isclear());
   ndbassert(TlocalApiConnectptr.p->apiScanRec == RNIL);
   TlocalApiConnectptr.p->ndbapiBlockref = 0;
 }//Dbtc::releaseApiCon()
@@ -10855,6 +10858,34 @@
     infoEvent("IndexOpCount: pool: %d free: %d", 
 	      c_theIndexOperationPool.getSize(),
 	      c_theIndexOperationPool.getNoOfFree());
+  }
+
+  if (dumpState->args[0] == 2514)
+  {
+    if (signal->getLength() == 2)
+    {
+      dumpState->args[0] = DumpStateOrd::TcDumpOneApiConnectRec;
+      execDUMP_STATE_ORD(signal);
+    }
+
+    NodeReceiverGroup rg(CMVMI, c_alive_nodes);
+    dumpState->args[0] = 15;
+    sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);
+
+    signal->theData[0] = 2515;
+    sendSignalWithDelay(cownref, GSN_DUMP_STATE_ORD, signal, 1000, 1);    
+    return;
+  }
+
+  if (dumpState->args[0] == 2515)
+  {
+    NdbNodeBitmask mask = c_alive_nodes;
+    mask.clear(getOwnNodeId());
+    NodeReceiverGroup rg(NDBCNTR, mask);
+    
+    sendSignal(rg, GSN_SYSTEM_ERROR, signal, 1, JBB);
+    sendSignalWithDelay(cownref, GSN_SYSTEM_ERROR, signal, 300, 1);    
+    return;
   }
 }//Dbtc::execDUMP_STATE_ORD()
 
Thread
bk commit into 5.0 tree (tomas:1.2135)tomas4 Apr