Below is the list of changes that have just been committed into a local
5.0 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2135 06/04/04 11:50:54 tomas@stripped +2 -0
Merge tulin@stripped:/home/bk/mysql-5.0
into poseidon.ndb.mysql.com:/home/tomas/mysql-5.0
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
1.91 06/04/04 11:50:44 tomas@stripped +0 -0
Auto merged
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
1.46 06/04/04 11:50:43 tomas@stripped +0 -0
Auto merged
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: tomas
# Host: poseidon.ndb.mysql.com
# Root: /home/tomas/mysql-5.0/RESYNC
--- 1.45/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-03-22 15:46:48 +01:00
+++ 1.46/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-04-04 11:50:43 +02:00
@@ -11840,7 +11840,8 @@
/* THAT THE NEW REPLICA IS NOT STARTED YET AND REPLICA_LAST_GCI IS*/
/* SET TO -1 TO INDICATE THAT IT IS NOT DEAD YET. */
/*----------------------------------------------------------------------*/
- arrGuard(ncrReplicaPtr.p->noCrashedReplicas + 1, 8);
+ arrGuardErr(ncrReplicaPtr.p->noCrashedReplicas + 1, 8,
+ NDBD_EXIT_MAX_CRASHED_REPLICAS);
ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
SYSFILE->lastCompletedGCI[nodeId];
ncrReplicaPtr.p->noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas + 1;
--- 1.90/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-02-13 15:47:37 +01:00
+++ 1.91/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-04-04 11:50:44 +02:00
@@ -266,6 +266,10 @@
jam();
checkScanActiveInFailedLqh(signal, Tdata0, Tdata1);
return;
+ case TcContinueB::ZNF_CHECK_TRANSACTIONS:
+ jam();
+ nodeFailCheckTransactions(signal, Tdata0, Tdata1);
+ return;
case TcContinueB::CHECK_WAIT_DROP_TAB_FAILED_LQH:
jam();
checkWaitDropTabFailedLqh(signal, Tdata0, Tdata1);
@@ -303,8 +307,8 @@
hostptr.i = signal->theData[1];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
hostptr.p->hostStatus = HS_ALIVE;
- hostptr.p->takeOverStatus = TOS_IDLE;
signal->theData[0] = cownref;
+ c_alive_nodes.set(hostptr.i);
sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB);
}
@@ -503,6 +507,7 @@
* Finished
*/
jam();
+ checkNodeFailComplete(signal, nodeId, HostRecord::NF_CHECK_DROP_TAB);
return;
}
@@ -868,8 +873,6 @@
hostptr.i = i;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- hostptr.p->takeOverStatus = TOS_IDLE;
-
if (NodeBitmask::get(readNodes->inactiveNodes, i)) {
jam();
hostptr.p->hostStatus = HS_DEAD;
@@ -877,6 +880,7 @@
jam();
con_lineNodes++;
hostptr.p->hostStatus = HS_ALIVE;
+ c_alive_nodes.set(i);
}//if
}//if
}//for
@@ -2378,6 +2382,7 @@
regApiPtr->commitAckMarker = RNIL;
regApiPtr->buddyPtr = RNIL;
regApiPtr->currSavePointId = 0;
+ regApiPtr->m_transaction_nodes.clear();
// Trigger data
releaseFiredTriggerData(&regApiPtr->theFiredTriggers),
// Index data
@@ -2986,6 +2991,10 @@
signal->theData[0] = TdihConnectptr;
signal->theData[1] = Ttableref;
signal->theData[2] = TdistrHashValue;
+ signal->theData[3] = 0;
+ signal->theData[4] = 0;
+ signal->theData[5] = 0;
+ signal->theData[6] = 0;
/*-------------------------------------------------------------*/
/* FOR EFFICIENCY REASONS WE AVOID THE SIGNAL SENDING HERE AND */
@@ -3165,6 +3174,7 @@
TcConnectRecord * const regTcPtr = tcConnectptr.p;
ApiConnectRecord * const regApiPtr = apiConnectptr.p;
CacheRecord * const regCachePtr = cachePtr.p;
+ UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
#ifdef ERROR_INSERT
if (ERROR_INSERTED(8002)) {
systemErrorLab(signal, __LINE__);
@@ -3202,6 +3212,9 @@
LqhKeyReq::setScanTakeOverFlag(tslrAttrLen, regCachePtr->scanTakeOverInd);
Tdata10 = 0;
+ sig0 = regCachePtr->opSimple;
+ sig1 = regTcPtr->operation;
+ bool simpleRead = (sig1 == ZREAD && sig0 == ZTRUE);
LqhKeyReq::setKeyLen(Tdata10, regCachePtr->keylen);
LqhKeyReq::setLastReplicaNo(Tdata10, regTcPtr->lastReplicaNo);
LqhKeyReq::setLockType(Tdata10, regCachePtr->opLock);
@@ -3211,8 +3224,8 @@
LqhKeyReq::setApplicationAddressFlag(Tdata10, 1);
LqhKeyReq::setDirtyFlag(Tdata10, regTcPtr->dirtyOp);
LqhKeyReq::setInterpretedFlag(Tdata10, regCachePtr->opExec);
- LqhKeyReq::setSimpleFlag(Tdata10, regCachePtr->opSimple);
- LqhKeyReq::setOperation(Tdata10, regTcPtr->operation);
+ LqhKeyReq::setSimpleFlag(Tdata10, sig0);
+ LqhKeyReq::setOperation(Tdata10, sig1);
/* -----------------------------------------------------------------------
* Sequential Number of first LQH = 0, bit 22-23
* IF ATTRIBUTE INFORMATION IS SENT IN TCKEYREQ,
@@ -3225,18 +3238,16 @@
* ----------------------------------------------------------------------- */
//LqhKeyReq::setAPIVersion(Tdata10, regCachePtr->apiVersionNo);
Uint32 commitAckMarker = regTcPtr->commitAckMarker;
+ const Uint32 noOfLqhs = regTcPtr->noOfNodes;
if(commitAckMarker != RNIL){
jam();
-
LqhKeyReq::setMarkerFlag(Tdata10, 1);
- CommitAckMarker * tmp;
- tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
+ CommitAckMarker * tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
/**
* Populate LQH array
*/
- const Uint32 noOfLqhs = regTcPtr->noOfNodes;
tmp->noOfLqhs = noOfLqhs;
for(Uint32 i = 0; i<noOfLqhs; i++){
tmp->lqhNodeId[i] = regTcPtr->tcNodedata[i];
@@ -3247,7 +3258,6 @@
/* NO READ LENGTH SENT FROM TC. SEQUENTIAL NUMBER IS 1 AND IT */
/* IS SENT TO A PRIMARY NODE. */
/* ************************************************************> */
- UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
LqhKeyReq * const lqhKeyReq = (LqhKeyReq *)signal->getDataPtrSend();
@@ -3271,6 +3281,14 @@
sig5 = regTcPtr->clientData;
sig6 = regCachePtr->scanInfo;
+ if (! simpleRead)
+ {
+ regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[0]);
+ regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[1]);
+ regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[2]);
+ regApiPtr->m_transaction_nodes.set(regTcPtr->tcNodedata[3]);
+ }
+
lqhKeyReq->tableSchemaVersion = sig0;
lqhKeyReq->fragmentData = sig1;
lqhKeyReq->transId1 = sig2;
@@ -4655,6 +4673,7 @@
UintR TgcpPointer = regTmpApiPtr->gcpPointer;
UintR TgcpFilesize = cgcpFilesize;
UintR TcommitAckMarker = regTmpApiPtr->commitAckMarker;
+ NdbNodeBitmask Tnodes = regTmpApiPtr->m_transaction_nodes;
GcpRecord *localGcpRecord = gcpRecord;
regApiPtr->ndbapiBlockref = regTmpApiPtr->ndbapiBlockref;
@@ -4665,6 +4684,7 @@
regApiPtr->transid[1] = Ttransid2;
regApiPtr->lqhkeyconfrec = Tlqhkeyconfrec;
regApiPtr->commitAckMarker = TcommitAckMarker;
+ regApiPtr->m_transaction_nodes = Tnodes;
gcpPtr.i = TgcpPointer;
ptrCheckGuard(gcpPtr, TgcpFilesize, localGcpRecord);
@@ -4675,6 +4695,7 @@
regTmpApiPtr->commitAckMarker = RNIL;
regTmpApiPtr->firstTcConnect = RNIL;
regTmpApiPtr->lastTcConnect = RNIL;
+ regTmpApiPtr->m_transaction_nodes.clear();
releaseAllSeizedIndexOperations(regTmpApiPtr);
}//Dbtc::copyApi()
@@ -4933,7 +4954,7 @@
TcConnectRecordPtr localTcConnectptr;
UintR TtcConnectFilesize = ctcConnectFilesize;
TcConnectRecord *localTcConnectRecord = tcConnectRecord;
-
+ apiConnectptr.p->m_transaction_nodes.clear();
localTcConnectptr.i = apiConnectptr.p->firstTcConnect;
do {
jam();
@@ -5338,7 +5359,8 @@
break;
case CS_ABORTING:
jam();
- errorCode = ZABORTINPROGRESS;
+ errorCode = regApiPtr->returncode ?
+ regApiPtr->returncode : ZABORTINPROGRESS;
break;
case CS_START_SCAN:
jam();
@@ -5877,9 +5899,9 @@
if (transP->firstTcConnect == RNIL) {
jam();
- /*-----------------------------------------------------------------------*/
- /* WE HAVE NO PARTICIPANTS IN THE TRANSACTION. */
- /*-----------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------*/
+ /* WE HAVE NO PARTICIPANTS IN THE TRANSACTION. */
+ /*--------------------------------------------------------------------*/
releaseAbortResources(signal);
return;
}//if
@@ -6156,10 +6178,12 @@
if (api_timer != 0) {
time_out_value= time_out_param + (api_con_ptr & mask_value);
time_passed= tc_timer - api_timer;
- if (time_passed > time_out_value) {
+ if (time_passed > time_out_value)
+ {
jam();
- timeOutFoundLab(signal, api_con_ptr);
- return;
+ timeOutFoundLab(signal, api_con_ptr, ZTIME_OUT_ERROR);
+ api_con_ptr++;
+ break;
}
}
}
@@ -6179,10 +6203,8 @@
return;
}//Dbtc::timeOutLoopStartLab()
-void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr)
+void Dbtc::timeOutFoundLab(Signal* signal, Uint32 TapiConPtr, Uint32 errCode)
{
- sendContinueTimeOutControl(signal, TapiConPtr + 1);
-
apiConnectptr.i = TapiConPtr;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
/*------------------------------------------------------------------*/
@@ -6195,7 +6217,8 @@
<< "Time-out in state = " << apiConnectptr.p->apiConnectstate
<< " apiConnectptr.i = " << apiConnectptr.i
<< " - exec: " << apiConnectptr.p->m_exec_flag
- << " - place: " << c_apiConTimer_line[apiConnectptr.i]);
+ << " - place: " << c_apiConTimer_line[apiConnectptr.i]
+ << " code: " << errCode);
switch (apiConnectptr.p->apiConnectstate) {
case CS_STARTED:
if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){
@@ -6212,7 +6235,7 @@
}//if
}
apiConnectptr.p->returnsignal = RS_TCROLLBACKREP;
- apiConnectptr.p->returncode = ZTIME_OUT_ERROR;
+ apiConnectptr.p->returncode = errCode;
abort010Lab(signal);
return;
case CS_RECEIVING:
@@ -6225,7 +6248,7 @@
/* START ABORTING THE TRANSACTION. ALSO START CHECKING THE */
/* REMAINING TRANSACTIONS. */
/*------------------------------------------------------------------*/
- terrorCode = ZTIME_OUT_ERROR;
+ terrorCode = errCode;
abortErrorLab(signal);
return;
case CS_COMMITTING:
@@ -6432,6 +6455,7 @@
return;
}
+ bool found = false;
OperationState tmp[16];
Uint32 TloopCount = 0;
@@ -6439,7 +6463,31 @@
jam();
if (tcConnectptr.i == RNIL) {
jam();
- if (Tcheck == 0) {
+
+#ifdef VM_TRACE
+ ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d",
+ found, Tcheck, apiConnectptr.p->counter);
+#endif
+ if (found || apiConnectptr.p->counter)
+ {
+ jam();
+ /**
+ * We sent atleast one ABORT/ABORTED
+ * or ZABORT_TIMEOUT_BREAK is in job buffer
+ * wait for reception...
+ */
+ return;
+ }
+
+ if (Tcheck == 1)
+ {
+ jam();
+ releaseAbortResources(signal);
+ return;
+ }
+
+ if (Tcheck == 0)
+ {
jam();
/*------------------------------------------------------------------
* All nodes had already reported ABORTED for all tcConnect records.
@@ -6448,9 +6496,11 @@
*------------------------------------------------------------------*/
char buf[96]; buf[0] = 0;
char buf2[96];
- BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:",
- __LINE__, apiConnectptr.i);
- for(Uint32 i = 0; i<TloopCount; i++){
+ BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:",
+ __LINE__, apiConnectptr.i,
+ apiConnectptr.p->counter);
+ for(Uint32 i = 0; i<TloopCount; i++)
+ {
BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]);
BaseString::snprintf(buf, sizeof(buf), buf2);
}
@@ -6458,7 +6508,9 @@
ndbout_c(buf);
ndbrequire(false);
releaseAbortResources(signal);
+ return;
}
+
return;
}//if
TloopCount++;
@@ -6473,7 +6525,16 @@
signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK;
signal->theData[1] = tcConnectptr.i;
signal->theData[2] = apiConnectptr.i;
- sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ if (ERROR_INSERTED(8050))
+ {
+ ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)",
+ Tcheck, apiConnectptr.p->counter);
+ sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3);
+ }
+ else
+ {
+ sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ }
return;
}//if
ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
@@ -6496,7 +6557,7 @@
jam();
if (tcConnectptr.p->tcNodedata[Ti] != 0) {
TloopCount += 31;
- Tcheck = 1;
+ found = true;
hostptr.i = tcConnectptr.p->tcNodedata[Ti];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
if (hostptr.p->hostStatus == HS_ALIVE) {
@@ -6869,58 +6930,44 @@
const Uint32 tnewMasterId = nodeFail->masterNodeId;
arrGuard(tnoOfNodes, MAX_NDB_NODES);
+ Uint32 i;
int index = 0;
- for (unsigned i = 1; i< MAX_NDB_NODES; i++) {
- if(NodeBitmask::get(nodeFail->theNodes, i)){
+ for (i = 1; i< MAX_NDB_NODES; i++)
+ {
+ if(NodeBitmask::get(nodeFail->theNodes, i))
+ {
cdata[index] = i;
index++;
}//if
}//for
+ cmasterNodeId = tnewMasterId;
+
tcNodeFailptr.i = 0;
ptrAss(tcNodeFailptr, tcFailRecord);
- Uint32 tindex;
- for (tindex = 0; tindex < tnoOfNodes; tindex++) {
+ for (i = 0; i < tnoOfNodes; i++)
+ {
jam();
- hostptr.i = cdata[tindex];
+ hostptr.i = cdata[i];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
+
/*------------------------------------------------------------*/
/* SET STATUS OF THE FAILED NODE TO DEAD SINCE IT HAS */
/* FAILED. */
/*------------------------------------------------------------*/
hostptr.p->hostStatus = HS_DEAD;
+ hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS;
+ c_alive_nodes.clear(hostptr.i);
- if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
- jam();
- /*------------------------------------------------------------*/
- /* A VERY UNUSUAL SITUATION. THE TAKE OVER WAS COMPLETED*/
- /* EVEN BEFORE WE HEARD ABOUT THE NODE FAILURE REPORT. */
- /* HOWEVER UNUSUAL THIS SITUATION IS POSSIBLE. */
- /*------------------------------------------------------------*/
- /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */
- /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
- /* USED THEM IS COMPLETED. */
- /*------------------------------------------------------------*/
- {
- NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
- nfRep->blockNo = DBTC;
- nfRep->nodeId = cownNodeid;
- nfRep->failedNodeId = hostptr.i;
- }
- sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal,
- NFCompleteRep::SignalLength, JBB);
- } else {
- ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE);
- hostptr.p->takeOverStatus = TOS_NODE_FAILED;
- }//if
-
- if (tcNodeFailptr.p->failStatus == FS_LISTENING) {
+ if (tcNodeFailptr.p->failStatus == FS_LISTENING)
+ {
jam();
/*------------------------------------------------------------*/
/* THE CURRENT TAKE OVER CAN BE AFFECTED BY THIS NODE */
/* FAILURE. */
/*------------------------------------------------------------*/
- if (hostptr.p->lqhTransStatus == LTS_ACTIVE) {
+ if (hostptr.p->lqhTransStatus == LTS_ACTIVE)
+ {
jam();
/*------------------------------------------------------------*/
/* WE WERE WAITING FOR THE FAILED NODE IN THE TAKE OVER */
@@ -6932,86 +6979,46 @@
}//if
}//if
- }//for
-
- const bool masterFailed = (cmasterNodeId != tnewMasterId);
- cmasterNodeId = tnewMasterId;
-
- if(getOwnNodeId() == cmasterNodeId && masterFailed){
- /**
- * Master has failed and I'm the new master
- */
- jam();
-
- for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+ if (getOwnNodeId() != tnewMasterId)
+ {
jam();
- ptrAss(hostptr, hostRecord);
- if (hostptr.p->hostStatus != HS_ALIVE) {
- jam();
- if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
- jam();
- /*------------------------------------------------------------*/
- /* SEND TAKE OVER CONFIRMATION TO ALL ALIVE NODES IF */
- /* TAKE OVER IS COMPLETED. THIS IS PERFORMED TO ENSURE */
- /* THAT ALL NODES AGREE ON THE IDLE STATE OF THE TAKE */
- /* OVER. THIS MIGHT BE MISSED IN AN ERROR SITUATION IF */
- /* MASTER FAILS AFTER SENDING CONFIRMATION TO NEW */
- /* MASTER BUT FAILING BEFORE SENDING TO ANOTHER NODE */
- /* WHICH WAS NOT MASTER. IF THIS NODE LATER BECOMES */
- /* MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */
- /* CRASHED NODE HAVE ALREADY RECOVERED. */
- /*------------------------------------------------------------*/
- for(tmpHostptr.i = 1; tmpHostptr.i < MAX_NDB_NODES;tmpHostptr.i++) {
- jam();
- ptrAss(tmpHostptr, hostRecord);
- if (tmpHostptr.p->hostStatus == HS_ALIVE) {
- jam();
- tblockref = calcTcBlockRef(tmpHostptr.i);
- signal->theData[0] = hostptr.i;
- sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
- }//if
- }//for
- }//if
- }//if
- }//for
- }
-
- if(getOwnNodeId() == cmasterNodeId){
- jam();
- for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+ /**
+ * Only master does takeover currently
+ */
+ hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER;
+ }
+ else
+ {
jam();
- ptrAss(hostptr, hostRecord);
- if (hostptr.p->hostStatus != HS_ALIVE) {
- jam();
- if (hostptr.p->takeOverStatus == TOS_NODE_FAILED) {
- jam();
- /*------------------------------------------------------------*/
- /* CONCLUDE ALL ACTIVITIES THE FAILED TC DID CONTROL */
- /* SINCE WE ARE THE MASTER. THIS COULD HAVE BEEN STARTED*/
- /* BY A PREVIOUS MASTER BUT HAVE NOT BEEN CONCLUDED YET.*/
- /*------------------------------------------------------------*/
- hostptr.p->takeOverStatus = TOS_ACTIVE;
- signal->theData[0] = hostptr.i;
- sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
- }//if
- }//if
- }//for
- }//if
- for (tindex = 0; tindex < tnoOfNodes; tindex++) {
- jam();
- hostptr.i = cdata[tindex];
- ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- /*------------------------------------------------------------*/
- /* LOOP THROUGH AND ABORT ALL SCANS THAT WHERE */
- /* CONTROLLED BY THIS TC AND ACTIVE IN THE FAILED */
- /* NODE'S LQH */
- /*------------------------------------------------------------*/
+ signal->theData[0] = hostptr.i;
+ sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
+ }
+
checkScanActiveInFailedLqh(signal, 0, hostptr.i);
checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid
- }//for
-
+ nodeFailCheckTransactions(signal, 0, hostptr.i);
+ }
}//Dbtc::execNODE_FAILREP()
+void
+Dbtc::checkNodeFailComplete(Signal* signal,
+ Uint32 failedNodeId,
+ Uint32 bit)
+{
+ hostptr.i = failedNodeId;
+ ptrCheckGuard(hostptr, chostFilesize, hostRecord);
+ hostptr.p->m_nf_bits &= ~bit;
+ if (hostptr.p->m_nf_bits == 0)
+ {
+ NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
+ nfRep->blockNo = DBTC;
+ nfRep->nodeId = cownNodeid;
+ nfRep->failedNodeId = hostptr.i;
+ sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal,
+ NFCompleteRep::SignalLength, JBB);
+ }
+}
+
void Dbtc::checkScanActiveInFailedLqh(Signal* signal,
Uint32 scanPtrI,
Uint32 failedNodeId){
@@ -7053,8 +7060,44 @@
sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
return;
}//for
+
+ checkNodeFailComplete(signal, failedNodeId, HostRecord::NF_CHECK_SCAN);
+}
+
+void
+Dbtc::nodeFailCheckTransactions(Signal* signal,
+ Uint32 transPtrI,
+ Uint32 failedNodeId)
+{
+ jam();
+ Ptr<ApiConnectRecord> transPtr;
+ for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++)
+ {
+ ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord);
+ if (transPtr.p->m_transaction_nodes.get(failedNodeId))
+ {
+ jam();
+ // Force timeout regardless of state
+ Uint32 save = c_appl_timeout_value;
+ c_appl_timeout_value = 1;
+ setApiConTimer(transPtr.i, 0, __LINE__);
+ timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT);
+ c_appl_timeout_value = save;
+ }
+
+ // Send CONTINUEB to continue later
+ signal->theData[0] = TcContinueB::ZNF_CHECK_TRANSACTIONS;
+ signal->theData[1] = transPtr.i + 1; // Check next
+ signal->theData[2] = failedNodeId;
+ sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ return;
+ }
+
+ checkNodeFailComplete(signal, failedNodeId,
+ HostRecord::NF_CHECK_TRANSACTION);
}
+
void
Dbtc::checkScanFragList(Signal* signal,
Uint32 failedNodeId,
@@ -7070,54 +7113,14 @@
tfailedNodeId = signal->theData[0];
hostptr.i = tfailedNodeId;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- switch (hostptr.p->takeOverStatus) {
- case TOS_IDLE:
- jam();
- /*------------------------------------------------------------*/
- /* THIS MESSAGE ARRIVED EVEN BEFORE THE NODE_FAILREP */
- /* MESSAGE. THIS IS POSSIBLE IN EXTREME SITUATIONS. */
- /* WE SET THE STATE TO TAKE_OVER_COMPLETED AND WAIT */
- /* FOR THE NODE_FAILREP MESSAGE. */
- /*------------------------------------------------------------*/
- hostptr.p->takeOverStatus = TOS_COMPLETED;
- break;
- case TOS_NODE_FAILED:
- case TOS_ACTIVE:
- jam();
- /*------------------------------------------------------------*/
- /* WE ARE NOT MASTER AND THE TAKE OVER IS ACTIVE OR WE */
- /* ARE MASTER AND THE TAKE OVER IS ACTIVE. IN BOTH */
- /* WE SET THE STATE TO TAKE_OVER_COMPLETED. */
- /*------------------------------------------------------------*/
- /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */
- /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
- /* USED THEM IS COMPLETED. */
- /*------------------------------------------------------------*/
- hostptr.p->takeOverStatus = TOS_COMPLETED;
- {
- NFCompleteRep * const nfRep = (NFCompleteRep *)&signal->theData[0];
- nfRep->blockNo = DBTC;
- nfRep->nodeId = cownNodeid;
- nfRep->failedNodeId = hostptr.i;
- }
- sendSignal(cdihblockref, GSN_NF_COMPLETEREP, signal,
- NFCompleteRep::SignalLength, JBB);
- break;
- case TOS_COMPLETED:
- jam();
- /*------------------------------------------------------------*/
- /* WE HAVE ALREADY RECEIVED THE CONF SIGNAL. IT IS MOST */
- /* LIKELY SENT FROM A NEW MASTER WHICH WASN'T SURE IF */
- /* THIS NODE HEARD THE CONF SIGNAL FROM THE OLD MASTER. */
- /* WE SIMPLY IGNORE THE MESSAGE. */
- /*------------------------------------------------------------*/
- /*empty*/;
- break;
- default:
+
+ if (signal->getSendersBlockRef() != reference())
+ {
jam();
- systemErrorLab(signal, __LINE__);
return;
- }//switch
+ }
+
+ checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER);
}//Dbtc::execTAKE_OVERTCCONF()
void Dbtc::execTAKE_OVERTCREQ(Signal* signal)
@@ -7357,16 +7360,10 @@
/* TO REPORT THE COMPLETION OF THE TAKE OVER TO ALL */
/* NODES THAT ARE ALIVE. */
/*------------------------------------------------------------*/
- for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
- jam();
- ptrAss(hostptr, hostRecord);
- if (hostptr.p->hostStatus == HS_ALIVE) {
- jam();
- tblockref = calcTcBlockRef(hostptr.i);
- signal->theData[0] = tcNodeFailptr.p->takeOverNode;
- sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
- }//if
- }//for
+ NodeReceiverGroup rg(DBTC, c_alive_nodes);
+ signal->theData[0] = tcNodeFailptr.p->takeOverNode;
+ sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
+
if (tcNodeFailptr.p->queueIndex > 0) {
jam();
/*------------------------------------------------------------*/
@@ -8048,6 +8045,7 @@
apiConnectptr.p->ndbapiBlockref = 0;
apiConnectptr.p->ndbapiConnect = 0;
apiConnectptr.p->buddyPtr = RNIL;
+ apiConnectptr.p->m_transaction_nodes.clear();
setApiConTimer(apiConnectptr.i, 0, __LINE__);
switch(ttransStatus){
case LqhTransConf::Committed:
@@ -9875,6 +9873,7 @@
apiConnectptr.p->executingIndexOp = RNIL;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->currSavePointId = 0;
+ apiConnectptr.p->m_transaction_nodes.clear();
}//for
apiConnectptr.i = tiacTmp - 1;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9902,6 +9901,7 @@
apiConnectptr.p->executingIndexOp = RNIL;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->currSavePointId = 0;
+ apiConnectptr.p->m_transaction_nodes.clear();
}//for
apiConnectptr.i = (2 * tiacTmp) - 1;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9929,6 +9929,7 @@
apiConnectptr.p->executingIndexOp = RNIL;
apiConnectptr.p->buddyPtr = RNIL;
apiConnectptr.p->currSavePointId = 0;
+ apiConnectptr.p->m_transaction_nodes.clear();
}//for
apiConnectptr.i = (3 * tiacTmp) - 1;
ptrCheckGuard(apiConnectptr, capiConnectFilesize, apiConnectRecord);
@@ -9989,13 +9990,13 @@
ptrAss(hostptr, hostRecord);
hostptr.p->hostStatus = HS_DEAD;
hostptr.p->inPackedList = false;
- hostptr.p->takeOverStatus = TOS_NOT_DEFINED;
hostptr.p->lqhTransStatus = LTS_IDLE;
hostptr.p->noOfWordsTCKEYCONF = 0;
hostptr.p->noOfWordsTCINDXCONF = 0;
hostptr.p->noOfPackedWordsLqh = 0;
hostptr.p->hostLqhBlockRef = calcLqhBlockRef(hostptr.i);
}//for
+ c_alive_nodes.clear();
}//Dbtc::inithost()
void Dbtc::initialiseRecordsLab(Signal* signal, UintR Tdata0,
@@ -10248,6 +10249,7 @@
}//while
apiConnectptr.p->firstTcConnect = RNIL;
apiConnectptr.p->lastTcConnect = RNIL;
+ apiConnectptr.p->m_transaction_nodes.clear();
// MASV let state be CS_ABORTING until all
// signals in the "air" have been received. Reset to CS_CONNECTED
@@ -10321,6 +10323,7 @@
cfirstfreeApiConnect = TlocalApiConnectptr.i;
setApiConTimer(TlocalApiConnectptr.i, 0, __LINE__);
TlocalApiConnectptr.p->apiConnectstate = CS_DISCONNECTED;
+ ndbassert(TlocalApiConnectptr.p->m_transaction_nodes.isclear());
ndbassert(TlocalApiConnectptr.p->apiScanRec == RNIL);
TlocalApiConnectptr.p->ndbapiBlockref = 0;
}//Dbtc::releaseApiCon()
@@ -10855,6 +10858,34 @@
infoEvent("IndexOpCount: pool: %d free: %d",
c_theIndexOperationPool.getSize(),
c_theIndexOperationPool.getNoOfFree());
+ }
+
+ if (dumpState->args[0] == 2514)
+ {
+ if (signal->getLength() == 2)
+ {
+ dumpState->args[0] = DumpStateOrd::TcDumpOneApiConnectRec;
+ execDUMP_STATE_ORD(signal);
+ }
+
+ NodeReceiverGroup rg(CMVMI, c_alive_nodes);
+ dumpState->args[0] = 15;
+ sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);
+
+ signal->theData[0] = 2515;
+ sendSignalWithDelay(cownref, GSN_DUMP_STATE_ORD, signal, 1000, 1);
+ return;
+ }
+
+ if (dumpState->args[0] == 2515)
+ {
+ NdbNodeBitmask mask = c_alive_nodes;
+ mask.clear(getOwnNodeId());
+ NodeReceiverGroup rg(NDBCNTR, mask);
+
+ sendSignal(rg, GSN_SYSTEM_ERROR, signal, 1, JBB);
+ sendSignalWithDelay(cownref, GSN_SYSTEM_ERROR, signal, 300, 1);
+ return;
}
}//Dbtc::execDUMP_STATE_ORD()
| Thread | Author | Date |
|---|---|---|
| • bk commit into 5.0 tree (tomas:1.2135) | tomas | 4 Apr |