Below is the list of changes that have just been committed into a local
4.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2474 06/03/20 14:53:29 jonas@stripped +3 -0
ndb - wl2610, bug#18352
Remove useless and tricky state fiddling in TC
to synchronize NF_CompleteRep, as that code is already present in DIH as well
Keep broadcast of TAKE_OVERTCCONF for online upgrade
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
1.63 06/03/20 14:53:27 jonas@stripped +42 -144
Remove useless and tricky state fiddling in TC
to synchronize NF_CompleteRep, as that code is already present in DIH as well
Keep broadcast of TAKE_OVERTCCONF for online upgrade
ndb/src/kernel/blocks/dbtc/Dbtc.hpp
1.23 06/03/20 14:53:27 jonas@stripped +0 -9
Remove useless and tricky state fiddling in TC
to synchronize NF_CompleteRep, as that code is already present in DIH as well
Keep broadcast of TAKE_OVERTCCONF for online upgrade
ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
1.61 06/03/20 14:53:27 jonas@stripped +166 -0
Add clever dump for showing active operations
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: jonas
# Host: perch.ndb.mysql.com
# Root: /home/jonas/src/41-work
--- 1.60/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2006-02-10 09:42:32 +01:00
+++ 1.61/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2006-03-20 14:53:27 +01:00
@@ -18448,6 +18448,172 @@
c_error_insert_table_id = dumpState->args[1];
SET_ERROR_INSERT_VALUE(5042);
}
+
+ TcConnectionrec *regTcConnectionrec = tcConnectionrec;
+ Uint32 ttcConnectrecFileSize = ctcConnectrecFileSize;
+ Uint32 arg = dumpState->args[0];
+ if(arg == 2306)
+ {
+ for(Uint32 i = 0; i<1024; i++)
+ {
+ TcConnectionrecPtr tcRec;
+ tcRec.i = ctransidHash[i];
+ while(tcRec.i != RNIL)
+ {
+ ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec);
+ ndbout << "TcConnectionrec " << tcRec.i;
+ signal->theData[0] = 2307;
+ signal->theData[1] = tcRec.i;
+ execDUMP_STATE_ORD(signal);
+ tcRec.i = tcRec.p->nextHashRec;
+ }
+ }
+ }
+
+ if(arg == 2307 || arg == 2308)
+ {
+ TcConnectionrecPtr tcRec;
+ tcRec.i = signal->theData[1];
+ ptrCheckGuard(tcRec, ttcConnectrecFileSize, regTcConnectionrec);
+
+ ndbout << " transactionState = " <<
tcRec.p->transactionState<<endl;
+ ndbout << " operation = " << tcRec.p->operation<<endl;
+ ndbout << " tcNodeFailrec = " << tcRec.p->tcNodeFailrec
+ << " seqNoReplica = " << tcRec.p->seqNoReplica
+ << " simpleRead = " << tcRec.p->simpleRead
+ << endl;
+ ndbout << " replicaType = " << tcRec.p->replicaType
+ << " reclenAiLqhkey = " << tcRec.p->reclenAiLqhkey
+ << " opExec = " << tcRec.p->opExec
+ << endl;
+ ndbout << " opSimple = " << tcRec.p->opSimple
+ << " nextSeqNoReplica = " << tcRec.p->nextSeqNoReplica
+ << " lockType = " << tcRec.p->lockType
+ << endl;
+ ndbout << " lastReplicaNo = " << tcRec.p->lastReplicaNo
+ << " indTakeOver = " << tcRec.p->indTakeOver
+ << " dirtyOp = " << tcRec.p->dirtyOp
+ << endl;
+ ndbout << " activeCreat = " << tcRec.p->activeCreat
+ << " tcBlockref = " << hex << tcRec.p->tcBlockref
+ << " reqBlockref = " << hex << tcRec.p->reqBlockref
+ << " primKeyLen = " << tcRec.p->primKeyLen
+ << endl;
+ ndbout << " nextReplica = " << tcRec.p->nextReplica
+ << " tcBlockref = " << hex << tcRec.p->tcBlockref
+ << " reqBlockref = " << hex << tcRec.p->reqBlockref
+ << " primKeyLen = " << tcRec.p->primKeyLen
+ << endl;
+ ndbout << " logStopPageNo = " << tcRec.p->logStopPageNo
+ << " logStartPageNo = " << tcRec.p->logStartPageNo
+ << " logStartPageIndex = " << tcRec.p->logStartPageIndex
+ << endl;
+ ndbout << " errorCode = " << tcRec.p->errorCode
+ << " clientBlockref = " << hex << tcRec.p->clientBlockref
+ << " applRef = " << hex << tcRec.p->applRef
+ << " totSendlenAi = " << tcRec.p->totSendlenAi
+ << endl;
+ ndbout << " totReclenAi = " << tcRec.p->totReclenAi
+ << " tcScanRec = " << tcRec.p->tcScanRec
+ << " tcScanInfo = " << tcRec.p->tcScanInfo
+ << " tcOprec = " << hex << tcRec.p->tcOprec
+ << endl;
+ ndbout << " tableref = " << tcRec.p->tableref
+ << " simpleTcConnect = " << tcRec.p->simpleTcConnect
+ << " storedProcId = " << tcRec.p->storedProcId
+ << " schemaVersion = " << tcRec.p->schemaVersion
+ << endl;
+ ndbout << " reqinfo = " << tcRec.p->reqinfo
+ << " reqRef = " << tcRec.p->reqRef
+ << " readlenAi = " << tcRec.p->readlenAi
+ << " prevTc = " << tcRec.p->prevTc
+ << endl;
+ ndbout << " prevLogTcrec = " << tcRec.p->prevLogTcrec
+ << " prevHashRec = " << tcRec.p->prevHashRec
+ << " nodeAfterNext0 = " << tcRec.p->nodeAfterNext[0]
+ << " nodeAfterNext1 = " << tcRec.p->nodeAfterNext[1]
+ << endl;
+ ndbout << " nextTcConnectrec = " << tcRec.p->nextTcConnectrec
+ << " nextTc = " << tcRec.p->nextTc
+ << " nextTcLogQueue = " << tcRec.p->nextTcLogQueue
+ << " nextLogTcrec = " << tcRec.p->nextLogTcrec
+ << endl;
+ ndbout << " nextHashRec = " << tcRec.p->nextHashRec
+ << " logWriteState = " << tcRec.p->logWriteState
+ << " logStartFileNo = " << tcRec.p->logStartFileNo
+ << " listState = " << tcRec.p->listState
+ << endl;
+ ndbout << " lastAttrinbuf = " << tcRec.p->lastAttrinbuf
+ << " lastTupkeybuf = " << tcRec.p->lastTupkeybuf
+ << " hashValue = " << tcRec.p->hashValue
+ << endl;
+ ndbout << " gci = " << tcRec.p->gci
+ << " fragmentptr = " << tcRec.p->fragmentptr
+ << " fragmentid = " << tcRec.p->fragmentid
+ << " firstTupkeybuf = " << tcRec.p->firstTupkeybuf
+ << endl;
+ ndbout << " firstAttrinbuf = " << tcRec.p->firstAttrinbuf
+ << " currTupAiLen = " << tcRec.p->currTupAiLen
+ << " currReclenAi = " << tcRec.p->currReclenAi
+ << endl;
+ ndbout << " tcTimer = " << tcRec.p->tcTimer
+ << " clientConnectrec = " << tcRec.p->clientConnectrec
+ << " applOprec = " << hex << tcRec.p->applOprec
+ << " abortState = " << tcRec.p->abortState
+ << endl;
+ ndbout << " transid0 = " << hex << tcRec.p->transid[0]
+ << " transid1 = " << hex << tcRec.p->transid[1]
+ << " tupkeyData0 = " << tcRec.p->tupkeyData[0]
+ << " tupkeyData1 = " << tcRec.p->tupkeyData[1]
+ << endl;
+ ndbout << " tupkeyData2 = " << tcRec.p->tupkeyData[2]
+ << " tupkeyData3 = " << tcRec.p->tupkeyData[3]
+ << endl;
+ switch (tcRec.p->transactionState) {
+
+ case TcConnectionrec::SCAN_STATE_USED:
+ if (tcRec.p->tcScanRec < cscanrecFileSize){
+ ScanRecordPtr TscanPtr;
+ c_scanRecordPool.getPtr(TscanPtr, tcRec.p->tcScanRec);
+ ndbout << " scanState = " << TscanPtr.p->scanState << endl;
+ //TscanPtr.p->scanLocalref[2];
+ ndbout << " copyPtr="<<TscanPtr.p->copyPtr
+ << " scanAccPtr="<<TscanPtr.p->scanAccPtr
+ << " scanAiLength="<<TscanPtr.p->scanAiLength
+ << endl;
+ ndbout << " m_curr_batch_size_rows="<<
+ TscanPtr.p->m_curr_batch_size_rows
+ << " m_max_batch_size_rows="<<
+ TscanPtr.p->m_max_batch_size_rows
+ << " scanErrorCounter="<<TscanPtr.p->scanErrorCounter
+ << endl;
+ ndbout << " scanSchemaVersion="<<TscanPtr.p->scanSchemaVersion
+ << " scanStoredProcId="<<TscanPtr.p->scanStoredProcId
+ << " scanTcrec="<<TscanPtr.p->scanTcrec
+ << endl;
+ ndbout << " scanType="<<TscanPtr.p->scanType
+ << " scanApiBlockref="<<TscanPtr.p->scanApiBlockref
+ << " scanNodeId="<<TscanPtr.p->scanNodeId
+ << " scanCompletedStatus="<<TscanPtr.p->scanCompletedStatus
+ << endl;
+ ndbout << " scanFlag="<<TscanPtr.p->scanFlag
+ << " scanLockHold="<<TscanPtr.p->scanLockHold
+ << " scanLockMode="<<TscanPtr.p->scanLockMode
+ << " scanNumber="<<TscanPtr.p->scanNumber
+ << endl;
+ ndbout << " scanReleaseCounter="<<TscanPtr.p->scanReleaseCounter
+ << " scanTcWaiting="<<TscanPtr.p->scanTcWaiting
+ << " scanKeyinfoFlag="<<TscanPtr.p->scanKeyinfoFlag
+ << endl;
+ } else{
+ ndbout << "No connected scan record found" << endl;
+ }
+ break;
+ default:
+ break;
+ }
+ ndbrequire(arg != 2308);
+ }
}//Dblqh::execDUMP_STATE_ORD()
--- 1.22/ndb/src/kernel/blocks/dbtc/Dbtc.hpp 2006-03-20 11:29:56 +01:00
+++ 1.23/ndb/src/kernel/blocks/dbtc/Dbtc.hpp 2006-03-20 14:53:27 +01:00
@@ -211,14 +211,6 @@
LTS_ACTIVE = 1
};
- enum TakeOverState {
- TOS_NOT_DEFINED = 0,
- TOS_IDLE = 1,
- TOS_ACTIVE = 2,
- TOS_COMPLETED = 3,
- TOS_NODE_FAILED = 4
- };
-
enum FailState {
FS_IDLE = 0,
FS_LISTENING = 1,
@@ -933,7 +925,6 @@
struct HostRecord {
HostState hostStatus;
LqhTransState lqhTransStatus;
- TakeOverState takeOverStatus;
bool inPackedList;
UintR noOfPackedWordsLqh;
UintR packedWordsLqh[26];
--- 1.62/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-03-20 11:29:56 +01:00
+++ 1.63/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-03-20 14:53:27 +01:00
@@ -303,7 +303,6 @@
hostptr.i = signal->theData[1];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
hostptr.p->hostStatus = HS_ALIVE;
- hostptr.p->takeOverStatus = TOS_IDLE;
signal->theData[0] = cownref;
c_alive_nodes.set(hostptr.i);
sendSignal(tblockref, GSN_INCL_NODECONF, signal, 1, JBB);
@@ -856,8 +855,6 @@
hostptr.i = i;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- hostptr.p->takeOverStatus = TOS_IDLE;
-
if (NodeBitmask::get(readNodes->inactiveNodes, i)) {
jam();
hostptr.p->hostStatus = HS_DEAD;
@@ -6826,21 +6823,27 @@
const Uint32 tnewMasterId = nodeFail->masterNodeId;
arrGuard(tnoOfNodes, MAX_NDB_NODES);
+ Uint32 i;
int index = 0;
- for (unsigned i = 1; i< MAX_NDB_NODES; i++) {
- if(NodeBitmask::get(nodeFail->theNodes, i)){
+ for (i = 1; i< MAX_NDB_NODES; i++)
+ {
+ if(NodeBitmask::get(nodeFail->theNodes, i))
+ {
cdata[index] = i;
index++;
}//if
}//for
+ cmasterNodeId = tnewMasterId;
+
tcNodeFailptr.i = 0;
ptrAss(tcNodeFailptr, tcFailRecord);
- Uint32 tindex;
- for (tindex = 0; tindex < tnoOfNodes; tindex++) {
+ for (i = 0; i < tnoOfNodes; i++)
+ {
jam();
- hostptr.i = cdata[tindex];
+ hostptr.i = cdata[i];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
+
/*------------------------------------------------------------*/
/* SET STATUS OF THE FAILED NODE TO DEAD SINCE IT HAS */
/* FAILED. */
@@ -6849,30 +6852,15 @@
hostptr.p->m_nf_bits = HostRecord::NF_NODE_FAIL_BITS;
c_alive_nodes.clear(hostptr.i);
- if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
- jam();
- /*------------------------------------------------------------*/
- /* A VERY UNUSUAL SITUATION. THE TAKE OVER WAS COMPLETED*/
- /* EVEN BEFORE WE HEARD ABOUT THE NODE FAILURE REPORT. */
- /* HOWEVER UNUSUAL THIS SITUATION IS POSSIBLE. */
- /*------------------------------------------------------------*/
- /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */
- /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
- /* USED THEM IS COMPLETED. */
- /*------------------------------------------------------------*/
- hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER;
- } else {
- ndbrequire(hostptr.p->takeOverStatus == TOS_IDLE);
- hostptr.p->takeOverStatus = TOS_NODE_FAILED;
- }//if
-
- if (tcNodeFailptr.p->failStatus == FS_LISTENING) {
+ if (tcNodeFailptr.p->failStatus == FS_LISTENING)
+ {
jam();
/*------------------------------------------------------------*/
/* THE CURRENT TAKE OVER CAN BE AFFECTED BY THIS NODE */
/* FAILURE. */
/*------------------------------------------------------------*/
- if (hostptr.p->lqhTransStatus == LTS_ACTIVE) {
+ if (hostptr.p->lqhTransStatus == LTS_ACTIVE)
+ {
jam();
/*------------------------------------------------------------*/
/* WE WERE WAITING FOR THE FAILED NODE IN THE TAKE OVER */
@@ -6884,78 +6872,25 @@
}//if
}//if
- }//for
-
- const bool masterFailed = (cmasterNodeId != tnewMasterId);
- cmasterNodeId = tnewMasterId;
-
- if(getOwnNodeId() == cmasterNodeId && masterFailed){
- /**
- * Master has failed and I'm the new master
- */
- jam();
-
- for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+ if (getOwnNodeId() != tnewMasterId)
+ {
jam();
- ptrAss(hostptr, hostRecord);
- if (hostptr.p->hostStatus != HS_ALIVE) {
- jam();
- if (hostptr.p->takeOverStatus == TOS_COMPLETED) {
- jam();
- /*------------------------------------------------------------*/
- /* SEND TAKE OVER CONFIRMATION TO ALL ALIVE NODES IF */
- /* TAKE OVER IS COMPLETED. THIS IS PERFORMED TO ENSURE */
- /* THAT ALL NODES AGREE ON THE IDLE STATE OF THE TAKE */
- /* OVER. THIS MIGHT BE MISSED IN AN ERROR SITUATION IF */
- /* MASTER FAILS AFTER SENDING CONFIRMATION TO NEW */
- /* MASTER BUT FAILING BEFORE SENDING TO ANOTHER NODE */
- /* WHICH WAS NOT MASTER. IF THIS NODE LATER BECOMES */
- /* MASTER IT MIGHT START A NEW TAKE OVER EVEN AFTER THE */
- /* CRASHED NODE HAVE ALREADY RECOVERED. */
- /*------------------------------------------------------------*/
- NodeReceiverGroup rg(DBTC, c_alive_nodes);
- signal->theData[0] = hostptr.i;
- sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
- }//if
- }//if
- }//for
- }
-
- if(getOwnNodeId() == cmasterNodeId){
- jam();
- for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
+ /**
+ * Only master does takeover currently
+ */
+ hostptr.p->m_nf_bits &= ~HostRecord::NF_TAKEOVER;
+ }
+ else
+ {
jam();
- ptrAss(hostptr, hostRecord);
- if (hostptr.p->hostStatus != HS_ALIVE) {
- jam();
- if (hostptr.p->takeOverStatus == TOS_NODE_FAILED) {
- jam();
- /*------------------------------------------------------------*/
- /* CONCLUDE ALL ACTIVITIES THE FAILED TC DID CONTROL */
- /* SINCE WE ARE THE MASTER. THIS COULD HAVE BEEN STARTED*/
- /* BY A PREVIOUS MASTER BUT HAVE NOT BEEN CONCLUDED YET.*/
- /*------------------------------------------------------------*/
- hostptr.p->takeOverStatus = TOS_ACTIVE;
- signal->theData[0] = hostptr.i;
- sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
- }//if
- }//if
- }//for
- }//if
- for (tindex = 0; tindex < tnoOfNodes; tindex++) {
- jam();
- hostptr.i = cdata[tindex];
- ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- /*------------------------------------------------------------*/
- /* LOOP THROUGH AND ABORT ALL SCANS THAT WHERE */
- /* CONTROLLED BY THIS TC AND ACTIVE IN THE FAILED */
- /* NODE'S LQH */
- /*------------------------------------------------------------*/
+ signal->theData[0] = hostptr.i;
+ sendSignal(cownref, GSN_TAKE_OVERTCREQ, signal, 1, JBB);
+ }
+
checkScanActiveInFailedLqh(signal, 0, hostptr.i);
checkWaitDropTabFailedLqh(signal, hostptr.i, 0); // nodeid, tableid
nodeFailCheckTransactions(signal, 0, hostptr.i);
- }//for
-
+ }
}//Dbtc::execNODE_FAILREP()
void
@@ -7071,47 +7006,17 @@
tfailedNodeId = signal->theData[0];
hostptr.i = tfailedNodeId;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- switch (hostptr.p->takeOverStatus) {
- case TOS_IDLE:
- jam();
- /*------------------------------------------------------------*/
- /* THIS MESSAGE ARRIVED EVEN BEFORE THE NODE_FAILREP */
- /* MESSAGE. THIS IS POSSIBLE IN EXTREME SITUATIONS. */
- /* WE SET THE STATE TO TAKE_OVER_COMPLETED AND WAIT */
- /* FOR THE NODE_FAILREP MESSAGE. */
- /*------------------------------------------------------------*/
- hostptr.p->takeOverStatus = TOS_COMPLETED;
- break;
- case TOS_NODE_FAILED:
- case TOS_ACTIVE:
- jam();
- /*------------------------------------------------------------*/
- /* WE ARE NOT MASTER AND THE TAKE OVER IS ACTIVE OR WE */
- /* ARE MASTER AND THE TAKE OVER IS ACTIVE. IN BOTH */
- /* WE SET THE STATE TO TAKE_OVER_COMPLETED. */
- /*------------------------------------------------------------*/
- /* RELEASE THE CURRENTLY UNUSED LQH CONNECTIONS. THE */
- /* REMAINING WILL BE RELEASED WHEN THE TRANSACTION THAT */
- /* USED THEM IS COMPLETED. */
- /*------------------------------------------------------------*/
- hostptr.p->takeOverStatus = TOS_COMPLETED;
- checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER);
- break;
- case TOS_COMPLETED:
- jam();
- /*------------------------------------------------------------*/
- /* WE HAVE ALREADY RECEIVED THE CONF SIGNAL. IT IS MOST */
- /* LIKELY SENT FROM A NEW MASTER WHICH WASN'T SURE IF */
- /* THIS NODE HEARD THE CONF SIGNAL FROM THE OLD MASTER. */
- /* WE SIMPLY IGNORE THE MESSAGE. */
- /*------------------------------------------------------------*/
- /*empty*/;
- break;
- default:
+
+ ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)",
+ tfailedNodeId, signal->getSendersBlockRef(), reference());
+ if (signal->getSendersBlockRef() != reference())
+ {
jam();
- systemErrorLab(signal);
return;
- }//switch
+ }
+
+
+ checkNodeFailComplete(signal, hostptr.i, HostRecord::NF_TAKEOVER);
}//Dbtc::execTAKE_OVERTCCONF()
void Dbtc::execTAKE_OVERTCREQ(Signal* signal)
@@ -7351,16 +7256,10 @@
/* TO REPORT THE COMPLETION OF THE TAKE OVER TO ALL */
/* NODES THAT ARE ALIVE. */
/*------------------------------------------------------------*/
- for (hostptr.i = 1; hostptr.i < MAX_NDB_NODES; hostptr.i++) {
- jam();
- ptrAss(hostptr, hostRecord);
- if (hostptr.p->hostStatus == HS_ALIVE) {
- jam();
- tblockref = calcTcBlockRef(hostptr.i);
- signal->theData[0] = tcNodeFailptr.p->takeOverNode;
- sendSignal(tblockref, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
- }//if
- }//for
+ NodeReceiverGroup rg(DBTC, c_alive_nodes);
+ signal->theData[0] = tcNodeFailptr.p->takeOverNode;
+ sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
+
if (tcNodeFailptr.p->queueIndex > 0) {
jam();
/*------------------------------------------------------------*/
@@ -9937,7 +9836,6 @@
ptrAss(hostptr, hostRecord);
hostptr.p->hostStatus = HS_DEAD;
hostptr.p->inPackedList = false;
- hostptr.p->takeOverStatus = TOS_NOT_DEFINED;
hostptr.p->lqhTransStatus = LTS_IDLE;
hostptr.p->noOfWordsTCKEYCONF = 0;
hostptr.p->noOfWordsTCINDXCONF = 0;
| Thread |
|---|
| • bk commit into 4.1 tree (jonas:1.2474) BUG#18352 | jonas | 20 Mar |