Below is the list of changes that have just been committed into a local
5.1 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-11-23 10:46:48+01:00, tomas@stripped +4 -0
bug#28445 - Heartbeat does not start until first API_REGREQ is received
- move api failure handling into own method
- add START_ORD so that hb checking can start really early
storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp@stripped, 2007-11-23 10:46:46+01:00,
tomas@stripped +29 -28
- make sure qmgr is "fully" informed about connections so that it can handle hb
correctly
- don't allow API/mysqld node to reconnect if we have not started yet (sp 8)
storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp@stripped, 2007-11-23 10:46:46+01:00,
tomas@stripped +3 -0
- move api failure handling into own method
- add START_ORD so that hb checking can start really early
storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp@stripped, 2007-11-23 10:46:46+01:00,
tomas@stripped +22 -4
- move api failure handling into own method
- add START_ORD so that hb checking can start really early
- Init data structures in constructor
- as CONNECT_REP may occur before start phases
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp@stripped, 2007-11-23 10:46:46+01:00,
tomas@stripped +189 -173
- Init data structures in constructor
- as CONNECT_REP may occur before start phases
- start hb handling directly on connect rep (instead of first hb)
diff -Nrup a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
--- a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp 2007-09-28 10:10:53 +02:00
+++ b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp 2007-11-23 10:46:46 +01:00
@@ -421,9 +421,10 @@ void Cmvmi::execCLOSE_COMREQ(Signal* sig
// Uint32 noOfNodes = closeCom->noOfNodes;
jamEntry();
- for (unsigned i = 0; i < MAX_NODES; i++){
- if(NodeBitmask::get(closeCom->theNodes, i)){
-
+ for (unsigned i = 0; i < MAX_NODES; i++)
+ {
+ if(NodeBitmask::get(closeCom->theNodes, i))
+ {
jam();
//-----------------------------------------------------
@@ -437,7 +438,9 @@ void Cmvmi::execCLOSE_COMREQ(Signal* sig
globalTransporterRegistry.do_disconnect(i);
}
}
- if (failNo != 0) {
+
+ if (failNo != 0)
+ {
jam();
signal->theData[0] = userRef;
signal->theData[1] = failNo;
@@ -456,13 +459,21 @@ void Cmvmi::execOPEN_COMREQ(Signal* sign
jamEntry();
const Uint32 len = signal->getLength();
- if(len == 2){
-
+ if(len == 2)
+ {
#ifdef ERROR_INSERT
if (! ((ERROR_INSERTED(9000) || ERROR_INSERTED(9002))
&& c_error_9000_nodes_mask.get(tStartingNode)))
#endif
{
+ if (globalData.theStartLevel != NodeState::SL_STARTED &&
+ (getNodeInfo(tStartingNode).m_type != NodeInfo::DB &&
+ getNodeInfo(tStartingNode).m_type != NodeInfo::MGM))
+ {
+ jam();
+ goto done;
+ }
+
globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
@@ -475,9 +486,11 @@ void Cmvmi::execOPEN_COMREQ(Signal* sign
//-----------------------------------------------------
}
} else {
- for(unsigned int i = 1; i < MAX_NODES; i++ ) {
+ for(unsigned int i = 1; i < MAX_NODES; i++ )
+ {
jam();
- if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
+ if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2)
+ {
jam();
#ifdef ERROR_INSERT
@@ -496,6 +509,7 @@ void Cmvmi::execOPEN_COMREQ(Signal* sign
}
}
+done:
if (userRef != 0) {
jam();
signal->theData[0] = tStartingNode;
@@ -536,24 +550,10 @@ void Cmvmi::execDISCONNECT_REP(Signal *s
setNodeInfo(hostId).m_connectCount++;
const NodeInfo::NodeType type = getNodeInfo(hostId).getType();
ndbrequire(type != NodeInfo::INVALID);
-
- if(type == NodeInfo::DB || globalData.theStartLevel == NodeState::SL_STARTED){
- jam();
- DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
- rep->nodeId = hostId;
- rep->err = errNo;
- sendSignal(QMGR_REF, GSN_DISCONNECT_REP, signal,
- DisconnectRep::SignalLength, JBA);
- } else if((globalData.theStartLevel == NodeState::SL_CMVMI ||
- globalData.theStartLevel == NodeState::SL_STARTING)
- && type == NodeInfo::MGM) {
- /**
- * Someone disconnected during cmvmi period
- */
- jam();
- globalTransporterRegistry.do_connect(hostId);
- }
+ sendSignal(QMGR_REF, GSN_DISCONNECT_REP, signal,
+ DisconnectRep::SignalLength, JBA);
+
cancelSubscription(hostId);
signal->theData[0] = NDB_LE_Disconnected;
@@ -587,6 +587,8 @@ void Cmvmi::execCONNECT_REP(Signal *sign
*/
if(type == NodeInfo::MGM){
jam();
+ signal->theData[0] = hostId;
+ sendSignal(QMGR_REF, GSN_CONNECT_REP, signal, 1, JBA);
} else {
/**
* Dont allow api nodes to connect
@@ -802,6 +804,8 @@ Cmvmi::execSTART_ORD(Signal* signal) {
}
}
}
+
+ EXECUTE_DIRECT(QMGR, GSN_START_ORD, signal, 1);
return ;
}
@@ -829,9 +833,6 @@ Cmvmi::execSTART_ORD(Signal* signal) {
*
* Do Restart
*/
-
- globalScheduler.clear();
- globalTimeQueue.clear();
// Disconnect all nodes as part of the system restart.
// We need to ensure that we are starting up
diff -Nrup a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
--- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-02-26 08:11:56 +01:00
+++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-11-23 10:46:46 +01:00
@@ -265,6 +265,8 @@ private:
void execALLOC_NODEID_CONF(Signal *);
void execALLOC_NODEID_REF(Signal *);
void completeAllocNodeIdReq(Signal *);
+
+ void execSTART_ORD(Signal*);
// Arbitration signals
void execARBIT_CFG(Signal* signal);
@@ -281,6 +283,7 @@ private:
void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
Uint32 check_startup(Signal* signal);
+ void api_failed(Signal* signal, Uint32 aFailedNode);
void node_failed(Signal* signal, Uint16 aFailedNode);
void checkStartInterface(Signal* signal);
void failReport(Signal* signal,
diff -Nrup a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp 2007-02-14 06:35:30 +01:00
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp 2007-11-23 10:46:46 +01:00
@@ -31,10 +31,6 @@ void Qmgr::initData()
cnoCommitFailedNodes = 0;
c_maxDynamicId = 0;
c_clusterNodes.clear();
-
- Uint32 hbDBAPI = 500;
- setHbApiDelay(hbDBAPI);
- c_connectedNodes.set(getOwnNodeId());
c_stopReq.senderRef = 0;
/**
@@ -43,6 +39,27 @@ void Qmgr::initData()
ndbrequire((Uint32)NodeInfo::DB == 0);
ndbrequire((Uint32)NodeInfo::API == 1);
ndbrequire((Uint32)NodeInfo::MGM == 2);
+
+ NodeRecPtr nodePtr;
+ nodePtr.i = getOwnNodeId();
+ ptrAss(nodePtr, nodeRec);
+ nodePtr.p->blockRef = reference();
+
+ c_connectedNodes.set(getOwnNodeId());
+ setNodeInfo(getOwnNodeId()).m_version = NDB_VERSION;
+
+
+ /**
+ * Timeouts
+ */
+ const ndb_mgm_configuration_iterator * p =
+ m_ctx.m_config.getOwnConfigIterator();
+ ndbrequire(p != 0);
+
+ Uint32 hbDBAPI = 1500;
+ ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI);
+
+ setHbApiDelay(hbDBAPI);
}//Qmgr::initData()
void Qmgr::initRecords()
@@ -113,6 +130,7 @@ Qmgr::Qmgr(Block_context& ctx)
addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF);
addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF);
addRecSignal(GSN_NODE_VERSION_REP, &Qmgr::execNODE_VERSION_REP);
+ addRecSignal(GSN_START_ORD, &Qmgr::execSTART_ORD);
initData();
}//Qmgr::Qmgr()
diff -Nrup a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-07-11 14:36:40 +02:00
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-11-23 10:46:46 +01:00
@@ -238,6 +238,38 @@ Qmgr::execREAD_CONFIG_REQ(Signal* signal
ReadConfigConf::SignalLength, JBB);
}
+void
+Qmgr::execSTART_ORD(Signal* signal)
+{
+ /**
+ * Start timer handling
+ */
+ signal->theData[0] = ZTIMER_HANDLING;
+ sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 1, JBB);
+
+ NodeRecPtr nodePtr;
+ for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
+ {
+ ptrAss(nodePtr, nodeRec);
+ nodePtr.p->ndynamicId = 0;
+ if(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB)
+ {
+ nodePtr.p->phase = ZINIT;
+ c_definedNodes.set(nodePtr.i);
+ } else {
+ nodePtr.p->phase = ZAPI_INACTIVE;
+ }
+
+ setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
+ nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
+ nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
+ nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
+ nodePtr.p->failState = NORMAL;
+ nodePtr.p->rcv[0] = 0;
+ nodePtr.p->rcv[1] = 0;
+ }//for
+}
+
/*
4.2 ADD NODE MODULE*/
/*##########################################################################*/
@@ -298,8 +330,6 @@ void Qmgr::startphase1(Signal* signal)
nodePtr.i = getOwnNodeId();
ptrAss(nodePtr, nodeRec);
nodePtr.p->phase = ZSTARTING;
- nodePtr.p->blockRef = reference();
- c_connectedNodes.set(nodePtr.i);
signal->theData[0] = reference();
sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB);
@@ -371,11 +401,14 @@ void Qmgr::execCONNECT_REP(Signal* signa
case ZFAIL_CLOSING:
jam();
return;
- case ZINIT:
- ndbrequire(false);
case ZAPI_ACTIVE:
case ZAPI_INACTIVE:
return;
+ case ZINIT:
+ ndbrequire(getNodeInfo(nodeId).m_type == NodeInfo::MGM);
+ break;
+ default:
+ ndbrequire(false);
}
if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
@@ -1212,12 +1245,6 @@ void Qmgr::execCM_REGREF(Signal* signal)
{
jam();
electionWon(signal);
-
- /**
- * Start timer handling
- */
- signal->theData[0] = ZTIMER_HANDLING;
- sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 10, JBB);
}
return;
@@ -1855,12 +1882,6 @@ Qmgr::joinedCluster(Signal* signal, Node
sendSttorryLab(signal);
- /**
- * Start timer handling
- */
- signal->theData[0] = ZTIMER_HANDLING;
- sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 10, JBB);
-
sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
}
@@ -2094,25 +2115,6 @@ void Qmgr::findNeighbours(Signal* signal
/*---------------------------------------------------------------------------*/
void Qmgr::initData(Signal* signal)
{
- NodeRecPtr nodePtr;
- for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
- ptrAss(nodePtr, nodeRec);
- nodePtr.p->ndynamicId = 0;
- if(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB){
- nodePtr.p->phase = ZINIT;
- c_definedNodes.set(nodePtr.i);
- } else {
- nodePtr.p->phase = ZAPI_INACTIVE;
- }
-
- setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
- nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
- nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
- nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
- nodePtr.p->failState = NORMAL;
- nodePtr.p->rcv[0] = 0;
- nodePtr.p->rcv[1] = 0;
- }//for
cfailureNr = 1;
ccommitFailureNr = 1;
cprepareFailureNr = 1;
@@ -2146,13 +2148,11 @@ void Qmgr::initData(Signal* signal)
ndbrequire(p != 0);
Uint32 hbDBDB = 1500;
- Uint32 hbDBAPI = 1500;
Uint32 arbitTimeout = 1000;
c_restartPartialTimeout = 30000;
c_restartPartionedTimeout = 60000;
c_restartFailureTimeout = ~0;
ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
- ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI);
ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
&c_restartPartialTimeout);
@@ -2177,7 +2177,6 @@ void Qmgr::initData(Signal* signal)
}
setHbDelay(hbDBDB);
- setHbApiDelay(hbDBAPI);
setArbitTimeout(arbitTimeout);
arbitRec.state = ARBIT_NULL; // start state for all nodes
@@ -2204,7 +2203,6 @@ void Qmgr::initData(Signal* signal)
execARBIT_CFG(signal);
}
- setNodeInfo(getOwnNodeId()).m_version = NDB_VERSION;
}//Qmgr::initData()
@@ -2237,20 +2235,22 @@ void Qmgr::timerHandlingLab(Signal* sign
hb_check_timer.reset();
}
}
-
+
if (interface_check_timer.check(TcurrentTime)) {
jam();
interface_check_timer.reset();
checkStartInterface(signal);
}
+ if (hb_api_timer.check(TcurrentTime))
+ {
+ jam();
+ hb_api_timer.reset();
+ apiHbHandlingLab(signal);
+ }
+
if (cactivateApiCheck != 0) {
jam();
- if (hb_api_timer.check(TcurrentTime)) {
- jam();
- hb_api_timer.reset();
- apiHbHandlingLab(signal);
- }//if
if (clatestTransactionCheck == 0) {
//-------------------------------------------------------------
// Initialise the Transaction check timer.
@@ -2367,18 +2367,21 @@ void Qmgr::apiHbHandlingLab(Signal* sign
if(type == NodeInfo::INVALID)
continue;
- if (TnodePtr.p->phase == ZAPI_ACTIVE){
+ if (c_connectedNodes.get(nodeId))
+ {
jam();
setNodeInfo(TnodePtr.i).m_heartbeat_cnt++;
- if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2){
+ if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2)
+ {
signal->theData[0] = NDB_LE_MissedHeartbeat;
signal->theData[1] = nodeId;
signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
}
- if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4) {
+ if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4)
+ {
jam();
/*------------------------------------------------------------------*/
/* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
@@ -2390,8 +2393,8 @@ void Qmgr::apiHbHandlingLab(Signal* sign
signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
signal->theData[1] = nodeId;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
-
- node_failed(signal, nodeId);
+
+ api_failed(signal, nodeId);
}//if
}//if
}//for
@@ -2480,26 +2483,6 @@ void Qmgr::sendApiFailReq(Signal* signal
sendSignal(DBTC_REF, GSN_API_FAILREQ, signal, 2, JBA);
sendSignal(DBDICT_REF, GSN_API_FAILREQ, signal, 2, JBA);
sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
-
- /**-------------------------------------------------------------------------
- * THE OTHER NODE WAS AN API NODE. THE COMMUNICATION LINK IS ALREADY
- * BROKEN AND THUS NO ACTION IS NEEDED TO BREAK THE CONNECTION.
- * WE ONLY NEED TO SET PARAMETERS TO ENABLE A NEW CONNECTION IN A FEW
- * SECONDS.
- *-------------------------------------------------------------------------*/
- setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
- setNodeInfo(failedNodePtr.i).m_version = 0;
- recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
-
- CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
-
- closeCom->xxxBlockRef = reference();
- closeCom->failNo = 0;
- closeCom->noOfNodes = 1;
- NodeBitmask::clear(closeCom->theNodes);
- NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
- sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
- CloseComReqConf::SignalLength, JBA);
}//Qmgr::sendApiFailReq()
void Qmgr::execAPI_FAILREQ(Signal* signal)
@@ -2512,20 +2495,7 @@ void Qmgr::execAPI_FAILREQ(Signal* signa
ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
- // ignore if api not active
- if (failedNodePtr.p->phase != ZAPI_ACTIVE)
- {
- jam();
- // But send to SUMA anyway...
- sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
- return;
- }
-
- signal->theData[0] = NDB_LE_Disconnected;
- signal->theData[1] = failedNodePtr.i;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
-
- node_failed(signal, failedNodePtr.i);
+ api_failed(signal, signal->theData[0]);
}
void Qmgr::execAPI_FAILCONF(Signal* signal)
@@ -2649,6 +2619,13 @@ void Qmgr::execDISCONNECT_REP(Signal* si
ndbrequire(false);
}
+ if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
+ {
+ jam();
+ api_failed(signal, nodeId);
+ return;
+ }
+
switch(nodePtr.p->phase){
case ZRUNNING:
jam();
@@ -2685,66 +2662,109 @@ void Qmgr::node_failed(Signal* signal, U
failedNodePtr.i = aFailedNode;
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
- if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB){
+ ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
+
+ /**---------------------------------------------------------------------
+ * THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
+ * FAILURE WAS DISCOVERED.
+ *---------------------------------------------------------------------*/
+ switch(failedNodePtr.p->phase){
+ case ZRUNNING:
jam();
- /**---------------------------------------------------------------------
- * THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
- * FAILURE WAS DISCOVERED.
- *---------------------------------------------------------------------*/
- switch(failedNodePtr.p->phase){
- case ZRUNNING:
- jam();
- failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE);
- return;
- case ZFAIL_CLOSING:
- jam();
- return;
- case ZSTARTING:
- c_start.reset();
- // Fall-through
- default:
- jam();
- /*---------------------------------------------------------------------*/
- // The other node is still not in the cluster but disconnected.
- // We must restart communication in three seconds.
- /*---------------------------------------------------------------------*/
- failedNodePtr.p->failState = NORMAL;
- failedNodePtr.p->phase = ZFAIL_CLOSING;
- setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
-
- CloseComReqConf * const closeCom =
- (CloseComReqConf *)&signal->theData[0];
-
- closeCom->xxxBlockRef = reference();
- closeCom->failNo = 0;
- closeCom->noOfNodes = 1;
- NodeBitmask::clear(closeCom->theNodes);
- NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
- sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
- CloseComReqConf::SignalLength, JBA);
- }//if
+ failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE);
return;
- }
-
- /**
- * API code
- */
- jam();
- if (failedNodePtr.p->phase != ZFAIL_CLOSING){
+ case ZFAIL_CLOSING:
+ jam();
+ return;
+ case ZSTARTING:
+ c_start.reset();
+ // Fall-through
+ default:
jam();
- //-------------------------------------------------------------------------
- // The API was active and has now failed. We need to initiate API failure
- // handling. If the API had already failed then we can ignore this
- // discovery.
- //-------------------------------------------------------------------------
+ /*---------------------------------------------------------------------*/
+ // The other node is still not in the cluster but disconnected.
+ // We must restart communication in three seconds.
+ /*---------------------------------------------------------------------*/
+ failedNodePtr.p->failState = NORMAL;
failedNodePtr.p->phase = ZFAIL_CLOSING;
-
- sendApiFailReq(signal, aFailedNode);
- arbitRec.code = ArbitCode::ApiFail;
- handleArbitApiFail(signal, aFailedNode);
+ setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
+
+ CloseComReqConf * const closeCom =
+ (CloseComReqConf *)&signal->theData[0];
+
+ closeCom->xxxBlockRef = reference();
+ closeCom->failNo = 0;
+ closeCom->noOfNodes = 1;
+ NodeBitmask::clear(closeCom->theNodes);
+ NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
+ sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
+ CloseComReqConf::SignalLength, JBA);
}//if
return;
-}//Qmgr::node_failed()
+}
+
+void
+Qmgr::api_failed(Signal* signal, Uint32 nodeId)
+{
+ NodeRecPtr failedNodePtr;
+ /**------------------------------------------------------------------------
+ * A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
+ * DUE TO THIS.
+ *-----------------------------------------------------------------------*/
+ failedNodePtr.i = nodeId;
+ ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
+
+ if (failedNodePtr.p->phase == ZFAIL_CLOSING)
+ {
+ /**
+ * Failure handling already in progress
+ */
+ jam();
+ return;
+ }
+
+ if (failedNodePtr.p->phase == ZAPI_ACTIVE)
+ {
+ jam();
+ sendApiFailReq(signal, nodeId);
+ arbitRec.code = ArbitCode::ApiFail;
+ handleArbitApiFail(signal, nodeId);
+ }
+ else
+ {
+ /**
+ * Always inform SUMA
+ */
+ jam();
+ signal->theData[0] = nodeId;
+ signal->theData[1] = QMGR_REF;
+ sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
+ failedNodePtr.p->failState = NORMAL;
+ }
+
+ failedNodePtr.p->phase = ZFAIL_CLOSING;
+ setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
+ setNodeInfo(failedNodePtr.i).m_version = 0;
+ recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
+
+ CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
+ closeCom->xxxBlockRef = reference();
+ closeCom->failNo = 0;
+ closeCom->noOfNodes = 1;
+ NodeBitmask::clear(closeCom->theNodes);
+ NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
+ sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
+ CloseComReqConf::SignalLength, JBA);
+
+ if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
+ {
+ /**
+ * Allow MGM do reconnect "directly"
+ */
+ jam();
+ setNodeInfo(failedNodePtr.i).m_heartbeat_cnt = 3;
+ }
+}
/**--------------------------------------------------------------------------
* AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE
@@ -4963,43 +4983,39 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal)
c_start.m_president_candidate_gci);
infoEvent("ctoStatus = %d\n", ctoStatus);
for(Uint32 i = 1; i<MAX_NDB_NODES; i++){
- if(getNodeInfo(i).getType() == NodeInfo::DB){
- NodeRecPtr nodePtr;
- nodePtr.i = i;
- ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
- char buf[100];
- switch(nodePtr.p->phase){
- case ZINIT:
- sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
- break;
- case ZSTARTING:
- sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
- break;
- case ZRUNNING:
- sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
- break;
- case ZPREPARE_FAIL:
- sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
- break;
- case ZFAIL_CLOSING:
- sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
- break;
- case ZAPI_INACTIVE:
- sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
- break;
- case ZAPI_ACTIVE:
- sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
- break;
- default:
- sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
- break;
- }
- infoEvent(buf);
+ NodeRecPtr nodePtr;
+ nodePtr.i = i;
+ ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
+ char buf[100];
+ switch(nodePtr.p->phase){
+ case ZINIT:
+ sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
+ break;
+ case ZSTARTING:
+ sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
+ break;
+ case ZRUNNING:
+ sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
+ break;
+ case ZPREPARE_FAIL:
+ sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
+ break;
+ case ZFAIL_CLOSING:
+ sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
+ break;
+ case ZAPI_INACTIVE:
+ sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
+ break;
+ case ZAPI_ACTIVE:
+ sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
+ break;
+ default:
+ sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
+ break;
}
+ infoEvent(buf);
}
- default:
- ;
- }//switch
+ }
#ifdef ERROR_INSERT
if (signal->theData[0] == 935 && signal->getLength() == 2)
| Thread |
|---|
| • bk commit into 5.1 tree (tomas:1.2584) BUG#28445 | tomas | 23 Nov |