#At file:///home/jonas/src/telco-6.3/
2860 Jonas Oreland 2009-02-18 [merge]
merge 62 to 63
modified:
storage/ndb/include/kernel/signaldata/AllocNodeId.hpp
storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
storage/ndb/src/mgmsrv/MgmtSrvr.cpp
storage/ndb/src/mgmsrv/MgmtSrvr.hpp
storage/ndb/src/mgmsrv/Services.cpp
=== modified file 'storage/ndb/include/kernel/signaldata/AllocNodeId.hpp'
--- a/storage/ndb/include/kernel/signaldata/AllocNodeId.hpp 2006-12-27 01:23:51 +0000
+++ b/storage/ndb/include/kernel/signaldata/AllocNodeId.hpp 2009-02-18 14:29:58 +0000
@@ -24,21 +24,28 @@
*/
class AllocNodeIdReq {
public:
- STATIC_CONST( SignalLength = 4 );
+ STATIC_CONST( SignalLength = 5 );
+ STATIC_CONST( SignalLengthQMGR = 7 );
Uint32 senderRef;
Uint32 senderData;
Uint32 nodeId;
Uint32 nodeType;
+ Uint32 timeout;
+
+ Uint32 secret_lo;
+ Uint32 secret_hi;
};
class AllocNodeIdConf {
public:
- STATIC_CONST( SignalLength = 3 );
+ STATIC_CONST( SignalLength = 5 );
Uint32 senderRef;
Uint32 senderData;
Uint32 nodeId;
+ Uint32 secret_lo;
+ Uint32 secret_hi;
};
class AllocNodeIdRef {
=== modified file 'storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2008-11-13 13:15:56 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2009-02-18 14:29:58 +0000
@@ -156,6 +156,8 @@ public:
QmgrState sendPresToStatus;
FailState failState;
BlockReference blockRef;
+ Uint64 m_secret;
+ Uint64 m_alloc_timeout;
NodeRec() { }
}; /* p2c: size = 52 bytes */
@@ -308,7 +310,7 @@ private:
void electionWon(Signal* signal);
void cmInfoconf010Lab(Signal* signal);
- void apiHbHandlingLab(Signal* signal);
+ void apiHbHandlingLab(Signal* signal, Uint64 now);
void timerHandlingLab(Signal* signal);
void hbReceivedLab(Signal* signal);
void sendCmRegrefLab(Signal* signal, BlockReference ref,
=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp 2008-11-13 13:15:56 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp 2009-02-18 14:29:58 +0000
@@ -27,6 +27,10 @@ void Qmgr::initData()
// Records with constant sizes
nodeRec = new NodeRec[MAX_NODES];
+ for (Uint32 i = 0; i<MAX_NODES; i++)
+ {
+ nodeRec[i].m_secret = 0;
+ }
cnoCommitFailedNodes = 0;
c_maxDynamicId = 0;
=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2009-01-29 10:59:51 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2009-02-18 14:33:50 +0000
@@ -436,9 +436,14 @@ void Qmgr::execCONNECT_REP(Signal* signa
infoEvent("Discarding CONNECT_REP(%d)", nodeId);
return;
}
-
+
c_connectedNodes.set(nodeId);
+
NodeRecPtr nodePtr;
+ nodePtr.i = nodeId;
+ ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
+ nodePtr.p->m_secret = 0;
+
nodePtr.i = getOwnNodeId();
ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
NodeInfo nodeInfo = getNodeInfo(nodeId);
@@ -2367,7 +2372,7 @@ void Qmgr::timerHandlingLab(Signal* sign
{
jam();
hb_api_timer.reset();
- apiHbHandlingLab(signal);
+ apiHbHandlingLab(signal, TcurrentTime);
}
if (cactivateApiCheck != 0) {
@@ -2473,7 +2478,7 @@ void Qmgr::checkHeartbeat(Signal* signal
}//if
}//Qmgr::checkHeartbeat()
-void Qmgr::apiHbHandlingLab(Signal* signal)
+void Qmgr::apiHbHandlingLab(Signal* signal, Uint64 now)
{
NodeRecPtr TnodePtr;
@@ -2518,6 +2523,14 @@ void Qmgr::apiHbHandlingLab(Signal* sign
api_failed(signal, nodeId);
}//if
}//if
+ else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
+ TnodePtr.p->m_secret != 0 && now > TnodePtr.p->m_alloc_timeout)
+ {
+ jam();
+ TnodePtr.p->m_secret = 0;
+ warningEvent("Releasing node id allocation for node %u",
+ TnodePtr.i);
+ }
}//for
return;
}//Qmgr::apiHbHandlingLab()
@@ -2552,6 +2565,7 @@ void Qmgr::checkStartInterface(Signal* s
* IS COMPLETE.
*-------------------------------------------------------------------*/
nodePtr.p->failState = NORMAL;
+ nodePtr.p->m_secret = 0;
Uint32 type = getNodeInfo(nodePtr.i).m_type;
switch(type){
case NodeInfo::DB:
@@ -2848,6 +2862,7 @@ void Qmgr::node_failed(Signal* signal, U
*-----------------------------------------------------------------------*/
failedNodePtr.i = aFailedNode;
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
+ failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
@@ -2918,7 +2933,8 @@ Qmgr::api_failed(Signal* signal, Uint32
*-----------------------------------------------------------------------*/
failedNodePtr.i = nodeId;
ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
-
+ failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
+
if (failedNodePtr.p->phase == ZFAIL_CLOSING)
{
/**
@@ -5337,22 +5353,37 @@ void
Qmgr::execALLOC_NODEID_REQ(Signal * signal)
{
jamEntry();
- const AllocNodeIdReq * req = (AllocNodeIdReq*)signal->getDataPtr();
- Uint32 senderRef = req->senderRef;
- Uint32 nodeId = req->nodeId;
- Uint32 nodeType = req->nodeType;
+ AllocNodeIdReq req = *(AllocNodeIdReq*)signal->getDataPtr();
Uint32 error = 0;
- if (refToBlock(senderRef) != QMGR) // request from management server
+ NodeRecPtr nodePtr;
+ nodePtr.i = req.nodeId;
+ ptrAss(nodePtr, nodeRec);
+
+ if (refToBlock(req.senderRef) != QMGR) // request from management server
{
/* master */
if (getOwnNodeId() != cpresident)
+ {
+ jam();
error = AllocNodeIdRef::NotMaster;
+ }
else if (!opAllocNodeIdReq.m_tracker.done())
+ {
+ jam();
error = AllocNodeIdRef::Busy;
- else if (c_connectedNodes.get(nodeId))
+ }
+ else if (c_connectedNodes.get(req.nodeId))
+ {
+ jam();
error = AllocNodeIdRef::NodeConnected;
+ }
+ else if (nodePtr.p->m_secret != 0)
+ {
+ jam();
+ error = AllocNodeIdRef::NodeReserved;
+ }
if (error)
{
@@ -5361,60 +5392,99 @@ Qmgr::execALLOC_NODEID_REQ(Signal * sign
ref->senderRef = reference();
ref->errorCode = error;
ref->masterRef = numberToRef(QMGR, cpresident);
- sendSignal(senderRef, GSN_ALLOC_NODEID_REF, signal,
+ ref->senderData = req.senderData;
+ ref->nodeId = req.nodeId;
+ sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
AllocNodeIdRef::SignalLength, JBB);
return;
}
- if (ERROR_INSERTED(934) && nodeId != getOwnNodeId())
+ if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
{
CRASH_INSERTION(934);
}
+
+ /**
+ * generate secret
+ */
+ Uint64 now = NdbTick_CurrentMillisecond();
+ Uint32 secret_hi = now >> 24;
+ Uint32 secret_lo = Uint32(now << 8) + getOwnNodeId();
+ req.secret_hi = secret_hi;
+ req.secret_lo = secret_lo;
+
+ if (req.timeout > 60000)
+ req.timeout = 60000;
+
+ nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
+ nodePtr.p->m_alloc_timeout = now + req.timeout;
- opAllocNodeIdReq.m_req = *req;
+ opAllocNodeIdReq.m_req = req;
opAllocNodeIdReq.m_error = 0;
- opAllocNodeIdReq.m_connectCount = getNodeInfo(refToNode(senderRef)).m_connectCount;
+ opAllocNodeIdReq.m_connectCount =
+ getNodeInfo(refToNode(req.senderRef)).m_connectCount;
jam();
- AllocNodeIdReq * req = (AllocNodeIdReq*)signal->getDataPtrSend();
- req->senderRef = reference();
+ AllocNodeIdReq * req2 = (AllocNodeIdReq*)signal->getDataPtrSend();
+ * req2 = req;
+ req2->senderRef = reference();
NodeReceiverGroup rg(QMGR, c_clusterNodes);
RequestTracker & p = opAllocNodeIdReq.m_tracker;
p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
- AllocNodeIdReq::SignalLength, JBB);
+ AllocNodeIdReq::SignalLengthQMGR, JBB);
return;
}
/* participant */
-
- if (c_connectedNodes.get(nodeId))
+ if (c_connectedNodes.get(req.nodeId))
+ {
+ jam();
error = AllocNodeIdRef::NodeConnected;
- else
+ }
+ else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
{
- NodeRecPtr nodePtr;
- nodePtr.i = nodeId;
- ptrAss(nodePtr, nodeRec);
- if (nodeType != getNodeInfo(nodeId).m_type)
- error = AllocNodeIdRef::NodeTypeMismatch;
- else if (nodePtr.p->failState != NORMAL)
- error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
+ jam();
+ error = AllocNodeIdRef::NodeTypeMismatch;
+ }
+ else if (nodePtr.p->failState != NORMAL)
+ {
+ jam();
+ error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
+ }
+#if 0
+ /**
+ * For now only make "time/secret" based reservation on master
+ * as we otherwise also need to clear it on failure + handle
+ * master failure
+ */
+ else if (nodePtr.p->m_secret != 0)
+ {
+ jam();
+ error = AllocNodeIdRef::NodeReserved;
}
+#endif
if (error)
{
+ jam();
AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
ref->senderRef = reference();
ref->errorCode = error;
- sendSignal(senderRef, GSN_ALLOC_NODEID_REF, signal,
+ ref->senderData = req.senderData;
+ ref->nodeId = req.nodeId;
+ ref->masterRef = numberToRef(QMGR, cpresident);
+ sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
AllocNodeIdRef::SignalLength, JBB);
return;
}
AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
conf->senderRef = reference();
- sendSignal(senderRef, GSN_ALLOC_NODEID_CONF, signal,
+ conf->secret_hi = req.secret_hi;
+ conf->secret_lo = req.secret_lo;
+ sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
AllocNodeIdConf::SignalLength, JBB);
}
@@ -5427,6 +5497,22 @@ Qmgr::execALLOC_NODEID_CONF(Signal * sig
const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
refToNode(conf->senderRef));
+
+ if (signal->getLength() >= AllocNodeIdConf::SignalLength)
+ {
+ jam();
+ if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
+ opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
+ {
+ jam();
+ if (opAllocNodeIdReq.m_error == 0)
+ {
+ jam();
+ opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
+ }
+ }
+ }
+
completeAllocNodeIdReq(signal);
}
@@ -5440,15 +5526,20 @@ Qmgr::execALLOC_NODEID_REF(Signal * sign
const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();
if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
{
+ jam();
opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
refToNode(ref->senderRef));
}
else
{
+ jam();
opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
refToNode(ref->senderRef));
if (opAllocNodeIdReq.m_error == 0)
+ {
+ jam();
opAllocNodeIdReq.m_error = ref->errorCode;
+ }
}
completeAllocNodeIdReq(signal);
}
@@ -5475,6 +5566,17 @@ Qmgr::completeAllocNodeIdReq(Signal *sig
if (opAllocNodeIdReq.m_tracker.hasRef())
{
jam();
+
+ {
+ /**
+ * Clear reservation
+ */
+ NodeRecPtr nodePtr;
+ nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
+ ptrAss(nodePtr, nodeRec);
+ nodePtr.p->m_secret = 0;
+ }
+
AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
ref->senderRef = reference();
ref->senderData = opAllocNodeIdReq.m_req.senderData;
@@ -5486,12 +5588,15 @@ Qmgr::completeAllocNodeIdReq(Signal *sig
AllocNodeIdRef::SignalLength, JBB);
return;
}
+
jam();
+
AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
conf->senderRef = reference();
conf->senderData = opAllocNodeIdReq.m_req.senderData;
conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
- ndbassert(AllocNodeIdConf::SignalLength == 3);
+ conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
+ conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
AllocNodeIdConf::SignalLength, JBB);
}
=== modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.cpp'
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp 2009-01-08 15:25:14 +0000
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp 2009-02-18 14:33:50 +0000
@@ -496,7 +496,7 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_
int error_code;
if (!alloc_node_id(&tmp, NDB_MGM_NODE_TYPE_MGM,
- 0, 0, error_code, error_string)){
+ 0, 0, error_code, error_string, 20)){
ndbout << "Unable to obtain requested nodeid: "
<< error_string.c_str() << endl;
require(false);
@@ -2144,7 +2144,9 @@ MgmtSrvr::get_connected_nodes(NodeBitmas
}
int
-MgmtSrvr::alloc_node_id_req(NodeId free_node_id, enum ndb_mgm_node_type type)
+MgmtSrvr::alloc_node_id_req(NodeId free_node_id,
+ enum ndb_mgm_node_type type,
+ Uint32 timeout_ms)
{
SignalSender ss(theFacade);
ss.lock(); // lock will be released on exit
@@ -2158,6 +2160,7 @@ MgmtSrvr::alloc_node_id_req(NodeId free_
req->senderData = 19;
req->nodeId = free_node_id;
req->nodeType = type;
+ req->timeout = timeout_ms;
int do_send = 1;
NodeId nodeId = 0;
@@ -2241,141 +2244,165 @@ MgmtSrvr::alloc_node_id_req(NodeId free_
return 0;
}
-bool
-MgmtSrvr::alloc_node_id(NodeId * nodeId,
- enum ndb_mgm_node_type type,
- struct sockaddr *client_addr,
- SOCKET_SIZE_TYPE *client_addr_len,
- int &error_code, BaseString &error_string,
- int log_event)
+int
+MgmtSrvr::match_hostname(const struct sockaddr *clnt_addr,
+ const char *config_hostname) const
{
- DBUG_ENTER("MgmtSrvr::alloc_node_id");
- DBUG_PRINT("enter", ("nodeid: %d type: %d client_addr: 0x%ld",
- *nodeId, type, (long) client_addr));
- if (g_no_nodeid_checks) {
- if (*nodeId == 0) {
- error_string.appfmt("no-nodeid-checks set in management server.\n"
- "node id must be set explicitly in connectstring");
- error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
- DBUG_RETURN(false);
+ struct in_addr config_addr= {0};
+ if (clnt_addr)
+ {
+ const struct in_addr *clnt_in_addr = &((sockaddr_in*)clnt_addr)->sin_addr;
+
+ if (Ndb_getInAddr(&config_addr, config_hostname) != 0
+ || memcmp(&config_addr, clnt_in_addr, sizeof(config_addr)) != 0)
+ {
+ struct in_addr tmp_addr;
+ if (Ndb_getInAddr(&tmp_addr, "localhost") != 0
+ || memcmp(&tmp_addr, clnt_in_addr, sizeof(config_addr)) != 0)
+ {
+ // not localhost
+#if 0
+ ndbout << "MgmtSrvr::getFreeNodeId compare failed for \""
+ << config_hostname
+ << "\" id=" << tmp << endl;
+#endif
+ return -1;
+ }
+
+ // connecting through localhost
+ // check if config_hostname is local
+ if (!SocketServer::tryBind(0, config_hostname))
+ return -1;
}
- DBUG_RETURN(true);
}
- Guard g(m_node_id_mutex);
- int no_mgm= 0;
- NodeBitmask connected_nodes(m_reserved_nodes);
- get_connected_nodes(connected_nodes);
+ else
{
- for(Uint32 i = 0; i < MAX_NODES; i++)
- if (getNodeType(i) == NDB_MGM_NODE_TYPE_MGM)
- no_mgm++;
+ if (!SocketServer::tryBind(0, config_hostname))
+ return -1;
}
- bool found_matching_id= false;
- bool found_matching_type= false;
- bool found_free_node= false;
- unsigned id_found= 0;
- const char *config_hostname= 0;
- struct in_addr config_addr= {0};
- int r_config_addr= -1;
- unsigned type_c= 0;
+ return 0;
+}
+
+int
+MgmtSrvr::find_node_type(unsigned node_id, enum ndb_mgm_node_type type,
+ const struct sockaddr *client_addr,
+ NodeBitmask &nodes,
+ NodeBitmask &exact_nodes,
+ Vector<struct nodeid_and_host> &nodes_info,
+ int &error_code, BaseString &error_string)
+{
+ const char *found_config_hostname= 0;
+ unsigned type_c= (unsigned)type;
+
+ Guard g(m_configMutex);
- if(NdbMutex_Lock(m_configMutex))
- {
- // should not happen
- error_string.appfmt("unable to lock configuration mutex");
- error_code = NDB_MGM_ALLOCID_ERROR;
- DBUG_RETURN(false);
- }
ndb_mgm_configuration_iterator
iter(* _config->m_configValues, CFG_SECTION_NODE);
- for(iter.first(); iter.valid(); iter.next()) {
- unsigned tmp= 0;
- if(iter.get(CFG_NODE_ID, &tmp)) require(false);
- if (*nodeId && *nodeId != tmp)
- continue;
- found_matching_id= true;
- if(iter.get(CFG_TYPE_OF_SECTION, &type_c)) require(false);
- if(type_c != (unsigned)type)
- continue;
- found_matching_type= true;
- if (connected_nodes.get(tmp))
+ for(iter.first(); iter.valid(); iter.next())
+ {
+ unsigned id;
+ if (iter.get(CFG_NODE_ID, &id))
+ require(false);
+ if (node_id && node_id != id)
continue;
- found_free_node= true;
- if(iter.get(CFG_NODE_HOST, &config_hostname)) require(false);
- if (config_hostname && config_hostname[0] == 0)
- config_hostname= 0;
- else if (client_addr) {
- // check hostname compatability
- const void *tmp_in= &(((sockaddr_in*)client_addr)->sin_addr);
- if((r_config_addr= Ndb_getInAddr(&config_addr, config_hostname)) != 0
- || memcmp(&config_addr, tmp_in, sizeof(config_addr)) != 0) {
- struct in_addr tmp_addr;
- if(Ndb_getInAddr(&tmp_addr, "localhost") != 0
- || memcmp(&tmp_addr, tmp_in, sizeof(config_addr)) != 0) {
- // not localhost
-#if 0
- ndbout << "MgmtSrvr::getFreeNodeId compare failed for \""
- << config_hostname
- << "\" id=" << tmp << endl;
-#endif
- continue;
- }
- // connecting through localhost
- // check if config_hostname is local
- if (!SocketServer::tryBind(0,config_hostname)) {
- continue;
- }
- }
- } else { // client_addr == 0
- if (!SocketServer::tryBind(0,config_hostname)) {
- continue;
+ if (iter.get(CFG_TYPE_OF_SECTION, &type_c))
+ require(false);
+ if (type_c != (unsigned)type)
+ {
+ if (!node_id)
+ continue;
+ goto error;
+ }
+ const char *config_hostname= 0;
+ if (iter.get(CFG_NODE_HOST, &config_hostname))
+ require(false);
+ if (config_hostname == 0 || config_hostname[0] == 0)
+ {
+ config_hostname= "";
+ }
+ else
+ {
+ found_config_hostname= config_hostname;
+ if (match_hostname(client_addr, config_hostname))
+ {
+ if (!node_id)
+ continue;
+ goto error;
}
+ exact_nodes.set(id);
}
- if (*nodeId != 0 ||
- type != NDB_MGM_NODE_TYPE_MGM ||
- no_mgm == 1) { // any match is ok
-
- if (config_hostname == 0 &&
- *nodeId == 0 &&
- type != NDB_MGM_NODE_TYPE_MGM)
- {
- if (!id_found) // only set if not set earlier
- id_found= tmp;
- continue; /* continue looking for a nodeid with specified
- * hostname
- */
- }
- assert(id_found == 0);
- id_found= tmp;
- break;
- }
- if (id_found) { // mgmt server may only have one match
- error_string.appfmt("Ambiguous node id's %d and %d.\n"
- "Suggest specifying node id in connectstring,\n"
- "or specifying unique host names in config file.",
- id_found, tmp);
- NdbMutex_Unlock(m_configMutex);
- error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
- DBUG_RETURN(false);
+ nodes.set(id);
+ struct nodeid_and_host a= {id, config_hostname};
+ nodes_info.push_back(a);
+ if (node_id)
+ break;
+ }
+ if (nodes_info.size() != 0)
+ {
+ return 0;
+ }
+
+ error:
+ /*
+ lock on m_configMutex held because found_config_hostname may have
+ reference into config structure
+ */
+ error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+ if (node_id)
+ {
+ if (type_c != (unsigned) type)
+ {
+ BaseString type_string, type_c_string;
+ const char *alias, *str;
+ alias= ndb_mgm_get_node_type_alias_string(type, &str);
+ type_string.assfmt("%s(%s)", alias, str);
+ alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)type_c,
+ &str);
+ type_c_string.assfmt("%s(%s)", alias, str);
+ error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
+ node_id, type_c_string.c_str(),
+ type_string.c_str());
+ return -1;
}
- if (config_hostname == 0) {
- error_string.appfmt("Ambiguity for node id %d.\n"
- "Suggest specifying node id in connectstring,\n"
- "or specifying unique host names in config file,\n"
- "or specifying just one mgmt server in config file.",
- tmp);
- NdbMutex_Unlock(m_configMutex);
- error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
- DBUG_RETURN(false);
+ if (found_config_hostname)
+ {
+ struct in_addr config_addr= {0};
+ int r_config_addr= Ndb_getInAddr(&config_addr, found_config_hostname);
+ error_string.appfmt("Connection with id %d done from wrong host ip %s,",
+ node_id, inet_ntoa(((struct sockaddr_in *)
+ (client_addr))->sin_addr));
+ error_string.appfmt(" expected %s(%s).", found_config_hostname,
+ r_config_addr ?
+ "lookup failed" : inet_ntoa(config_addr));
+ return -1;
}
- id_found= tmp; // mgmt server matched, check for more matches
+ error_string.appfmt("No node defined with id=%d in config file.", node_id);
+ return -1;
}
- NdbMutex_Unlock(m_configMutex);
- if (id_found && client_addr != 0)
+ // node_id == 0 and nodes_info.size() == 0
+ if (found_config_hostname)
{
- int res = alloc_node_id_req(id_found, type);
- unsigned save_id_found = id_found;
+ error_string.appfmt("Connection done from wrong host ip %s.",
+ (client_addr)?
+ inet_ntoa(((struct sockaddr_in *)
+ (client_addr))->sin_addr):"");
+ return -1;
+ }
+
+ error_string.append("No nodes defined in config file.");
+ return -1;
+}
+
+int
+MgmtSrvr::try_alloc(unsigned id, const char *config_hostname,
+ enum ndb_mgm_node_type type,
+ const struct sockaddr *client_addr,
+ Uint32 timeout_ms)
+{
+ if (client_addr != 0)
+ {
+ int res = alloc_node_id_req(id, type, timeout_ms);
switch (res)
{
case 0:
@@ -2386,155 +2413,176 @@ MgmtSrvr::alloc_node_id(NodeId * nodeId,
break;
default:
// something wrong
- id_found = 0;
- break;
-
- }
- if (id_found == 0)
- {
- char buf[128];
- ndb_error_string(res, buf, sizeof(buf));
- error_string.appfmt("Cluster refused allocation of id %d. Error: %d (%s).",
- save_id_found, res, buf);
- g_eventLogger->warning("Cluster refused allocation of id %d. "
- "Connection from ip %s. "
- "Returned error string \"%s\"", save_id_found,
- inet_ntoa(((struct sockaddr_in *)
- (client_addr))->sin_addr),
- error_string.c_str());
- DBUG_RETURN(false);
+ return -1;
}
}
- if (id_found)
+ DBUG_PRINT("info", ("allocating node id %d",id));
{
- *nodeId= id_found;
- DBUG_PRINT("info", ("allocating node id %d",*nodeId));
+ int r= 0;
+ if (client_addr)
+ {
+ m_connect_address[id]= ((struct sockaddr_in *)client_addr)->sin_addr;
+ }
+ else if (config_hostname)
+ {
+ r= Ndb_getInAddr(&(m_connect_address[id]), config_hostname);
+ }
+ else
{
- int r= 0;
- if (client_addr)
- m_connect_address[id_found]=
- ((struct sockaddr_in *)client_addr)->sin_addr;
- else if (config_hostname)
- r= Ndb_getInAddr(&(m_connect_address[id_found]), config_hostname);
- else {
- char name[256];
- r= gethostname(name, sizeof(name));
- if (r == 0) {
- name[sizeof(name)-1]= 0;
- r= Ndb_getInAddr(&(m_connect_address[id_found]), name);
- }
- }
- if (r)
- m_connect_address[id_found].s_addr= 0;
- }
- m_reserved_nodes.set(id_found);
- if (theFacade && id_found != theFacade->ownId())
- {
- /**
- * Make sure we're ready to accept connections from this node
- */
- theFacade->lock_mutex();
- theFacade->doConnect(id_found);
- theFacade->unlock_mutex();
+ char name[256];
+ r= gethostname(name, sizeof(name));
+ if (r == 0)
+ {
+ name[sizeof(name)-1]= 0;
+ r= Ndb_getInAddr(&(m_connect_address[id]), name);
+ }
+ }
+ if (r)
+ {
+ m_connect_address[id].s_addr= 0;
}
+ }
+ m_reserved_nodes.set(id);
+ if (theFacade && id != theFacade->ownId())
+ {
+ /**
+ * Make sure we're ready to accept connections from this node
+ */
+ theFacade->lock_mutex();
+ theFacade->doConnect(id);
+ theFacade->unlock_mutex();
+ }
- char tmp_str[128];
- m_reserved_nodes.getText(tmp_str);
- g_eventLogger->info("Mgmt server state: nodeid %d reserved for ip %s, "
- "m_reserved_nodes %s.",
- id_found, get_connect_address(id_found), tmp_str);
+ char tmp_str[128];
+ m_reserved_nodes.getText(tmp_str);
+ g_eventLogger->info("Mgmt server state: nodeid %d reserved for ip %s, "
+ "m_reserved_nodes %s.",
+ id, get_connect_address(id), tmp_str);
+
+ return 0;
+}
+
+bool
+MgmtSrvr::alloc_node_id(NodeId * nodeId,
+ enum ndb_mgm_node_type type,
+ const struct sockaddr *client_addr,
+ SOCKET_SIZE_TYPE *client_addr_len,
+ int &error_code, BaseString &error_string,
+ int log_event,
+ int timeout_s)
+{
+ DBUG_ENTER("MgmtSrvr::alloc_node_id");
+ DBUG_PRINT("enter", ("nodeid: %d type: %d client_addr: 0x%ld",
+ *nodeId, type, (long) client_addr));
+
+ if (g_no_nodeid_checks) {
+ if (*nodeId == 0) {
+ error_string.appfmt("no-nodeid-checks set in management server.\n"
+ "node id must be set explicitly in connectstring");
+ error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+ DBUG_RETURN(false);
+ }
DBUG_RETURN(true);
}
- if (found_matching_type && !found_free_node) {
- // we have a temporary error which might be due to that
- // we have got the latest connect status from db-nodes. Force update.
- updateStatus();
- }
+ Uint32 timeout_ms = Uint32(1000 * timeout_s);
+
+ Guard g(m_node_id_mutex);
+
+ NodeBitmask connected_nodes;
+ get_connected_nodes(connected_nodes);
+
+ NodeBitmask nodes, exact_nodes;
+ Vector<struct nodeid_and_host> nodes_info;
- BaseString type_string, type_c_string;
+ /* find all nodes with correct type */
+ if (find_node_type(*nodeId, type, client_addr, nodes, exact_nodes, nodes_info,
+ error_code, error_string))
+ goto error;
+
+ // nodes_info.size() == 0 handled inside find_node_type
+ DBUG_ASSERT(nodes_info.size() != 0);
+
+ if (type == NDB_MGM_NODE_TYPE_MGM && nodes_info.size() > 1)
{
- const char *alias, *str;
- alias= ndb_mgm_get_node_type_alias_string(type, &str);
- type_string.assfmt("%s(%s)", alias, str);
- alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)type_c,
- &str);
- type_c_string.assfmt("%s(%s)", alias, str);
+ // mgmt server may only have one match
+ error_string.appfmt("Ambiguous node id's %d and %d.\n"
+ "Suggest specifying node id in connectstring,\n"
+ "or specifying unique host names in config file.",
+ nodes_info[0].id, nodes_info[1].id);
+ error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+ goto error;
}
- if (*nodeId == 0)
+ /* remove connected and reserved nodes from possible nodes to allocate */
+ nodes.bitANDC(connected_nodes);
+ nodes.bitANDC(m_reserved_nodes);
+
+ /* first try all nodes with exact match of hostname */
+ for (Uint32 i = 0; i < nodes_info.size(); i++)
{
- if (found_matching_id)
+ unsigned id= nodes_info[i].id;
+ if (!nodes.get(id))
+ continue;
+
+ if (!exact_nodes.get(id))
+ continue;
+
+ const char *config_hostname= nodes_info[i].host.c_str();
+ if (!try_alloc(id, config_hostname, type, client_addr, timeout_ms))
{
- if (found_matching_type)
- {
- if (found_free_node)
- {
- error_string.appfmt("Connection done from wrong host ip %s.",
- (client_addr)?
- inet_ntoa(((struct sockaddr_in *)
- (client_addr))->sin_addr):"");
- error_code = NDB_MGM_ALLOCID_ERROR;
- }
- else
- {
- error_string.appfmt("No free node id found for %s.",
- type_string.c_str());
- error_code = NDB_MGM_ALLOCID_ERROR;
- }
- }
- else
- {
- error_string.appfmt("No %s node defined in config file.",
- type_string.c_str());
- error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
- }
+ // success
+ *nodeId= id;
+ DBUG_RETURN(true);
}
- else
+ }
+
+ /* now try the open nodes */
+ for (Uint32 i = 0; i < nodes_info.size(); i++)
+ {
+ unsigned id= nodes_info[i].id;
+ if (!nodes.get(id))
+ continue;
+
+ /**
+ * exact node tried in loop above
+ */
+ if (exact_nodes.get(id))
+ continue;
+
+ if (!try_alloc(id, NULL, type, client_addr, timeout_ms))
{
- error_string.append("No nodes defined in config file.");
- error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+ // success
+ *nodeId= id;
+ DBUG_RETURN(true);
}
}
+
+ /*
+ there are nodes with correct type available but
+ allocation failed for some reason
+ */
+ if (*nodeId)
+ {
+ error_string.appfmt("Id %d already allocated by another node.",
+ *nodeId);
+ }
else
{
- if (found_matching_id)
- {
- if (found_matching_type)
- {
- if (found_free_node)
- {
- // have to split these into two since inet_ntoa overwrites itself
- error_string.appfmt("Connection with id %d done from wrong host ip %s,",
- *nodeId, inet_ntoa(((struct sockaddr_in *)
- (client_addr))->sin_addr));
- error_string.appfmt(" expected %s(%s).", config_hostname,
- r_config_addr ?
- "lookup failed" : inet_ntoa(config_addr));
- error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
- }
- else
- {
- error_string.appfmt("Id %d already allocated by another node.",
- *nodeId);
- error_code = NDB_MGM_ALLOCID_ERROR;
- }
- }
- else
- {
- error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
- *nodeId, type_c_string.c_str(),
- type_string.c_str());
- error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
- }
- }
- else
- {
- error_string.appfmt("No node defined with id=%d in config file.",
- *nodeId);
- error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
- }
+ const char *alias, *str;
+ alias= ndb_mgm_get_node_type_alias_string(type, &str);
+ error_string.appfmt("No free node id found for %s(%s).",
+ alias, str);
+ }
+ error_code = NDB_MGM_ALLOCID_ERROR;
+
+ error:
+ if (error_code != NDB_MGM_ALLOCID_CONFIG_MISMATCH)
+ {
+ // we have a temporary error, possibly because we have not yet
+ // got the latest connect status from db-nodes. Force update.
+ updateStatus();
}
if (log_event || error_code == NDB_MGM_ALLOCID_CONFIG_MISMATCH)
@@ -2548,27 +2596,35 @@ MgmtSrvr::alloc_node_id(NodeId * nodeId,
: "<none>",
error_string.c_str());
- NodeBitmask connected_nodes2;
- get_connected_nodes(connected_nodes2);
BaseString tmp_connected, tmp_not_connected;
for(Uint32 i = 0; i < MAX_NODES; i++)
{
- if (connected_nodes2.get(i))
+ if (connected_nodes.get(i))
{
- if (!m_reserved_nodes.get(i))
- tmp_connected.appfmt(" %d", i);
+ if (!m_reserved_nodes.get(i))
+ {
+ tmp_connected.appfmt("%d ", i);
+ }
}
else if (m_reserved_nodes.get(i))
{
- tmp_not_connected.appfmt(" %d", i);
+ tmp_not_connected.appfmt("%d ", i);
}
}
+
if (tmp_connected.length() > 0)
- g_eventLogger->info("Mgmt server state: node id's %s connected but not reserved",
- tmp_connected.c_str());
+ {
+ g_eventLogger->info
+ ("Mgmt server state: node id's %sconnected but not reserved",
+ tmp_connected.c_str());
+ }
+
if (tmp_not_connected.length() > 0)
- g_eventLogger->info("Mgmt server state: node id's %s not connected but reserved",
- tmp_not_connected.c_str());
+ {
+ g_eventLogger->info
+ ("Mgmt server state: node id's %snot connected but reserved",
+ tmp_not_connected.c_str());
+ }
}
DBUG_RETURN(false);
}
@@ -3112,3 +3168,4 @@ template class MutexVector<NodeId>;
template class MutexVector<Ndb_mgmd_event_service::Event_listener>;
template class Vector<EventSubscribeReq>;
template class MutexVector<EventSubscribeReq>;
+template class Vector<MgmtSrvr::nodeid_and_host>;
=== modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.hpp'
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp 2009-01-08 15:25:14 +0000
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp 2009-02-18 14:33:50 +0000
@@ -399,10 +399,11 @@ public:
*/
bool getNextNodeId(NodeId * _nodeId, enum ndb_mgm_node_type type) const ;
bool alloc_node_id(NodeId * _nodeId, enum ndb_mgm_node_type type,
- struct sockaddr *client_addr,
+ const struct sockaddr *client_addr,
SOCKET_SIZE_TYPE *client_addr_len,
int &error_code, BaseString &error_string,
- int log_event = 1);
+ int log_event = 1,
+ int timeout_s = 20);
/**
*
@@ -491,7 +492,9 @@ private:
*/
int getBlockNumber(const BaseString &blockName);
- int alloc_node_id_req(NodeId free_node_id, enum ndb_mgm_node_type type);
+ int alloc_node_id_req(NodeId free_node_id,
+ enum ndb_mgm_node_type type,
+ Uint32 timeout_ms);
int check_nodes_starting();
int check_nodes_stopping();
@@ -644,6 +647,21 @@ private:
Config *_props;
ConfigRetriever *m_config_retriever;
+
+ struct nodeid_and_host
+ {
+ unsigned id;
+ BaseString host;
+ };
+ int find_node_type(unsigned node_id, enum ndb_mgm_node_type type,
+ const struct sockaddr *client_addr,
+ NodeBitmask &nodes,
+ NodeBitmask &exact_nodes,
+ Vector<nodeid_and_host> &nodes_info,
+ int &error_code, BaseString &error_string);
+ int match_hostname(const struct sockaddr *, const char *) const;
+ int try_alloc(unsigned id, const char *, enum ndb_mgm_node_type type,
+ const struct sockaddr *client_addr, Uint32 timeout_ms);
};
inline
=== modified file 'storage/ndb/src/mgmsrv/Services.cpp'
--- a/storage/ndb/src/mgmsrv/Services.cpp 2009-01-08 15:25:14 +0000
+++ b/storage/ndb/src/mgmsrv/Services.cpp 2009-02-18 14:33:50 +0000
@@ -510,7 +510,8 @@ MgmApiSession::get_nodeid(Parser_t::Cont
while (!m_mgmsrv.alloc_node_id(&tmp, (enum ndb_mgm_node_type)nodetype,
(struct sockaddr*)&addr, &addrlen,
error_code, error_string,
- tick == 0 ? 0 : log_event))
+ tick == 0 ? 0 : log_event,
+ timeout))
{
/* NDB_MGM_ALLOCID_CONFIG_MISMATCH is a non retriable error */
if (tick == 0 && error_code != NDB_MGM_ALLOCID_CONFIG_MISMATCH)
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-6.3 branch (jonas:2860) | Jonas Oreland | 18 Feb |