#At file:///home/msvensson/mysql/bug48301/6.3/ based on revid:magnus.blaudd@stripped29-qtaex8kbvvscxcnp
3136 Magnus Blåudd 2009-11-02
Bug#48301 ndb_mgmd 'get status' shows confusing status for API and MGM nodes
- Move all logic needed for answering 'get status' into its own nested struct of MgmtSrvr (MgmtSrvr::Status)
- Hook the Status module into MgmtSrvr event reports so it gets notified about
'connect' and 'disconnect' events from any node in the cluster. This is done
by adding a static event listener client to make sure that the event log levels
can never become so low that Disconnected/Connected events are not sent to ndb_mgmd
- It's possible to query the Status module about the status for any node in the cluster
by calling 'get_node_status' and it will then fill in a node_status struct which is passed
by reference.
- Add a new status type NDB_MGM_NODE_STATUS_CONNECTED which is valid for API and MGM nodes.
- Remove old 'm_connect_address' array
- Added a few ifdefs for code that is a little bit different in 7.0; those ifdefs will be
removed when merged up.
modified:
storage/ndb/include/mgmapi/mgmapi.h
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
storage/ndb/src/mgmapi/mgmapi.cpp
storage/ndb/src/mgmsrv/MgmtSrvr.cpp
storage/ndb/src/mgmsrv/MgmtSrvr.hpp
storage/ndb/src/mgmsrv/Services.cpp
storage/ndb/src/ndbapi/ClusterMgr.cpp
=== modified file 'storage/ndb/include/mgmapi/mgmapi.h'
--- a/storage/ndb/include/mgmapi/mgmapi.h 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/include/mgmapi/mgmapi.h 2009-11-02 12:52:37 +0000
@@ -205,8 +205,8 @@ extern "C" {
NDB_MGM_NODE_STATUS_RESTARTING = 6,
/** Maintenance mode*/
NDB_MGM_NODE_STATUS_SINGLEUSER = 7,
- /** Resume mode*/
- NDB_MGM_NODE_STATUS_RESUME = 8,
+ /** Node is connected */
+ NDB_MGM_NODE_STATUS_CONNECTED = 8,
#ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
/** Min valid value*/
NDB_MGM_NODE_STATUS_MIN = 0,
=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2009-10-06 14:11:14 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2009-11-02 12:52:37 +0000
@@ -3198,6 +3198,7 @@ Qmgr::execAPI_VERSION_REQ(Signal * signa
else
{
conf->version = 0;
+ conf->mysql_version = 0;
conf->inet_addr= 0;
}
conf->nodeId = nodeId;
=== modified file 'storage/ndb/src/mgmapi/mgmapi.cpp'
--- a/storage/ndb/src/mgmapi/mgmapi.cpp 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/mgmapi/mgmapi.cpp 2009-11-02 12:52:37 +0000
@@ -796,7 +796,8 @@ static struct ndb_mgm_status_atoi status
{ "STARTED", NDB_MGM_NODE_STATUS_STARTED },
{ "SHUTTING_DOWN", NDB_MGM_NODE_STATUS_SHUTTING_DOWN },
{ "RESTARTING", NDB_MGM_NODE_STATUS_RESTARTING },
- { "SINGLE USER MODE", NDB_MGM_NODE_STATUS_SINGLEUSER }
+ { "SINGLE USER MODE", NDB_MGM_NODE_STATUS_SINGLEUSER },
+ { "CONNECTED", NDB_MGM_NODE_STATUS_CONNECTED }
};
const int no_of_status_values = (sizeof(status_values) /
=== modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.cpp'
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp 2009-10-09 14:58:08 +0000
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp 2009-11-02 12:52:37 +0000
@@ -372,6 +372,27 @@ int MgmtSrvr::init()
return -1;
}
+
+static void
+check_event_setting(int eventType,
+ const LogLevel::EventCategory expected_category,
+ const Uint32 expected_threshold)
+{
+ LogLevel::EventCategory category;
+ Uint32 threshold;
+ Logger::LoggerLevel unused1;
+ EventLogger::EventTextFunction unused2;
+
+ // Must find the event with this type
+ require(EventLogger::event_lookup(eventType,
+ category, threshold,
+ unused1, unused2) == 0);
+ // Event should have the expected settings
+ require(category == expected_category);
+ require(threshold == expected_threshold);
+}
+
+
MgmtSrvr::MgmtSrvr(SocketServer *socket_server,
const char *config_filename,
const char *connect_string) :
@@ -383,7 +404,8 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_
theWaitState(WAIT_SUBSCRIBE_CONF),
m_local_mgm_handle(0),
m_event_listner(this),
- m_master_node(0)
+ m_master_node(0),
+ m_status(*this)
{
DBUG_ENTER("MgmtSrvr::MgmtSrvr");
@@ -451,7 +473,6 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_
*/
for(Uint32 i = 0; i<MAX_NODES; i++) {
nodeTypes[i] = (enum ndb_mgm_node_type)-1;
- m_connect_address[i].s_addr= 0;
}
{
@@ -530,7 +551,35 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_
m_event_listner.m_clients.push_back(se);
m_event_listner.m_logLevel = se.m_logLevel;
}
-
+
+ /*
+ Setup MgmtSrvr as client[1] in m_event_listner to
+ get information about API connect/disconnect
+ */
+ {
+ Ndb_mgmd_event_service::Event_listener se;
+#if MYSQL_VERSION_D < NDB_MAKE_VERSION(7,0,0)
+ se.m_socket = NDB_INVALID_SOCKET; // With closed socket!
+#else
+ my_socket_invalidate(&(se.m_socket)); // With closed socket!
+#endif
+ for(size_t t = 0; t<LogLevel::LOGLEVEL_CATEGORIES; t++){
+ se.m_logLevel.setLogLevel((LogLevel::EventCategory)t, 0);
+ }
+ // Currently only interested in the two connection
+ // events NDB_LE_Connected and NDB_LE_Disconnected
+ // set log level for this listener high enough to
+ // always receive them
+ const LogLevel::EventCategory category = LogLevel::llConnection;
+ const Uint32 threshold = 8;
+ check_event_setting(NDB_LE_Connected, category, threshold);
+ check_event_setting(NDB_LE_Disconnected, category, threshold);
+ se.m_logLevel.setLogLevel(category, threshold);
+
+ m_event_listner.m_clients.push_back(se);
+ m_event_listner.update_max_log_level(se.m_logLevel);
+ }
+
DBUG_VOID_RETURN;
}
@@ -720,155 +769,6 @@ MgmtSrvr::start(int nodeId)
return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
}
-/*****************************************************************************
- * Version handling
- *****************************************************************************/
-
-int
-MgmtSrvr::versionNode(int nodeId, Uint32 &version, Uint32& mysql_version,
- const char **address)
-{
- version= 0;
- mysql_version = 0;
- if (getOwnNodeId() == nodeId)
- {
- /**
- * If we're inquiring about our own node id,
- * We know what version we are (version implies connected for mgm)
- * but would like to find out from elsewhere what address they're using
- * to connect to us. This means that secondary mgm servers
- * can list ip addresses for mgm servers.
- *
- * If we don't get an address (i.e. no db nodes),
- * we get the address from the configuration.
- */
- sendVersionReq(nodeId, version, mysql_version, address);
- version= NDB_VERSION;
- mysql_version = NDB_MYSQL_VERSION_D;
- if(!*address)
- {
- ndb_mgm_configuration_iterator
- iter(*_config->m_configValues, CFG_SECTION_NODE);
- unsigned tmp= 0;
- for(iter.first();iter.valid();iter.next())
- {
- if(iter.get(CFG_NODE_ID, &tmp)) require(false);
- if((unsigned)nodeId!=tmp)
- continue;
- if(iter.get(CFG_NODE_HOST, address)) require(false);
- break;
- }
- }
- }
- else if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
- {
- ClusterMgr::Node node= theFacade->theClusterMgr->getNodeInfo(nodeId);
- if(node.connected)
- {
- version= node.m_info.m_version;
- mysql_version = node.m_info.m_mysql_version;
- }
- *address= get_connect_address(nodeId);
- }
- else if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_API ||
- getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM)
- {
- return sendVersionReq(nodeId, version, mysql_version, address);
- }
-
- return 0;
-}
-
-int
-MgmtSrvr::sendVersionReq(int v_nodeId,
- Uint32 &version,
- Uint32& mysql_version,
- const char **address)
-{
- SignalSender ss(theFacade);
- ss.lock();
-
- SimpleSignal ssig;
- ApiVersionReq* req = CAST_PTR(ApiVersionReq, ssig.getDataPtrSend());
- req->senderRef = ss.getOwnRef();
- req->nodeId = v_nodeId;
- ssig.set(ss, TestOrd::TraceAPI, QMGR, GSN_API_VERSION_REQ,
- ApiVersionReq::SignalLength);
-
- int do_send = 1;
- NodeId nodeId;
-
- while (1)
- {
- if (do_send)
- {
- bool next;
- nodeId = 0;
-
- while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
- okToSendTo(nodeId, true) != 0);
-
- const ClusterMgr::Node &node=
- theFacade->theClusterMgr->getNodeInfo(nodeId);
- if(next && node.m_state.startLevel != NodeState::SL_STARTED)
- {
- NodeId tmp=nodeId;
- while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
- okToSendTo(nodeId, true) != 0);
- if(!next)
- nodeId= tmp;
- }
-
- if(!next) return NO_CONTACT_WITH_DB_NODES;
-
- if (ss.sendSignal(nodeId, &ssig) != SEND_OK) {
- return SEND_OR_RECEIVE_FAILED;
- }
- do_send = 0;
- }
-
- SimpleSignal *signal = ss.waitFor();
-
- int gsn = signal->readSignalNumber();
- switch (gsn) {
- case GSN_API_VERSION_CONF: {
- const ApiVersionConf * const conf =
- CAST_CONSTPTR(ApiVersionConf, signal->getDataPtr());
- assert((int) conf->nodeId == v_nodeId);
- version = conf->version;
- mysql_version = conf->mysql_version;
- if (version < NDBD_SPLIT_VERSION)
- mysql_version = 0;
- struct in_addr in;
- in.s_addr= conf->inet_addr;
- *address= inet_ntoa(in);
- return 0;
- }
- case GSN_NF_COMPLETEREP:{
- const NFCompleteRep * const rep =
- CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
- if (rep->failedNodeId == nodeId)
- do_send = 1; // retry with other node
- continue;
- }
- case GSN_NODE_FAILREP:{
- const NodeFailRep * const rep =
- CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
- if (NdbNodeBitmask::get(rep->theNodes,nodeId))
- do_send = 1; // retry with other node
- continue;
- }
- case GSN_TAKE_OVERTCCONF:
- continue;
- default:
- report_unknown_signal(signal);
- return SEND_OR_RECEIVE_FAILED;
- }
- break;
- } // while(1)
-
- return 0;
-}
int MgmtSrvr::sendStopMgmd(NodeId nodeId,
bool abort,
@@ -1383,15 +1283,12 @@ int MgmtSrvr::restartNodes(const Vector<
#endif
while (s != NDB_MGM_NODE_STATUS_NOT_STARTED && waitTime > 0)
{
- Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
- Uint32 mysql_version = 0;
- Uint32 connectCount = 0;
- bool system;
- const char *address;
- status(nodeId, &s, &version, &mysql_version, &startPhase,
- &system, &dynamicId, &nodeGroup, &connectCount, &address);
- NdbSleep_MilliSleep(100);
+ NdbSleep_MilliSleep(100);
waitTime = (maxTime - NdbTick_CurrentMillisecond());
+
+ Status::Node node_status;
+ m_status.get_node_status(nodeId, getNodeType(nodeId), node_status);
+ s = node_status.status;
}
}
@@ -1467,15 +1364,12 @@ int MgmtSrvr::restartDB(bool nostart, bo
ndbout_c("Waiting for %d not started", nodeId);
#endif
while (s != NDB_MGM_NODE_STATUS_NOT_STARTED && waitTime > 0) {
- Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
- Uint32 mysql_version = 0;
- Uint32 connectCount = 0;
- bool system;
- const char *address;
- status(nodeId, &s, &version, &mysql_version, &startPhase,
- &system, &dynamicId, &nodeGroup, &connectCount, &address);
- NdbSleep_MilliSleep(100);
+ NdbSleep_MilliSleep(100);
waitTime = (maxTime - NdbTick_CurrentMillisecond());
+
+ Status::Node node_status;
+ m_status.get_node_status(nodeId, NDB_MGM_NODE_TYPE_NDB, node_status);
+ s = node_status.status;
}
}
@@ -1548,96 +1442,6 @@ MgmtSrvr::updateStatus()
}
int
-MgmtSrvr::status(int nodeId,
- ndb_mgm_node_status * _status,
- Uint32 * version,
- Uint32 * mysql_version,
- Uint32 * _phase,
- bool * _system,
- Uint32 * dynamic,
- Uint32 * nodegroup,
- Uint32 * connectCount,
- const char **address)
-{
- if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_API ||
- getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM) {
- versionNode(nodeId, *version, *mysql_version, address);
- } else {
- *address= get_connect_address(nodeId);
- }
-
- const ClusterMgr::Node node =
- theFacade->theClusterMgr->getNodeInfo(nodeId);
-
- if(!node.connected){
- * _status = NDB_MGM_NODE_STATUS_NO_CONTACT;
- return 0;
- }
-
- if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB) {
- * version = node.m_info.m_version;
- * mysql_version = node.m_info.m_mysql_version;
- }
-
- * dynamic = node.m_state.dynamicId;
- * nodegroup = node.m_state.nodeGroup;
- * connectCount = node.m_info.m_connectCount;
-
- switch(node.m_state.startLevel){
- case NodeState::SL_CMVMI:
- * _status = NDB_MGM_NODE_STATUS_NOT_STARTED;
- * _phase = 0;
- return 0;
- break;
- case NodeState::SL_STARTING:
- * _status = NDB_MGM_NODE_STATUS_STARTING;
- * _phase = node.m_state.starting.startPhase;
- return 0;
- break;
- case NodeState::SL_STARTED:
- * _status = NDB_MGM_NODE_STATUS_STARTED;
- * _phase = 0;
- return 0;
- break;
- case NodeState::SL_STOPPING_1:
- * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
- * _phase = 1;
- * _system = node.m_state.stopping.systemShutdown != 0;
- return 0;
- break;
- case NodeState::SL_STOPPING_2:
- * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
- * _phase = 2;
- * _system = node.m_state.stopping.systemShutdown != 0;
- return 0;
- break;
- case NodeState::SL_STOPPING_3:
- * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
- * _phase = 3;
- * _system = node.m_state.stopping.systemShutdown != 0;
- return 0;
- break;
- case NodeState::SL_STOPPING_4:
- * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
- * _phase = 4;
- * _system = node.m_state.stopping.systemShutdown != 0;
- return 0;
- break;
- case NodeState::SL_SINGLEUSER:
- * _status = NDB_MGM_NODE_STATUS_SINGLEUSER;
- * _phase = 0;
- return 0;
- break;
- default:
- * _status = NDB_MGM_NODE_STATUS_UNKNOWN;
- * _phase = 0;
- return 0;
- }
-
- return -1;
-}
-
-int
MgmtSrvr::setEventReportingLevelImpl(int nodeId_arg,
const EventSubscribeReq& ll)
{
@@ -2059,15 +1863,11 @@ void
MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete)
{
DBUG_ENTER("MgmtSrvr::handleStatus");
- Uint32 theData[25];
- EventReport *rep = (EventReport *)theData;
- theData[1] = nodeId;
+ // Generate artificial event for connect/disconnect
+ Uint32 theData[2];
+ EventReport *rep = (EventReport *)theData;
if (alive) {
- if (nodeTypes[nodeId] == NODE_TYPE_DB)
- {
- m_started_nodes.push_back(nodeId);
- }
rep->setEventType(NDB_LE_Connected);
} else {
rep->setEventType(NDB_LE_Disconnected);
@@ -2076,8 +1876,17 @@ MgmtSrvr::handleStatus(NodeId nodeId, bo
DBUG_VOID_RETURN;
}
}
- rep->setNodeId(_ownNodeId);
- eventReport(theData, 1);
+ rep->setNodeId(_ownNodeId); // From node
+ theData[1] = nodeId; // Affected node
+ eventReport(theData, 2);
+
+ // Put newly connected NDB in list of nodes to
+ // setup event subscriptions from
+ if (alive && nodeTypes[nodeId] == NODE_TYPE_DB)
+ {
+ m_started_nodes.push_back(nodeId);
+ }
+
DBUG_VOID_RETURN;
}
@@ -2108,29 +1917,10 @@ MgmtSrvr::nodeStatusNotification(void* m
enum ndb_mgm_node_type
MgmtSrvr::getNodeType(NodeId nodeId) const
{
- if(nodeId >= MAX_NODES)
- return (enum ndb_mgm_node_type)-1;
-
+ require(nodeId < MAX_NODES);
return nodeTypes[nodeId];
}
-const char *MgmtSrvr::get_connect_address(Uint32 node_id)
-{
- if (m_connect_address[node_id].s_addr == 0 &&
- theFacade && theFacade->theTransporterRegistry &&
- theFacade->theClusterMgr &&
- getNodeType(node_id) == NDB_MGM_NODE_TYPE_NDB)
- {
- const ClusterMgr::Node &node=
- theFacade->theClusterMgr->getNodeInfo(node_id);
- if (node.connected)
- {
- m_connect_address[node_id]=
- theFacade->theTransporterRegistry->get_connect_address(node_id);
- }
- }
- return inet_ntoa(m_connect_address[node_id]);
-}
void
MgmtSrvr::get_connected_nodes(NodeBitmask &connected_nodes) const
@@ -2422,32 +2212,10 @@ MgmtSrvr::try_alloc(unsigned id, const c
}
}
- DBUG_PRINT("info", ("allocating node id %d",id));
- {
- int r= 0;
- if (client_addr)
- {
- m_connect_address[id]= ((struct sockaddr_in *)client_addr)->sin_addr;
- }
- else if (config_hostname)
- {
- r= Ndb_getInAddr(&(m_connect_address[id]), config_hostname);
- }
- else
- {
- char name[256];
- r= gethostname(name, sizeof(name));
- if (r == 0)
- {
- name[sizeof(name)-1]= 0;
- r= Ndb_getInAddr(&(m_connect_address[id]), name);
- }
- }
- if (r)
- {
- m_connect_address[id].s_addr= 0;
- }
- }
+ // TODO Remove all the special hacks that only serve
+ // to allow this node to allocate its own nodeid before theFacade
+ // has been started
+
m_reserved_nodes.set(id);
if (theFacade && id != theFacade->ownId())
{
@@ -2458,12 +2226,18 @@ MgmtSrvr::try_alloc(unsigned id, const c
theFacade->doConnect(id);
theFacade->unlock_mutex();
}
-
+
+ const char* ip = "127.0.0.1";
+ if (client_addr)
+ ip = inet_ntoa(((const sockaddr_in*)client_addr)->sin_addr);
char tmp_str[128];
m_reserved_nodes.getText(tmp_str);
- g_eventLogger->info("Mgmt server state: nodeid %d reserved for ip %s, "
- "m_reserved_nodes %s.",
- id, get_connect_address(id), tmp_str);
+ g_eventLogger->info("NodeId %d reserved for ip %s, [ reserved: %s].", id, ip,
+#if NDB_VERSION_D < NDB_MAKE_VERSION(7,0,0)
+ tmp_str);
+#else
+ BaseString::getPrettyTextShort(m_reserved_nodes).c_str());
+#endif
return 0;
}
@@ -2472,7 +2246,7 @@ bool
MgmtSrvr::alloc_node_id(NodeId * nodeId,
enum ndb_mgm_node_type type,
const struct sockaddr *client_addr,
- SOCKET_SIZE_TYPE *client_addr_len,
+ SOCKET_SIZE_TYPE *client_addr_len,
int &error_code, BaseString &error_string,
int log_event,
int timeout_s)
@@ -2657,12 +2431,29 @@ void
MgmtSrvr::eventReport(const Uint32 * theData, Uint32 len)
{
const EventReport * const eventReport = (EventReport *)&theData[0];
-
+
NodeId nodeId = eventReport->getNodeId();
Ndb_logevent_type type = eventReport->getEventType();
- // Log event
+
+ switch (type){
+ case NDB_LE_Connected:
+ // Notify MgmtSrvr::Status about event
+ m_status.connected(theData[1]);
+ break;
+ case NDB_LE_Disconnected:
+ // Notify MgmtSrvr::Status about event
+ m_status.disconnected(theData[1]);
+ break;
+ default:
+ // Ignore
+ break;
+ }
+
+ // Log event to cluster log
g_eventLogger->log(type, theData, len, nodeId,
&m_event_listner[0].m_logLevel);
+
+ // Send event to listeners
m_event_listner.log(type, theData, len, nodeId);
}
@@ -2869,8 +2660,13 @@ MgmtSrvr::Allocated_resources::~Allocate
char tmp_str[128];
m_mgmsrv.m_reserved_nodes.getText(tmp_str);
- g_eventLogger->info("Mgmt server state: nodeid %d freed, m_reserved_nodes %s.",
- get_nodeid(), tmp_str);
+ g_eventLogger->info("NodeId %d released, [ reserved: %s].",
+ get_nodeid(),
+#if NDB_VERSION_D < NDB_MAKE_VERSION(7,0,0)
+ tmp_str);
+#else
+ BaseString::getPrettyTextShort(m_reserved_nodes).c_str());
+#endif
}
}
@@ -3169,8 +2965,422 @@ int MgmtSrvr::connect_to_self(const char
return 0;
}
+
+int
+MgmtSrvr::Status::sendAPIVersionReq(NodeId v_nodeId, Node& node_status)
+{
+ SignalSender ss(m_mgmsrv.theFacade);
+ ss.lock();
+
+ SimpleSignal ssig;
+ ApiVersionReq* req = CAST_PTR(ApiVersionReq, ssig.getDataPtrSend());
+ req->senderRef = ss.getOwnRef();
+ req->nodeId = v_nodeId;
+ ssig.set(ss, TestOrd::TraceAPI, QMGR,
+ GSN_API_VERSION_REQ, ApiVersionReq::SignalLength);
+
+ NodeId nodeId;
+ bool do_send = true;
+ while(true)
+ {
+ if (do_send)
+ {
+ nodeId = ss.getAliveNode();
+ if (nodeId == 0)
+ {
+ return NO_CONTACT_WITH_DB_NODES;
+ }
+
+ if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
+ {
+ return SEND_OR_RECEIVE_FAILED;
+ }
+
+ do_send = false;
+ }
+
+ SimpleSignal *signal = ss.waitFor();
+
+ switch (signal->readSignalNumber()) {
+ case GSN_API_VERSION_CONF: {
+ const ApiVersionConf * const conf =
+ CAST_CONSTPTR(ApiVersionConf, signal->getDataPtr());
+
+ assert(conf->nodeId == v_nodeId);
+ node_status.version = conf->version;
+ node_status.mysql_version = conf->mysql_version;
+ if (conf->version < NDBD_SPLIT_VERSION)
+ node_status.mysql_version = 0;
+ struct in_addr in;
+ in.s_addr= conf->inet_addr;
+ node_status.connect_address.assign(inet_ntoa(in));
+
+ return 0;
+ }
+
+ case GSN_NF_COMPLETEREP:{
+ const NFCompleteRep * const rep =
+ CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
+ if (rep->failedNodeId == nodeId)
+ do_send = true; // retry with other node
+ continue;
+ }
+
+ case GSN_NODE_FAILREP:{
+ const NodeFailRep * const rep =
+ CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
+ if (NdbNodeBitmask::get(rep->theNodes,nodeId))
+ do_send = true; // retry with other node
+ continue;
+ }
+ case GSN_API_REGCONF:
+ case GSN_TAKE_OVERTCCONF:
+ // Ignore
+ continue;
+ default:
+ report_unknown_signal(signal);
+ return SEND_OR_RECEIVE_FAILED;
+ }
+ }
+
+ // Should never come here
+ require(false);
+ return -1;
+}
+
+
+void
+MgmtSrvr::Status::status_ndb(NodeId node_id, Node& node_status)
+{
+ m_mgmsrv.theFacade->lock_mutex();
+ const ClusterMgr::Node& node =
+ m_mgmsrv.theFacade->theClusterMgr->getNodeInfo(node_id);
+ assert(node.m_info.getType() == (NodeInfo::NodeType)node_status.type);
+
+ if (!node.connected)
+ {
+ node_status.status = NDB_MGM_NODE_STATUS_NO_CONTACT;
+ }
+ else
+ {
+ node_status.version = node.m_info.m_version;
+ node_status.mysql_version = node.m_info.m_mysql_version;
+ node_status.dynamic = node.m_state.dynamicId;
+ node_status.nodegroup = node.m_state.nodeGroup;
+ node_status.connect_count = node.m_info.m_connectCount;
+
+ // Get connect_address from cache
+ struct in_addr addr;
+ if (!m_connect_address_cache.find(node_id, addr)){
+ assert(false);
+ addr.s_addr = 0;
+ }
+ node_status.connect_address.assign(inet_ntoa(addr));
+
+ switch(node.m_state.startLevel)
+ {
+ case NodeState::SL_CMVMI:
+ node_status.status = NDB_MGM_NODE_STATUS_NOT_STARTED;
+ node_status.phase = 0;
+ break;
+ case NodeState::SL_STARTING:
+ node_status.status = NDB_MGM_NODE_STATUS_STARTING;
+ node_status.phase = node.m_state.starting.startPhase;
+ break;
+ case NodeState::SL_STARTED:
+ node_status.status = NDB_MGM_NODE_STATUS_STARTED;
+ node_status.phase = 0;
+ break;
+ case NodeState::SL_STOPPING_1:
+ node_status.status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
+ node_status.phase = 1;
+ break;
+ case NodeState::SL_STOPPING_2:
+ node_status.status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
+ node_status.phase = 2;
+ break;
+ case NodeState::SL_STOPPING_3:
+ node_status.status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
+ node_status.phase = 3;
+ break;
+ case NodeState::SL_STOPPING_4:
+ node_status.status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
+ node_status.phase = 4;
+ break;
+ case NodeState::SL_SINGLEUSER:
+ node_status.status = NDB_MGM_NODE_STATUS_SINGLEUSER;
+ node_status.phase = 0;
+ break;
+ default:
+ node_status.status = NDB_MGM_NODE_STATUS_UNKNOWN;
+ node_status.phase = 0;
+ }
+ }
+ m_mgmsrv.theFacade->unlock_mutex();
+}
+
+
+class ConfigIter : public ndb_mgm_configuration_iterator {
+public:
+ ConfigIter(const Config* conf, unsigned type) :
+ ndb_mgm_configuration_iterator(*conf->m_configValues, type) {};
+};
+
+
+void
+MgmtSrvr::Status::status_mgm(NodeId node_id, Node& node_status)
+{
+
+ if (node_id == m_mgmsrv.getOwnNodeId())
+ {
+ /*
+ Use special cases:
+ 1) for own node which is not "connected" in ClusterMgr
+ 2) for own address, which should be the ip seen by NDB (if connected)
+ */
+
+ /* Return own status as seen from NDB nodes. */
+ status_api(node_id, node_status);
+
+ if (node_status.version)
+ {
+ // Check that the version returned is equal to our compiled in
+ assert(node_status.version == NDB_VERSION);
+ assert(node_status.mysql_version == NDB_MYSQL_VERSION_D);
+ }
+ else
+ {
+ // No NDB node connected, return status from own perspective
+ node_status.version = NDB_VERSION;
+ node_status.mysql_version = NDB_MYSQL_VERSION_D;
+ node_status.status = NDB_MGM_NODE_STATUS_CONNECTED;
+
+ // Get HostName from config
+ const char* address;
+ Guard g(m_mgmsrv.m_configMutex);
+ ConfigIter iter(m_mgmsrv._config, CFG_SECTION_NODE);
+ require(iter.find(CFG_NODE_ID, node_id) == 0);
+ require(iter.get(CFG_NODE_HOST, &address) == 0);
+
+ // Try to convert name to numerical ip address
+ struct in_addr addr;
+ if (Ndb_getInAddr(&addr, address) == 0)
+ address = inet_ntoa(addr);
+ node_status.connect_address.assign(address);
+ }
+ return;
+ }
+
+#if NDB_VERSION_D < NDB_MAKE_VERSION(7,0,0)
+ /*
+ MGM nodes in pre 7.0 are not connected directly to each
+ other, return status as seen by NDB(if connected)
+ */
+ status_api(node_id, node_status);
+#else
+ /*
+ MGM nodes are from 7.0 connected directly to all other MGM
+ node(s), return status as seen by ClusterMgr
+ */
+ m_mgmsrv.theFacade->lock_mutex();
+ const ClusterMgr::Node& node =
+ m_mgmsrv.theFacade->theClusterMgr->getNodeInfo(node_id);
+ assert(node_status.type == node.m_info.getType());
+
+ if (!node.connected)
+ {
+ node_status.status = NDB_MGM_NODE_STATUS_NO_CONTACT;
+ }
+ else
+ {
+ node_status.status = NDB_MGM_NODE_STATUS_CONNECTED;
+ node_status.version = node.m_info.m_version;
+ node_status.mysql_version = node.m_info.m_mysql_version;
+ node_status.connect_count = node.m_info.m_connectCount;
+
+ // Get connect_address from cache
+ struct in_addr addr;
+ if (!m_connect_address_cache.find(node_id, addr)){
+ assert(false);
+ addr.s_addr = 0;
+ }
+ node_status.connect_address.assign(inet_ntoa(addr));
+ }
+ m_mgmsrv.theFacade->unlock_mutex();
+#endif
+}
+
+
+void
+MgmtSrvr::Status::status_api(NodeId node_id, Node& node_status)
+{
+ /*
+ API nodes are _not_ connected to MGM node(s) => return status as seen by
+ any connected NDB node
+ */
+
+ // All connected API nodes are kept in cache
+ if (!m_api_status_cache.find(node_id, node_status))
+ {
+ /*
+ Node not found in cache -> not connected
+ */
+ node_status.status = NDB_MGM_NODE_STATUS_NO_CONTACT;
+ return;
+ }
+
+ /*
+ Found node in cache -> it's connected(to CMVMI)
+ */
+
+ if (node_status.version)
+ {
+ // Node already reported as connected both
+ // by CMVMI and QMGR, return cached status
+ return;
+ }
+
+ // Node is known to be connected but QMGR has not yet
+ // answered with version and connect address, ask again
+ if (sendAPIVersionReq(node_id, node_status) != 0)
+ {
+ // sendAPIVersionReq failed
+ assert(node_status.version == 0);
+ node_status.status = NDB_MGM_NODE_STATUS_UNKNOWN;
+ return;
+ }
+
+ // Got reply from NDB node, set correct status, cache
+ // the reply and return it
+ if (node_status.version)
+ node_status.status = NDB_MGM_NODE_STATUS_CONNECTED;
+ else
+ node_status.status = NDB_MGM_NODE_STATUS_NO_CONTACT;
+
+ if (!m_api_status_cache.add(node_id, node_status))
+ {
+ g_eventLogger->warning("Failed to cache api status for node %d" , node_id);
+ assert(false);
+ }
+ return;
+}
+
+
+void
+MgmtSrvr::Status::get_node_status(NodeId node_id, ndb_mgm_node_type type,
+ Node& node_status)
+{
+ node_status.node_id = node_id;
+ node_status.type = type;
+
+ switch(type){
+ case NDB_MGM_NODE_TYPE_NDB:
+ status_ndb(node_id, node_status);
+ break;
+ case NDB_MGM_NODE_TYPE_MGM:
+ status_mgm(node_id, node_status);
+ break;
+ case NDB_MGM_NODE_TYPE_API:
+ status_api(node_id, node_status);
+ g_eventLogger->info("API node: %d, status: %d, version: %d",
+ node_id, node_status.status, node_status.version);
+ break;
+ default:
+ require(false);
+ break;
+ }
+ assert(node_status.node_id != 0);
+ assert(node_status.node_id == node_id);
+ assert(node_status.type != NDB_MGM_NODE_TYPE_UNKNOWN);
+ assert(node_status.type == type);
+
+ switch(type){
+ case NDB_MGM_NODE_TYPE_NDB:
+ break;
+ case NDB_MGM_NODE_TYPE_MGM:
+ assert(node_status.status == NDB_MGM_NODE_STATUS_UNKNOWN ||
+ node_status.status == NDB_MGM_NODE_STATUS_NO_CONTACT ||
+ node_status.status == NDB_MGM_NODE_STATUS_CONNECTED);
+ break;
+ case NDB_MGM_NODE_TYPE_API:
+ assert(node_status.status == NDB_MGM_NODE_STATUS_UNKNOWN ||
+ node_status.status == NDB_MGM_NODE_STATUS_NO_CONTACT ||
+ node_status.status == NDB_MGM_NODE_STATUS_CONNECTED);
+ break;
+ default:
+ require(false);
+ break;
+ }
+};
+
+
+void
+MgmtSrvr::Status::connected(NodeId node_id)
+{
+ // Cache the connecting node's connect address if it's
+ // connected directly to this node
+ // Note! TTFM is already locked
+ const ClusterMgr::Node& node =
+ m_mgmsrv.theFacade->theClusterMgr->getNodeInfo(node_id);
+ if (node.connected)
+ {
+ struct in_addr addr =
+ m_mgmsrv.theFacade->theTransporterRegistry->get_connect_address(node_id);
+ m_connect_address_cache.add(node_id, addr);
+ }
+ switch(node.m_info.getType()){
+
+ case NodeInfo::DB:
+ {
+ m_connected_ndb_nodes.set(node_id);
+ break;
+ }
+
+ case NodeInfo::API:
+ {
+ // Add dummy node to api cache, indicating it's connected
+ Node dummy_api;
+ dummy_api.node_id = node_id;
+ dummy_api.type = NDB_MGM_NODE_TYPE_API;
+ dummy_api.status = NDB_MGM_NODE_STATUS_NO_CONTACT;
+ m_api_status_cache.add(node_id, dummy_api);
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+
+void
+MgmtSrvr::Status::disconnected(NodeId node_id)
+{
+ // Need to refresh api status cache for this node
+ m_api_status_cache.clear(node_id);
+
+ // Clear the cached connect address for this node
+ m_connect_address_cache.clear(node_id);
+
+ // Note! TTFM is already locked
+ const ClusterMgr::Node& node =
+ m_mgmsrv.theFacade->theClusterMgr->getNodeInfo(node_id);
+ if (node.m_info.getType() == NodeInfo::DB)
+ m_connected_ndb_nodes.clear(node_id);
+
+ if (m_connected_ndb_nodes.isclear())
+ {
+ // all ndb nodes are gone -> clear all API status
+ for (NodeId i = 0; i < MAX_NODES; i++)
+ m_api_status_cache.clear(i);
+ }
+}
+
+
template class MutexVector<NodeId>;
template class MutexVector<Ndb_mgmd_event_service::Event_listener>;
template class Vector<EventSubscribeReq>;
template class MutexVector<EventSubscribeReq>;
template class Vector<MgmtSrvr::nodeid_and_host>;
+template class HashMap<NodeId, struct in_addr>;
+template class HashMap<NodeId, struct MgmtSrvr::Status::Node>;
=== modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.hpp'
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp 2009-11-02 12:52:37 +0000
@@ -27,6 +27,7 @@
#include <NdbTCP.h>
#include <ConfigRetriever.hpp>
#include <Vector.hpp>
+#include <HashMap.hpp>
#include <NodeBitmask.hpp>
#include <signaldata/ManagementServer.hpp>
#include <ndb_version.h>
@@ -175,23 +176,6 @@ public:
~MgmtSrvr();
- /**
- * Get status on a node.
- * address may point to a common area (e.g. from inet_addr)
- * There is no gaurentee that it is preserved across calls.
- * Copy the string if you are not going to use it immediately.
- */
- int status(int nodeId,
- ndb_mgm_node_status * status,
- Uint32 * version,
- Uint32 * mysql_version,
- Uint32 * phase,
- bool * systemShutdown,
- Uint32 * dynamicId,
- Uint32 * nodeGroup,
- Uint32 * connectCount,
- const char **address);
-
// All the functions below may return any of this error codes:
// NO_CONTACT_WITH_PROCESS, PROCESS_NOT_CONFIGURED, WRONG_PROCESS_TYPE,
// COULD_NOT_ALLOCATE_MEMORY, SEND_OR_RECEIVE_FAILED
@@ -235,15 +219,6 @@ public:
int shutdownDB(int * cnt = 0, bool abort = false);
/**
- * print version info about a node
- *
- * @param processId: Id of the DB process to stop
- * @return 0 if succeeded, otherwise: as stated above, plus:
- */
- int versionNode(int nodeId, Uint32 &version, Uint32 &mysql_version,
- const char **address);
-
- /**
* Maintenance on the system
*/
int enterSingleUser(int * cnt = 0, Uint32 singleuserNodeId = 0);
@@ -452,7 +427,6 @@ public:
ConfigRetriever *get_config_retriever() { return m_config_retriever; };
- const char *get_connect_address(Uint32 node_id);
void get_connected_nodes(NodeBitmask &connected_nodes) const;
SocketServer *get_socket_server() { return m_socket_server; }
@@ -517,7 +491,6 @@ private:
Uint32 m_nextConfigGenerationNumber;
NodeBitmask m_reserved_nodes;
- struct in_addr m_connect_address[MAX_NODES];
//**************************************************************************
// Specific signal handling methods
@@ -623,8 +596,6 @@ private:
char m_local_mgm_connect_string[20];
class TransporterFacade * theFacade;
- int sendVersionReq( int processId, Uint32 &version, Uint32& mysql_version,
- const char **address);
int translateStopRef(Uint32 errCode);
bool _isStopThread;
@@ -665,6 +636,122 @@ private:
int match_hostname(const struct sockaddr *, const char *) const;
int try_alloc(unsigned id, const char *, enum ndb_mgm_node_type type,
const struct sockaddr *client_addr, Uint32 timeout_ms);
+
+public:
+ struct Status {
+ /*
+ Status module, keeps status information about
+ nodes in the cluster in order to be able
+ to answer 'get status' calls
+ */
+ struct Node {
+ NodeId node_id;
+ ndb_mgm_node_type type;
+ ndb_mgm_node_status status;
+ Uint32 version;
+ Uint32 mysql_version;
+ BaseString connect_address;
+ Uint32 connect_count; // Valid if node is NDB or MGM
+ Uint32 phase; // Valid if node is NDB
+ Uint32 dynamic; // Valid if node is NDB
+ Uint32 nodegroup; // Valid if node is NDB
+
+ Node() :
+ node_id(0),
+ type(NDB_MGM_NODE_TYPE_UNKNOWN),
+ status(NDB_MGM_NODE_STATUS_UNKNOWN),
+ version(0), mysql_version(0), connect_count(0),
+ phase(0), dynamic(0), nodegroup(0) {}
+ };
+
+ void get_node_status(NodeId nodeId, ndb_mgm_node_type type,
+ Node& node_status);
+
+ // Functions to be called by MgmtSrvr when a node
+ // connects to or disconnects from the cluster
+ void connected(NodeId node_id);
+ void disconnected(NodeId node_id);
+
+ // Constructor
+ Status(MgmtSrvr& mgm) : m_mgmsrv(mgm) {};
+
+ private:
+ /*
+ API status cache
+ Status of an API node only changes when it connects
+ or disconnects -> it can be cached
+ */
+ class ApiStatusCache {
+ HashMap<NodeId, Node> m_api_nodes;
+ NdbMutex* m_mutex;
+ public:
+ ApiStatusCache() {
+ m_mutex = NdbMutex_Create();
+ }
+ ~ApiStatusCache() {
+ NdbMutex_Destroy(m_mutex);
+ }
+ void clear(NodeId nodeId) {
+ Guard g(m_mutex);
+ fprintf(stderr, "clear, nodeid: %d\n", nodeId);
+ m_api_nodes.remove(nodeId);
+ }
+ bool add(NodeId nodeId, Node& api_stat) {
+ Guard g(m_mutex);
+ fprintf(stderr, "add, nodeid: %d, status: %d, version: %d\n",
+ nodeId, api_stat.status, api_stat.version);
+ return m_api_nodes.insert(nodeId, api_stat, true);
+ }
+ bool find(NodeId nodeId, Node& api_stat) {
+ Guard g(m_mutex);
+ if (!m_api_nodes.search(nodeId, api_stat))
+ {
+ fprintf(stderr, "find, nodeid: %d is not in cache\n",
+ nodeId);
+ return false;
+ }
+ assert(api_stat.node_id == nodeId);
+ fprintf(stderr, "find, nodeid: %d, status: %d, version: %d\n",
+ nodeId, api_stat.status, api_stat.version);
+ return true;
+ }
+ } m_api_status_cache;
+
+ /*
+ Connect address cache
+ Contains the connect address of nodes connected
+ directly to us
+ Always accessed with TTFM locked
+ */
+ class ConnectAddressCache {
+ HashMap<NodeId, struct in_addr> m_addresses;
+ public:
+ void clear(NodeId node_id) {
+ m_addresses.remove(node_id);
+ }
+ bool add(NodeId node_id, struct in_addr addr) {
+ assert(addr.s_addr != 0);
+ return m_addresses.insert(node_id, addr);
+ }
+ bool find(NodeId node_id, struct in_addr& addr) {
+ if (!m_addresses.search(node_id, addr))
+ return false;
+ assert(addr.s_addr != 0);
+ return true;
+ }
+ } m_connect_address_cache;
+
+ // Ask any connected NDB node about the status of an API node
+ int sendAPIVersionReq(NodeId node_id, Node& node_status);
+
+ void status_api(NodeId node_id, Node& node_status);
+ void status_mgm(NodeId node_id, Node& node_status);
+ void status_ndb(NodeId node_id, Node& node_status);
+
+ MgmtSrvr& m_mgmsrv;
+ NdbNodeBitmask m_connected_ndb_nodes;
+ } m_status;
+
};
inline
=== modified file 'storage/ndb/src/mgmsrv/Services.cpp'
--- a/storage/ndb/src/mgmsrv/Services.cpp 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/mgmsrv/Services.cpp 2009-11-02 12:52:37 +0000
@@ -997,30 +997,23 @@ printNodeStatus(OutputStream *output,
enum ndb_mgm_node_type type) {
NodeId nodeId = 0;
while(mgmsrv.getNextNodeId(&nodeId, type)) {
- enum ndb_mgm_node_status status;
- Uint32 startPhase = 0,
- version = 0, mysql_version = 0,
- dynamicId = 0,
- nodeGroup = 0,
- connectCount = 0;
- bool system;
- const char *address= NULL;
- mgmsrv.status(nodeId, &status, &version, &mysql_version, &startPhase,
- &system, &dynamicId, &nodeGroup, &connectCount,
- &address);
+ MgmtSrvr::Status::Node node_status;
+ mgmsrv.m_status.get_node_status(nodeId, type, node_status);
+
output->println("node.%d.type: %s",
- nodeId,
- ndb_mgm_get_node_type_string(type));
+ nodeId, ndb_mgm_get_node_type_string(type));
output->println("node.%d.status: %s",
- nodeId,
- ndb_mgm_get_node_status_string(status));
- output->println("node.%d.version: %d", nodeId, version);
- output->println("node.%d.mysql_version: %d", nodeId, mysql_version);
- output->println("node.%d.startphase: %d", nodeId, startPhase);
- output->println("node.%d.dynamic_id: %d", nodeId, dynamicId);
- output->println("node.%d.node_group: %d", nodeId, nodeGroup);
- output->println("node.%d.connect_count: %d", nodeId, connectCount);
- output->println("node.%d.address: %s", nodeId, address ? address : "");
+ nodeId, ndb_mgm_get_node_status_string(node_status.status));
+ output->println("node.%d.version: %d", nodeId, node_status.version);
+ output->println("node.%d.mysql_version: %d",
+ nodeId, node_status.mysql_version);
+ output->println("node.%d.startphase: %d", nodeId, node_status.phase);
+ output->println("node.%d.dynamic_id: %d", nodeId, node_status.dynamic);
+ output->println("node.%d.node_group: %d", nodeId, node_status.nodegroup);
+ output->println("node.%d.connect_count: %d",
+ nodeId, node_status.connect_count);
+ output->println("node.%d.address: %s",
+ nodeId, node_status.connect_address.c_str());
}
}
=== modified file 'storage/ndb/src/ndbapi/ClusterMgr.cpp'
--- a/storage/ndb/src/ndbapi/ClusterMgr.cpp 2009-09-08 12:23:44 +0000
+++ b/storage/ndb/src/ndbapi/ClusterMgr.cpp 2009-11-02 12:52:37 +0000
@@ -353,6 +353,9 @@ ClusterMgr::execAPI_REGREQ(const Uint32
if(node.m_info.m_version != apiRegReq->version){
node.m_info.m_version = apiRegReq->version;
+ node.m_info.m_mysql_version = apiRegReq->mysql_version;
+ if (node.m_info.m_version < NDBD_SPLIT_VERSION)
+ node.m_info.m_mysql_version = 0;
if (getMajor(node.m_info.m_version) < getMajor(NDB_VERSION) ||
getMinor(node.m_info.m_version) < getMinor(NDB_VERSION)) {
Attachment: [text/bzr-bundle] bzr/magnus.blaudd@sun.com-20091102125237-p48i8jugu5tjy3wt.bundle
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-6.3 branch (magnus.blaudd:3136)Bug#48301 | Magnus Blåudd | 2 Nov 2009 |