#At file:///home/msvensson/mysql/6.4-bug42056/ based on
revid:msvensson@stripped
3215 Magnus Svensson 2009-01-14
Bug#42056 ndb_mgmd hang on STOPing the management node
- Calculate the difference between start and current time in an unsigned-safe
way so that the wait loop terminates properly if not all nodes have reached
the given state in time.
- Improve printouts and send them to log instead of stdout
- Check if the session thread should exit because it has been stopped
also in the case when, for example, a read timeout occurs. The default
value of the read timeout is 30 seconds, so it should be expected that a
shutdown of ndb_mgmd may take up to 30 seconds to complete.
modified:
storage/ndb/src/mgmsrv/MgmtSrvr.cpp
storage/ndb/src/mgmsrv/Services.cpp
=== modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.cpp'
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp 2009-01-08 15:41:27 +0000
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp 2009-01-14 17:32:38 +0000
@@ -1164,7 +1164,7 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<
for (unsigned i= 0; i < node_ids.size(); i++)
{
nodeId= node_ids[i];
- ndbout << "asked to stop " << nodeId << endl;
+ g_eventLogger->info("Going to stop node %d", nodeId);
if ((getNodeType(nodeId) != NDB_MGM_NODE_TYPE_MGM)
&&(getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB))
@@ -1181,7 +1181,7 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<
}
else
{
- ndbout << "which is me" << endl;
+ g_eventLogger->info("Stopping this node");
*stopSelf= (restart)? -1 : 1;
stoppedNodes.set(nodeId);
}
@@ -1505,8 +1505,8 @@ int MgmtSrvr::restartNodes(const Vector<
*stopCount = nodes.count();
// start up the nodes again
- NDB_TICKS waitTime = 12000;
- NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
+ const NDB_TICKS waitTime = 12000;
+ const NDB_TICKS startTime = NdbTick_CurrentMillisecond();
for (unsigned i = 0; i < node_ids.size(); i++)
{
NodeId nodeId= node_ids[i];
@@ -1515,7 +1515,8 @@ int MgmtSrvr::restartNodes(const Vector<
#ifdef VM_TRACE
ndbout_c("Waiting for %d not started", nodeId);
#endif
- while (s != NDB_MGM_NODE_STATUS_NOT_STARTED && waitTime > 0)
+ while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
+ (NdbTick_CurrentMillisecond() - startTime) < waitTime)
{
Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
Uint32 mysql_version = 0;
@@ -1525,7 +1526,6 @@ int MgmtSrvr::restartNodes(const Vector<
status(nodeId, &s, &version, &mysql_version, &startPhase,
&system, &dynamicId, &nodeGroup, &connectCount,
&address);
NdbSleep_MilliSleep(100);
- waitTime = (maxTime - NdbTick_CurrentMillisecond());
}
}
@@ -1588,9 +1588,9 @@ int MgmtSrvr::restartDB(bool nostart, bo
* Here all nodes were correctly stopped,
* so we wait for all nodes to be contactable
*/
- NDB_TICKS waitTime = 12000;
NodeId nodeId = 0;
- NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
+ const NDB_TICKS waitTime = 12000;
+ const NDB_TICKS startTime = NdbTick_CurrentMillisecond();
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
if (!nodes.get(nodeId))
@@ -1600,7 +1600,9 @@ int MgmtSrvr::restartDB(bool nostart, bo
#ifdef VM_TRACE
ndbout_c("Waiting for %d not started", nodeId);
#endif
- while (s != NDB_MGM_NODE_STATUS_NOT_STARTED && waitTime > 0) {
+ while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
+ (NdbTick_CurrentMillisecond() - startTime) < waitTime)
+ {
Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
Uint32 mysql_version = 0;
Uint32 connectCount = 0;
@@ -1609,7 +1611,6 @@ int MgmtSrvr::restartDB(bool nostart, bo
status(nodeId, &s, &version, &mysql_version, &startPhase,
&system, &dynamicId, &nodeGroup, &connectCount, &address);
NdbSleep_MilliSleep(100);
- waitTime = (maxTime - NdbTick_CurrentMillisecond());
}
}
=== modified file 'storage/ndb/src/mgmsrv/Services.cpp'
--- a/storage/ndb/src/mgmsrv/Services.cpp 2009-01-08 15:41:27 +0000
+++ b/storage/ndb/src/mgmsrv/Services.cpp 2009-01-14 17:32:38 +0000
@@ -386,11 +386,12 @@ MgmApiSession::runSession()
if (m_parser->run(ctx, *this))
{
- stop= m_stop;
+ stop= m_stop; // Has session been stopped
assert(ctx.m_status == Parser_t::Ok);
}
else
{
+ stop= m_stop; // Has session been stopped
const char* msg= NULL;
switch(ctx.m_status) {
case Parser_t::Eof: // Client disconnected
| Thread |
|---|
| • bzr commit into mysql-5.1 branch (msvensson:3215) Bug#42056 | Magnus Svensson | 14 Jan 2009 |