List:Commits« Previous MessageNext Message »
From:Magnus Svensson Date:January 14 2009 6:32pm
Subject:bzr commit into mysql-5.1 branch (msvensson:3215) Bug#42056
View as plain text  
#At file:///home/msvensson/mysql/6.4-bug42056/ based on
revid:msvensson@stripped

 3215 Magnus Svensson	2009-01-14
      Bug#42056 ndb_mgmd hang on STOPing the management node
       - Calculate the difference between start and current time in an unsigned-safe
         way so that the wait loop terminates properly if not all nodes have reached
         the given state in time.
       - Improve printouts and send them to log instead of stdout
       - Check if the session thread should exit because it has been stopped
         also in the case when, for example, a read timeout occurs. The default
         value of the read timeout is 30 seconds, so it should be expected that a
         shutdown of ndb_mgmd may take 30 seconds to complete.
modified:
  storage/ndb/src/mgmsrv/MgmtSrvr.cpp
  storage/ndb/src/mgmsrv/Services.cpp

=== modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.cpp'
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp	2009-01-08 15:41:27 +0000
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp	2009-01-14 17:32:38 +0000
@@ -1164,7 +1164,7 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<
     for (unsigned i= 0; i < node_ids.size(); i++)
     {
       nodeId= node_ids[i];
-      ndbout << "asked to stop " << nodeId << endl;
+      g_eventLogger->info("Going to stop node %d", nodeId);
 
       if ((getNodeType(nodeId) != NDB_MGM_NODE_TYPE_MGM)
           &&(getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB))
@@ -1181,7 +1181,7 @@ int MgmtSrvr::sendSTOP_REQ(const Vector<
       }
       else
       {
-        ndbout << "which is me" << endl;
+        g_eventLogger->info("Stopping this node");
         *stopSelf= (restart)? -1 : 1;
         stoppedNodes.set(nodeId);
       }
@@ -1505,8 +1505,8 @@ int MgmtSrvr::restartNodes(const Vector<
     *stopCount = nodes.count();
   
   // start up the nodes again
-  NDB_TICKS waitTime = 12000;
-  NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
+  const NDB_TICKS waitTime = 12000;
+  const NDB_TICKS startTime = NdbTick_CurrentMillisecond();
   for (unsigned i = 0; i < node_ids.size(); i++)
   {
     NodeId nodeId= node_ids[i];
@@ -1515,7 +1515,8 @@ int MgmtSrvr::restartNodes(const Vector<
 #ifdef VM_TRACE
     ndbout_c("Waiting for %d not started", nodeId);
 #endif
-    while (s != NDB_MGM_NODE_STATUS_NOT_STARTED && waitTime > 0)
+    while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
+           (NdbTick_CurrentMillisecond() - startTime) < waitTime)
     {
       Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
       Uint32 mysql_version = 0;
@@ -1525,7 +1526,6 @@ int MgmtSrvr::restartNodes(const Vector<
       status(nodeId, &s, &version, &mysql_version, &startPhase, 
              &system, &dynamicId, &nodeGroup, &connectCount,
&address);
       NdbSleep_MilliSleep(100);  
-      waitTime = (maxTime - NdbTick_CurrentMillisecond());
     }
   }
 
@@ -1588,9 +1588,9 @@ int MgmtSrvr::restartDB(bool nostart, bo
    * Here all nodes were correctly stopped,
    * so we wait for all nodes to be contactable
    */
-  NDB_TICKS waitTime = 12000;
   NodeId nodeId = 0;
-  NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
+  const NDB_TICKS waitTime = 12000;
+  const NDB_TICKS startTime = NdbTick_CurrentMillisecond();
 
   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
     if (!nodes.get(nodeId))
@@ -1600,7 +1600,9 @@ int MgmtSrvr::restartDB(bool nostart, bo
 #ifdef VM_TRACE
     ndbout_c("Waiting for %d not started", nodeId);
 #endif
-    while (s != NDB_MGM_NODE_STATUS_NOT_STARTED && waitTime > 0) {
+    while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
+           (NdbTick_CurrentMillisecond() - startTime) < waitTime)
+    {
       Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
       Uint32 mysql_version = 0;
       Uint32 connectCount = 0;
@@ -1609,7 +1611,6 @@ int MgmtSrvr::restartDB(bool nostart, bo
       status(nodeId, &s, &version, &mysql_version, &startPhase, 
 	     &system, &dynamicId, &nodeGroup, &connectCount, &address);
       NdbSleep_MilliSleep(100);  
-      waitTime = (maxTime - NdbTick_CurrentMillisecond());
     }
   }
   

=== modified file 'storage/ndb/src/mgmsrv/Services.cpp'
--- a/storage/ndb/src/mgmsrv/Services.cpp	2009-01-08 15:41:27 +0000
+++ b/storage/ndb/src/mgmsrv/Services.cpp	2009-01-14 17:32:38 +0000
@@ -386,11 +386,12 @@ MgmApiSession::runSession()
 
     if (m_parser->run(ctx, *this))
     {
-      stop= m_stop;
+      stop= m_stop; // Has session been stopped
       assert(ctx.m_status == Parser_t::Ok);
     }
     else
     {
+      stop= m_stop; // Has session been stopped
       const char* msg= NULL;
       switch(ctx.m_status) {
       case Parser_t::Eof:    // Client disconnected

Thread
bzr commit into mysql-5.1 branch (msvensson:3215) Bug#42056Magnus Svensson14 Jan 2009