MySQL Lists are EOL. Please join:

List: Commits | « Previous Message | Next Message »
From: Stewart Smith | Date: July 3 2006 5:38am
Subject: bk commit into 5.0 tree (stewart:1.2138) BUG#13985
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of stewart. When stewart does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2138 06/07/03 15:37:57 stewart@stripped +5 -0
  BUG#13985 ndb_mgm "status" command can return incorrect data node status
  
  Second half of the fix for this bug.
  
  This patch forces a heartbeat to be sent and then waits (for a little while)
  for the replies. With this change we can still get output such as
  
  > all status
  X starting
  Y started
  X started
  >
  
  which is okay, as the new status always comes after the old status.
  There is still the slimmest of chances of getting output like the above, in
  which only half the cluster appears started.
  
  This is about the best we can do with a command line interactive program.

  ndb/src/ndbapi/ClusterMgr.hpp
    1.8 06/07/03 15:37:51 stewart@stripped +8 -2
    Add ::forceHB(NodeBitmask) and associated variables

  ndb/src/ndbapi/ClusterMgr.cpp
    1.23 06/07/03 15:37:51 stewart@stripped +67 -4
    new DEBUG_REG define for debugging registration and HB code.
    
    Add ClusterMgr::forceHB(NodeBitmask), which sends a HB signal to each node in
    the bitmask and then waits for a REGCONF from each of them.
    It waits for at most 1 second in total, so an end client is not blocked for too long.
    
    On receipt of a reply (REGCONF or REGREF), clear the nodeId in the waiting-for
    bitmask and, once the bitmask is empty, signal any waiting threads.

  ndb/src/mgmsrv/Services.cpp
    1.65 06/07/03 15:37:51 stewart@stripped +3 -0
    When status is queried, force an update of the status in the mgm server (i.e. send heartbeats).

  ndb/src/mgmsrv/MgmtSrvr.hpp
    1.44 06/07/03 15:37:51 stewart@stripped +2 -0
    add prototype for updateStatus(NodeBitmask) method

  ndb/src/mgmsrv/MgmtSrvr.cpp
    1.100 06/07/03 15:37:51 stewart@stripped +6 -0
    Add updateStatus method to MgmtSrvr.
    
    Used to force an update of node status for the nodes.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	stewart
# Host:	willster.(none)
# Root:	/home/stewart/Documents/MySQL/5.0/bug13985

--- 1.99/ndb/src/mgmsrv/MgmtSrvr.cpp	2006-05-16 19:49:54 +10:00
+++ 1.100/ndb/src/mgmsrv/MgmtSrvr.cpp	2006-07-03 15:37:51 +10:00
@@ -1412,6 +1412,12 @@
 
 #include <ClusterMgr.hpp>
 
+void
+MgmtSrvr::updateStatus(NodeBitmask nodes)
+{
+  theFacade->theClusterMgr->forceHB(nodes);
+}
+
 int 
 MgmtSrvr::status(int nodeId, 
                  ndb_mgm_node_status * _status, 

--- 1.43/ndb/src/mgmsrv/MgmtSrvr.hpp	2006-04-26 23:55:24 +10:00
+++ 1.44/ndb/src/mgmsrv/MgmtSrvr.hpp	2006-07-03 15:37:51 +10:00
@@ -487,6 +487,8 @@
   void get_connected_nodes(NodeBitmask &connected_nodes) const;
   SocketServer *get_socket_server() { return m_socket_server; }
 
+  void updateStatus(NodeBitmask nodes);
+
   //**************************************************************************
 private:
   //**************************************************************************

--- 1.64/ndb/src/mgmsrv/Services.cpp	2006-05-08 15:56:24 +10:00
+++ 1.65/ndb/src/mgmsrv/Services.cpp	2006-07-03 15:37:51 +10:00
@@ -951,6 +951,9 @@
 		MgmtSrvr &mgmsrv,
 		enum ndb_mgm_node_type type) {
   NodeId nodeId = 0;
+  NodeBitmask hbnodes;
+  mgmsrv.get_connected_nodes(hbnodes);
+  mgmsrv.updateStatus(hbnodes);
   while(mgmsrv.getNextNodeId(&nodeId, type)) {
     enum ndb_mgm_node_status status;
     Uint32 startPhase = 0, 

--- 1.22/ndb/src/ndbapi/ClusterMgr.cpp	2006-05-16 02:44:12 +10:00
+++ 1.23/ndb/src/ndbapi/ClusterMgr.cpp	2006-07-03 15:37:51 +10:00
@@ -39,6 +39,8 @@
 
 int global_flag_send_heartbeat_now= 0;
 
+//#define DEBUG_REG
+
 // Just a C wrapper for threadMain
 extern "C" 
 void*
@@ -67,6 +69,8 @@
   DBUG_ENTER("ClusterMgr::ClusterMgr");
   ndbSetOwnVersion();
   clusterMgrThreadMutex = NdbMutex_Create();
+  waitForHBMutex= NdbMutex_Create();
+  waitForHBCond= NdbCondition_Create();
   noOfAliveNodes= 0;
   noOfConnectedNodes= 0;
   theClusterMgrThread= 0;
@@ -77,7 +81,9 @@
 ClusterMgr::~ClusterMgr()
 {
   DBUG_ENTER("ClusterMgr::~ClusterMgr");
-  doStop();  
+  doStop();
+  NdbCondition_Destroy(waitForHBCond);
+  NdbMutex_Destroy(waitForHBMutex);
   NdbMutex_Destroy(clusterMgrThreadMutex);
   DBUG_VOID_RETURN;
 }
@@ -164,6 +170,49 @@
 }
 
 void
+ClusterMgr::forceHB(NodeBitmask waitFor)
+{
+    theFacade.lock_mutex();
+    global_flag_send_heartbeat_now= 1;
+
+    waitForHBFromNodes= waitFor;
+#ifdef DEBUG_REG
+    char buf[128];
+    ndbout << "Waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
+#endif
+    NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
+
+    signal.theVerId_signalNumber   = GSN_API_REGREQ;
+    signal.theReceiversBlockNumber = QMGR;
+    signal.theTrace                = 0;
+    signal.theLength               = ApiRegReq::SignalLength;
+
+    ApiRegReq * req = CAST_PTR(ApiRegReq, signal.getDataPtrSend());
+    req->ref = numberToRef(API_CLUSTERMGR, theFacade.ownId());
+    req->version = NDB_VERSION;
+
+    int nodeId= 0;
+    for(int i=0;
+        NodeBitmask::NotFound!=(nodeId= waitForHBFromNodes.find(i));
+        i= nodeId+1)
+    {
+#ifdef DEBUG_REG
+      ndbout << "FORCE HB to " << nodeId << endl;
+#endif
+      theFacade.sendSignalUnCond(&signal, nodeId);
+    }
+
+    theFacade.unlock_mutex();
+
+    NdbMutex_Lock(waitForHBMutex);
+    NdbCondition_WaitTimeout(waitForHBCond, waitForHBMutex, 1000);
+    NdbMutex_Unlock(waitForHBMutex);
+#ifdef DEBUG_REG
+    ndbout << "Still waiting for HB from " << waitForHBFromNodes.getText(buf) << endl;
+#endif
+}
+
+void
 ClusterMgr::threadMain( ){
   NdbApiSignal signal(numberToRef(API_CLUSTERMGR, theFacade.ownId()));
   
@@ -226,7 +275,7 @@
 	if (theNode.m_info.m_type == NodeInfo::REP) {
 	  signal.theReceiversBlockNumber = API_CLUSTERMGR;
 	}
-#if 0 
+#ifdef DEBUG_REG
 	ndbout_c("ClusterMgr: Sending API_REGREQ to node %d", (int)nodeId);
 #endif
 	theFacade.sendSignalUnCond(&signal, nodeId);
@@ -278,7 +327,7 @@
   const ApiRegReq * const apiRegReq = (ApiRegReq *)&theData[0];
   const NodeId nodeId = refToNode(apiRegReq->ref);
 
-#if 0
+#ifdef DEBUG_REG
   ndbout_c("ClusterMgr: Recd API_REGREQ from node %d", nodeId);
 #endif
 
@@ -319,7 +368,7 @@
   const ApiRegConf * const apiRegConf = (ApiRegConf *)&theData[0];
   const NodeId nodeId = refToNode(apiRegConf->qmgrRef);
   
-#if 0 
+#ifdef DEBUG_REG
   ndbout_c("ClusterMgr: Recd API_REGCONF from node %d", nodeId);
 #endif
 
@@ -351,6 +400,13 @@
   if (node.m_info.m_type != NodeInfo::REP) {
     node.hbFrequency = (apiRegConf->apiHeartbeatFrequency * 10) - 50;
   }
+  waitForHBFromNodes.clear(nodeId);
+  if(waitForHBFromNodes.isclear())
+  {
+    NdbMutex_Lock(waitForHBMutex);
+    NdbCondition_Signal(waitForHBCond);
+    NdbMutex_Unlock(waitForHBMutex);
+  }
 }
 
 void
@@ -378,6 +434,13 @@
   case ApiRegRef::UnsupportedVersion:
   default:
     break;
+  }
+  waitForHBFromNodes.clear(nodeId);
+  if(waitForHBFromNodes.isclear())
+  {
+    NdbMutex_Lock(waitForHBMutex);
+    NdbCondition_Signal(waitForHBCond);
+    NdbMutex_Unlock(waitForHBMutex);
   }
 }
 

--- 1.7/ndb/src/ndbapi/ClusterMgr.hpp	2006-05-16 02:44:12 +10:00
+++ 1.8/ndb/src/ndbapi/ClusterMgr.hpp	2006-07-03 15:37:51 +10:00
@@ -49,7 +49,9 @@
 
   void doStop();
   void startThread();
-  
+
+  void forceHB(NodeBitmask waitFor);
+
 private:
   void threadMain();
   
@@ -85,7 +87,11 @@
   Uint32        noOfConnectedNodes;
   Node          theNodes[MAX_NODES];
   NdbThread*    theClusterMgrThread;
-  
+
+  NodeBitmask   waitForHBFromNodes; // used in forcing HBs
+  NdbMutex*     waitForHBMutex;
+  NdbCondition* waitForHBCond;
+
   /**
    * Used for controlling start/stop of the thread
    */
Thread:
bk commit into 5.0 tree (stewart:1.2138) BUG#13985 | Stewart Smith | 3 Jul