List:Commits« Previous MessageNext Message »
From:Jonas Oreland Date:February 18 2009 2:34pm
Subject:bzr commit into mysql-5.1-telco-6.3 branch (jonas:2860)
View as plain text  
#At file:///home/jonas/src/telco-6.3/

 2860 Jonas Oreland	2009-02-18 [merge]
      merge 62 to 63
modified:
  storage/ndb/include/kernel/signaldata/AllocNodeId.hpp
  storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
  storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
  storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
  storage/ndb/src/mgmsrv/MgmtSrvr.cpp
  storage/ndb/src/mgmsrv/MgmtSrvr.hpp
  storage/ndb/src/mgmsrv/Services.cpp

=== modified file 'storage/ndb/include/kernel/signaldata/AllocNodeId.hpp'
--- a/storage/ndb/include/kernel/signaldata/AllocNodeId.hpp	2006-12-27 01:23:51 +0000
+++ b/storage/ndb/include/kernel/signaldata/AllocNodeId.hpp	2009-02-18 14:29:58 +0000
@@ -24,21 +24,28 @@
  */
 class AllocNodeIdReq {
 public:
-  STATIC_CONST( SignalLength = 4 );
+  STATIC_CONST( SignalLength = 5 );
+  STATIC_CONST( SignalLengthQMGR = 7 );
 
   Uint32 senderRef;
   Uint32 senderData;
   Uint32 nodeId;
   Uint32 nodeType;
+  Uint32 timeout;
+
+  Uint32 secret_lo;
+  Uint32 secret_hi;
 };
 
 class AllocNodeIdConf {
 public:
-  STATIC_CONST( SignalLength = 3 );
+  STATIC_CONST( SignalLength = 5 );
 
   Uint32 senderRef;
   Uint32 senderData;
   Uint32 nodeId;
+  Uint32 secret_lo;
+  Uint32 secret_hi;
 };
 
 class AllocNodeIdRef {

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2008-11-13 13:15:56 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2009-02-18 14:29:58 +0000
@@ -156,6 +156,8 @@ public:
     QmgrState sendPresToStatus;
     FailState failState;
     BlockReference blockRef;
+    Uint64 m_secret;
+    Uint64 m_alloc_timeout;
 
     NodeRec() { }
   }; /* p2c: size = 52 bytes */
@@ -308,7 +310,7 @@ private:
   void electionWon(Signal* signal);
   void cmInfoconf010Lab(Signal* signal);
   
-  void apiHbHandlingLab(Signal* signal);
+  void apiHbHandlingLab(Signal* signal, Uint64 now);
   void timerHandlingLab(Signal* signal);
   void hbReceivedLab(Signal* signal);
   void sendCmRegrefLab(Signal* signal, BlockReference ref, 

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp	2008-11-13 13:15:56 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp	2009-02-18 14:29:58 +0000
@@ -27,6 +27,10 @@ void Qmgr::initData() 
 
   // Records with constant sizes
   nodeRec = new NodeRec[MAX_NODES];
+  for (Uint32 i = 0; i<MAX_NODES; i++)
+  {
+    nodeRec[i].m_secret = 0;
+  }
 
   cnoCommitFailedNodes = 0;
   c_maxDynamicId = 0;

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2009-01-29 10:59:51 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2009-02-18 14:33:50 +0000
@@ -436,9 +436,14 @@ void Qmgr::execCONNECT_REP(Signal* signa
     infoEvent("Discarding CONNECT_REP(%d)", nodeId);
     return;
   }
-  
+
   c_connectedNodes.set(nodeId);
+
   NodeRecPtr nodePtr;
+  nodePtr.i = nodeId;
+  ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
+  nodePtr.p->m_secret = 0;
+
   nodePtr.i = getOwnNodeId();
   ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
   NodeInfo nodeInfo = getNodeInfo(nodeId);
@@ -2367,7 +2372,7 @@ void Qmgr::timerHandlingLab(Signal* sign
   {
     jam();
     hb_api_timer.reset();
-    apiHbHandlingLab(signal);
+    apiHbHandlingLab(signal, TcurrentTime);
   }
 
   if (cactivateApiCheck != 0) {
@@ -2473,7 +2478,7 @@ void Qmgr::checkHeartbeat(Signal* signal
   }//if
 }//Qmgr::checkHeartbeat()
 
-void Qmgr::apiHbHandlingLab(Signal* signal) 
+void Qmgr::apiHbHandlingLab(Signal* signal, Uint64 now)
 {
   NodeRecPtr TnodePtr;
 
@@ -2518,6 +2523,14 @@ void Qmgr::apiHbHandlingLab(Signal* sign
         api_failed(signal, nodeId);
       }//if
     }//if
+    else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
+             TnodePtr.p->m_secret != 0 && now > TnodePtr.p->m_alloc_timeout)
+    {
+      jam();
+      TnodePtr.p->m_secret = 0;
+      warningEvent("Releasing node id allocation for node %u",
+                   TnodePtr.i);
+    }
   }//for
   return;
 }//Qmgr::apiHbHandlingLab()
@@ -2552,6 +2565,7 @@ void Qmgr::checkStartInterface(Signal* s
 	 * IS COMPLETE.
 	 *-------------------------------------------------------------------*/
         nodePtr.p->failState = NORMAL;
+        nodePtr.p->m_secret = 0;
         Uint32 type = getNodeInfo(nodePtr.i).m_type;
         switch(type){
         case NodeInfo::DB:
@@ -2848,6 +2862,7 @@ void Qmgr::node_failed(Signal* signal, U
    *-----------------------------------------------------------------------*/
   failedNodePtr.i = aFailedNode;
   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
+  failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
 
   ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
   
@@ -2918,7 +2933,8 @@ Qmgr::api_failed(Signal* signal, Uint32 
    *-----------------------------------------------------------------------*/
   failedNodePtr.i = nodeId;
   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
-  
+  failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
+
   if (failedNodePtr.p->phase == ZFAIL_CLOSING)
   {
     /**
@@ -5337,22 +5353,37 @@ void
 Qmgr::execALLOC_NODEID_REQ(Signal * signal)
 {
   jamEntry();
-  const AllocNodeIdReq * req = (AllocNodeIdReq*)signal->getDataPtr();
-  Uint32 senderRef = req->senderRef;
-  Uint32 nodeId = req->nodeId;
-  Uint32 nodeType = req->nodeType;
+  AllocNodeIdReq req = *(AllocNodeIdReq*)signal->getDataPtr();
   Uint32 error = 0;
 
-  if (refToBlock(senderRef) != QMGR) // request from management server
+  NodeRecPtr nodePtr;
+  nodePtr.i = req.nodeId;
+  ptrAss(nodePtr, nodeRec);
+
+  if (refToBlock(req.senderRef) != QMGR) // request from management server
   {
     /* master */
 
     if (getOwnNodeId() != cpresident)
+    {
+      jam();
       error = AllocNodeIdRef::NotMaster;
+    }
     else if (!opAllocNodeIdReq.m_tracker.done())
+    {
+      jam();
       error = AllocNodeIdRef::Busy;
-    else if (c_connectedNodes.get(nodeId))
+    }
+    else if (c_connectedNodes.get(req.nodeId))
+    {
+      jam();
       error = AllocNodeIdRef::NodeConnected;
+    }
+    else if (nodePtr.p->m_secret != 0)
+    {
+      jam();
+      error = AllocNodeIdRef::NodeReserved;
+    }
 
     if (error)
     {
@@ -5361,60 +5392,99 @@ Qmgr::execALLOC_NODEID_REQ(Signal * sign
       ref->senderRef = reference();
       ref->errorCode = error;
       ref->masterRef = numberToRef(QMGR, cpresident);
-      sendSignal(senderRef, GSN_ALLOC_NODEID_REF, signal,
+      ref->senderData = req.senderData;
+      ref->nodeId = req.nodeId;
+      sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
                  AllocNodeIdRef::SignalLength, JBB);
       return;
     }
 
-    if (ERROR_INSERTED(934) && nodeId != getOwnNodeId())
+    if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
     {
       CRASH_INSERTION(934);
     }
+
+    /**
+     * generate secret
+     */
+    Uint64 now = NdbTick_CurrentMillisecond();
+    Uint32 secret_hi = now >> 24;
+    Uint32 secret_lo = Uint32(now << 8) + getOwnNodeId();
+    req.secret_hi = secret_hi;
+    req.secret_lo = secret_lo;
+
+    if (req.timeout > 60000)
+      req.timeout = 60000;
+
+    nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
+    nodePtr.p->m_alloc_timeout = now + req.timeout;
     
-    opAllocNodeIdReq.m_req = *req;
+    opAllocNodeIdReq.m_req = req;
     opAllocNodeIdReq.m_error = 0;
-    opAllocNodeIdReq.m_connectCount = getNodeInfo(refToNode(senderRef)).m_connectCount;
+    opAllocNodeIdReq.m_connectCount =
+      getNodeInfo(refToNode(req.senderRef)).m_connectCount;
 
     jam();
-    AllocNodeIdReq * req = (AllocNodeIdReq*)signal->getDataPtrSend();
-    req->senderRef = reference();
+    AllocNodeIdReq * req2 = (AllocNodeIdReq*)signal->getDataPtrSend();
+    * req2 = req;
+    req2->senderRef = reference();
     NodeReceiverGroup rg(QMGR, c_clusterNodes);
     RequestTracker & p = opAllocNodeIdReq.m_tracker;
     p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
 
     sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
-               AllocNodeIdReq::SignalLength, JBB);
+               AllocNodeIdReq::SignalLengthQMGR, JBB);
     return;
   }
 
   /* participant */
-
-  if (c_connectedNodes.get(nodeId))
+  if (c_connectedNodes.get(req.nodeId))
+  {
+    jam();
     error = AllocNodeIdRef::NodeConnected;
-  else
+  }
+  else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
   {
-    NodeRecPtr nodePtr;
-    nodePtr.i = nodeId;
-    ptrAss(nodePtr, nodeRec);
-    if (nodeType != getNodeInfo(nodeId).m_type)
-      error = AllocNodeIdRef::NodeTypeMismatch;
-    else if (nodePtr.p->failState != NORMAL)
-      error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
+    jam();
+    error = AllocNodeIdRef::NodeTypeMismatch;
+  }
+  else if (nodePtr.p->failState != NORMAL)
+  {
+    jam();
+    error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
+  }
+#if 0
+  /**
+   * For now only make "time/secret" based reservation on master
+   *   as we otherwise also need to clear it on failure + handle
+   *   master failure
+   */
+  else if (nodePtr.p->m_secret != 0)
+  {
+    jam();
+    error = AllocNodeIdRef::NodeReserved;
   }
+#endif
 
   if (error)
   {
+    jam();
     AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
     ref->senderRef = reference();
     ref->errorCode = error;
-    sendSignal(senderRef, GSN_ALLOC_NODEID_REF, signal,
+    ref->senderData = req.senderData;
+    ref->nodeId = req.nodeId;
+    ref->masterRef = numberToRef(QMGR, cpresident);
+    sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
                AllocNodeIdRef::SignalLength, JBB);
     return;
   }
 
   AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
   conf->senderRef = reference();
-  sendSignal(senderRef, GSN_ALLOC_NODEID_CONF, signal,
+  conf->secret_hi = req.secret_hi;
+  conf->secret_lo = req.secret_lo;
+  sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
              AllocNodeIdConf::SignalLength, JBB);
 }
 
@@ -5427,6 +5497,22 @@ Qmgr::execALLOC_NODEID_CONF(Signal * sig
   const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
   opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
                                         refToNode(conf->senderRef));
+
+  if (signal->getLength() >= AllocNodeIdConf::SignalLength)
+  {
+    jam();
+    if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
+        opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
+    {
+      jam();
+      if (opAllocNodeIdReq.m_error == 0)
+      {
+        jam();
+        opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
+      }
+    }
+  }
+
   completeAllocNodeIdReq(signal);
 }
 
@@ -5440,15 +5526,20 @@ Qmgr::execALLOC_NODEID_REF(Signal * sign
   const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();
   if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
   {
+    jam();
     opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
                                          refToNode(ref->senderRef));    
   }
   else
   {
+    jam();
     opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
                                          refToNode(ref->senderRef));
     if (opAllocNodeIdReq.m_error == 0)
+    {
+      jam();
       opAllocNodeIdReq.m_error = ref->errorCode;
+    }
   }
   completeAllocNodeIdReq(signal);
 }
@@ -5475,6 +5566,17 @@ Qmgr::completeAllocNodeIdReq(Signal *sig
   if (opAllocNodeIdReq.m_tracker.hasRef())
   {
     jam();
+
+    {
+      /**
+       * Clear reservation
+       */
+      NodeRecPtr nodePtr;
+      nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
+      ptrAss(nodePtr, nodeRec);
+      nodePtr.p->m_secret = 0;
+    }
+
     AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
     ref->senderRef = reference();
     ref->senderData = opAllocNodeIdReq.m_req.senderData;
@@ -5486,12 +5588,15 @@ Qmgr::completeAllocNodeIdReq(Signal *sig
                AllocNodeIdRef::SignalLength, JBB);
     return;
   }
+
   jam();
+
   AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
   conf->senderRef = reference();
   conf->senderData = opAllocNodeIdReq.m_req.senderData;
   conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
-  ndbassert(AllocNodeIdConf::SignalLength == 3);
+  conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
+  conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
   sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
              AllocNodeIdConf::SignalLength, JBB);
 }

=== modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.cpp'
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.cpp	2009-01-08 15:25:14 +0000
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.cpp	2009-02-18 14:33:50 +0000
@@ -496,7 +496,7 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_
     int error_code;
 
     if (!alloc_node_id(&tmp, NDB_MGM_NODE_TYPE_MGM,
-		       0, 0, error_code, error_string)){
+		       0, 0, error_code, error_string, 20)){
       ndbout << "Unable to obtain requested nodeid: "
 	     << error_string.c_str() << endl;
       require(false);
@@ -2144,7 +2144,9 @@ MgmtSrvr::get_connected_nodes(NodeBitmas
 }
 
 int
-MgmtSrvr::alloc_node_id_req(NodeId free_node_id, enum ndb_mgm_node_type type)
+MgmtSrvr::alloc_node_id_req(NodeId free_node_id,
+                            enum ndb_mgm_node_type type,
+                            Uint32 timeout_ms)
 {
   SignalSender ss(theFacade);
   ss.lock(); // lock will be released on exit
@@ -2158,6 +2160,7 @@ MgmtSrvr::alloc_node_id_req(NodeId free_
   req->senderData = 19;
   req->nodeId = free_node_id;
   req->nodeType = type;
+  req->timeout = timeout_ms;
 
   int do_send = 1;
   NodeId nodeId = 0;
@@ -2241,141 +2244,165 @@ MgmtSrvr::alloc_node_id_req(NodeId free_
   return 0;
 }
 
-bool
-MgmtSrvr::alloc_node_id(NodeId * nodeId, 
-			enum ndb_mgm_node_type type,
-			struct sockaddr *client_addr, 
-			SOCKET_SIZE_TYPE *client_addr_len,
-			int &error_code, BaseString &error_string,
-                        int log_event)
+int
+MgmtSrvr::match_hostname(const struct sockaddr *clnt_addr,
+                         const char *config_hostname) const
 {
-  DBUG_ENTER("MgmtSrvr::alloc_node_id");
-  DBUG_PRINT("enter", ("nodeid: %d  type: %d  client_addr: 0x%ld",
-		       *nodeId, type, (long) client_addr));
-  if (g_no_nodeid_checks) {
-    if (*nodeId == 0) {
-      error_string.appfmt("no-nodeid-checks set in management server.\n"
-			  "node id must be set explicitly in connectstring");
-      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
-      DBUG_RETURN(false);
+  struct in_addr config_addr= {0};
+  if (clnt_addr)
+  {
+    const struct in_addr *clnt_in_addr = &((sockaddr_in*)clnt_addr)->sin_addr;
+
+    if (Ndb_getInAddr(&config_addr, config_hostname) != 0
+        || memcmp(&config_addr, clnt_in_addr, sizeof(config_addr)) != 0)
+    {
+      struct in_addr tmp_addr;
+      if (Ndb_getInAddr(&tmp_addr, "localhost") != 0
+          || memcmp(&tmp_addr, clnt_in_addr, sizeof(config_addr)) != 0)
+      {
+        // not localhost
+#if 0
+        ndbout << "MgmtSrvr::getFreeNodeId compare failed for \""
+               << config_hostname
+               << "\" id=" << tmp << endl;
+#endif
+        return -1;
+      }
+
+      // connecting through localhost
+      // check if config_hostname is local
+      if (!SocketServer::tryBind(0, config_hostname))
+        return -1;
     }
-    DBUG_RETURN(true);
   }
-  Guard g(m_node_id_mutex);
-  int no_mgm= 0;
-  NodeBitmask connected_nodes(m_reserved_nodes);
-  get_connected_nodes(connected_nodes);
+  else
   {
-    for(Uint32 i = 0; i < MAX_NODES; i++)
-      if (getNodeType(i) == NDB_MGM_NODE_TYPE_MGM)
-	no_mgm++;
+    if (!SocketServer::tryBind(0, config_hostname))
+      return -1;
   }
-  bool found_matching_id= false;
-  bool found_matching_type= false;
-  bool found_free_node= false;
-  unsigned id_found= 0;
-  const char *config_hostname= 0;
-  struct in_addr config_addr= {0};
-  int r_config_addr= -1;
-  unsigned type_c= 0;
+  return 0;
+}
+
+int
+MgmtSrvr::find_node_type(unsigned node_id, enum ndb_mgm_node_type type,
+                         const struct sockaddr *client_addr,
+                         NodeBitmask &nodes,
+                         NodeBitmask &exact_nodes,
+                         Vector<struct nodeid_and_host> &nodes_info,
+                         int &error_code, BaseString &error_string)
+{
+  const char *found_config_hostname= 0;
+  unsigned type_c= (unsigned)type;
+
+  Guard g(m_configMutex);
 
-  if(NdbMutex_Lock(m_configMutex))
-  {
-    // should not happen
-    error_string.appfmt("unable to lock configuration mutex");
-    error_code = NDB_MGM_ALLOCID_ERROR;
-    DBUG_RETURN(false);
-  }
   ndb_mgm_configuration_iterator
     iter(* _config->m_configValues, CFG_SECTION_NODE);
-  for(iter.first(); iter.valid(); iter.next()) {
-    unsigned tmp= 0;
-    if(iter.get(CFG_NODE_ID, &tmp)) require(false);
-    if (*nodeId && *nodeId != tmp)
-      continue;
-    found_matching_id= true;
-    if(iter.get(CFG_TYPE_OF_SECTION, &type_c)) require(false);
-    if(type_c != (unsigned)type)
-      continue;
-    found_matching_type= true;
-    if (connected_nodes.get(tmp))
+  for(iter.first(); iter.valid(); iter.next())
+  {
+    unsigned id;
+    if (iter.get(CFG_NODE_ID, &id))
+      require(false);
+    if (node_id && node_id != id)
       continue;
-    found_free_node= true;
-    if(iter.get(CFG_NODE_HOST, &config_hostname)) require(false);
-    if (config_hostname && config_hostname[0] == 0)
-      config_hostname= 0;
-    else if (client_addr) {
-      // check hostname compatability
-      const void *tmp_in= &(((sockaddr_in*)client_addr)->sin_addr);
-      if((r_config_addr= Ndb_getInAddr(&config_addr, config_hostname)) != 0
-	 || memcmp(&config_addr, tmp_in, sizeof(config_addr)) != 0) {
-	struct in_addr tmp_addr;
-	if(Ndb_getInAddr(&tmp_addr, "localhost") != 0
-	   || memcmp(&tmp_addr, tmp_in, sizeof(config_addr)) != 0) {
-	  // not localhost
-#if 0
-	  ndbout << "MgmtSrvr::getFreeNodeId compare failed for \""
-		 << config_hostname
-		 << "\" id=" << tmp << endl;
-#endif
-	  continue;
-	}
-	// connecting through localhost
-	// check if config_hostname is local
-	if (!SocketServer::tryBind(0,config_hostname)) {
-	  continue;
-	}
-      }
-    } else { // client_addr == 0
-      if (!SocketServer::tryBind(0,config_hostname)) {
-	continue;
+    if (iter.get(CFG_TYPE_OF_SECTION, &type_c))
+      require(false);
+    if (type_c != (unsigned)type)
+    {
+      if (!node_id)
+        continue;
+      goto error;
+    }
+    const char *config_hostname= 0;
+    if (iter.get(CFG_NODE_HOST, &config_hostname))
+      require(false);
+    if (config_hostname == 0 || config_hostname[0] == 0)
+    {
+      config_hostname= "";
+    }
+    else
+    {
+      found_config_hostname= config_hostname;
+      if (match_hostname(client_addr, config_hostname))
+      {
+        if (!node_id)
+          continue;
+        goto error;
       }
+      exact_nodes.set(id);
     }
-    if (*nodeId != 0 ||
-	type != NDB_MGM_NODE_TYPE_MGM ||
-	no_mgm == 1) { // any match is ok
-
-      if (config_hostname == 0 &&
-	  *nodeId == 0 &&
-	  type != NDB_MGM_NODE_TYPE_MGM)
-      {
-	if (!id_found) // only set if not set earlier
-	  id_found= tmp;
-	continue; /* continue looking for a nodeid with specified
-		   * hostname
-		   */
-      }
-      assert(id_found == 0);
-      id_found= tmp;
-      break;
-    }
-    if (id_found) { // mgmt server may only have one match
-      error_string.appfmt("Ambiguous node id's %d and %d.\n"
-			  "Suggest specifying node id in connectstring,\n"
-			  "or specifying unique host names in config file.",
-			  id_found, tmp);
-      NdbMutex_Unlock(m_configMutex);
-      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
-      DBUG_RETURN(false);
+    nodes.set(id);
+    struct nodeid_and_host a= {id, config_hostname};
+    nodes_info.push_back(a);
+    if (node_id)
+      break;
+  }
+  if (nodes_info.size() != 0)
+  {
+    return 0;
+  }
+
+ error:
+  /*
+    lock on m_configMutex held because found_config_hostname may have
+    reference inot config structure
+  */
+  error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+  if (node_id)
+  {
+    if (type_c != (unsigned) type)
+    {
+      BaseString type_string, type_c_string;
+      const char *alias, *str;
+      alias= ndb_mgm_get_node_type_alias_string(type, &str);
+      type_string.assfmt("%s(%s)", alias, str);
+      alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)type_c,
+                                                &str);
+      type_c_string.assfmt("%s(%s)", alias, str);
+      error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
+                          node_id, type_c_string.c_str(),
+                          type_string.c_str());
+      return -1;
     }
-    if (config_hostname == 0) {
-      error_string.appfmt("Ambiguity for node id %d.\n"
-			  "Suggest specifying node id in connectstring,\n"
-			  "or specifying unique host names in config file,\n"
-			  "or specifying just one mgmt server in config file.",
-			  tmp);
-      NdbMutex_Unlock(m_configMutex);
-      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
-      DBUG_RETURN(false);
+    if (found_config_hostname)
+    {
+      struct in_addr config_addr= {0};
+      int r_config_addr= Ndb_getInAddr(&config_addr, found_config_hostname);
+      error_string.appfmt("Connection with id %d done from wrong host ip %s,",
+                          node_id, inet_ntoa(((struct sockaddr_in *)
+                                              (client_addr))->sin_addr));
+      error_string.appfmt(" expected %s(%s).", found_config_hostname,
+                          r_config_addr ?
+                          "lookup failed" : inet_ntoa(config_addr));
+      return -1;
     }
-    id_found= tmp; // mgmt server matched, check for more matches
+    error_string.appfmt("No node defined with id=%d in config file.", node_id);
+    return -1;
   }
-  NdbMutex_Unlock(m_configMutex);
 
-  if (id_found && client_addr != 0)
+  // node_id == 0 and nodes_info.size() == 0
+  if (found_config_hostname)
   {
-    int res = alloc_node_id_req(id_found, type);
-    unsigned save_id_found = id_found;
+    error_string.appfmt("Connection done from wrong host ip %s.",
+                        (client_addr)?
+                        inet_ntoa(((struct sockaddr_in *)
+                                   (client_addr))->sin_addr):"");
+    return -1;
+  }
+
+  error_string.append("No nodes defined in config file.");
+  return -1;
+}
+
+int
+MgmtSrvr::try_alloc(unsigned id, const char *config_hostname,
+                    enum ndb_mgm_node_type type,
+                    const struct sockaddr *client_addr,
+                    Uint32 timeout_ms)
+{
+  if (client_addr != 0)
+  {
+    int res = alloc_node_id_req(id, type, timeout_ms);
     switch (res)
     {
     case 0:
@@ -2386,155 +2413,176 @@ MgmtSrvr::alloc_node_id(NodeId * nodeId,
       break;
     default:
       // something wrong
-      id_found = 0;
-      break;
-
-    }
-    if (id_found == 0)
-    {
-      char buf[128];
-      ndb_error_string(res, buf, sizeof(buf));
-      error_string.appfmt("Cluster refused allocation of id %d. Error: %d (%s).",
-			  save_id_found, res, buf);
-      g_eventLogger->warning("Cluster refused allocation of id %d. "
-                             "Connection from ip %s. "
-                             "Returned error string \"%s\"", save_id_found,
-                             inet_ntoa(((struct sockaddr_in *)
-                                        (client_addr))->sin_addr),
-                             error_string.c_str());
-      DBUG_RETURN(false);
+      return -1;
     }
   }
 
-  if (id_found)
+  DBUG_PRINT("info", ("allocating node id %d",id));
   {
-    *nodeId= id_found;
-    DBUG_PRINT("info", ("allocating node id %d",*nodeId));
+    int r= 0;
+    if (client_addr)
+    {
+      m_connect_address[id]= ((struct sockaddr_in *)client_addr)->sin_addr;
+    }
+    else if (config_hostname)
+    {
+      r= Ndb_getInAddr(&(m_connect_address[id]), config_hostname);
+    }
+    else
     {
-      int r= 0;
-      if (client_addr)
-	m_connect_address[id_found]=
-	  ((struct sockaddr_in *)client_addr)->sin_addr;
-      else if (config_hostname)
-	r= Ndb_getInAddr(&(m_connect_address[id_found]), config_hostname);
-      else {
-	char name[256];
-	r= gethostname(name, sizeof(name));
-	if (r == 0) {
-	  name[sizeof(name)-1]= 0;
-	  r= Ndb_getInAddr(&(m_connect_address[id_found]), name);
-	}
-      }
-      if (r)
-	m_connect_address[id_found].s_addr= 0;
-    }
-    m_reserved_nodes.set(id_found);
-    if (theFacade && id_found != theFacade->ownId())
-    {
-      /**
-       * Make sure we're ready to accept connections from this node
-       */
-      theFacade->lock_mutex();
-      theFacade->doConnect(id_found);
-      theFacade->unlock_mutex();
+      char name[256];
+      r= gethostname(name, sizeof(name));
+      if (r == 0)
+      {
+        name[sizeof(name)-1]= 0;
+        r= Ndb_getInAddr(&(m_connect_address[id]), name);
+      }
+    }
+    if (r)
+    {
+      m_connect_address[id].s_addr= 0;
     }
+  }
+  m_reserved_nodes.set(id);
+  if (theFacade && id != theFacade->ownId())
+  {
+    /**
+     * Make sure we're ready to accept connections from this node
+     */
+    theFacade->lock_mutex();
+    theFacade->doConnect(id);
+    theFacade->unlock_mutex();
+  }
     
-    char tmp_str[128];
-    m_reserved_nodes.getText(tmp_str);
-    g_eventLogger->info("Mgmt server state: nodeid %d reserved for ip %s, "
-                        "m_reserved_nodes %s.",
-                        id_found, get_connect_address(id_found), tmp_str);
+  char tmp_str[128];
+  m_reserved_nodes.getText(tmp_str);
+  g_eventLogger->info("Mgmt server state: nodeid %d reserved for ip %s, "
+                      "m_reserved_nodes %s.",
+                      id, get_connect_address(id), tmp_str);
+
+  return 0;
+}
+
+bool
+MgmtSrvr::alloc_node_id(NodeId * nodeId,
+			enum ndb_mgm_node_type type,
+			const struct sockaddr *client_addr,
+			SOCKET_SIZE_TYPE *client_addr_len,
+			int &error_code, BaseString &error_string,
+                        int log_event,
+                        int timeout_s)
+{
+  DBUG_ENTER("MgmtSrvr::alloc_node_id");
+  DBUG_PRINT("enter", ("nodeid: %d  type: %d  client_addr: 0x%ld",
+		       *nodeId, type, (long) client_addr));
+
+  if (g_no_nodeid_checks) {
+    if (*nodeId == 0) {
+      error_string.appfmt("no-nodeid-checks set in management server.\n"
+			  "node id must be set explicitly in connectstring");
+      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+      DBUG_RETURN(false);
+    }
     DBUG_RETURN(true);
   }
 
-  if (found_matching_type && !found_free_node) {
-    // we have a temporary error which might be due to that 
-    // we have got the latest connect status from db-nodes.  Force update.
-    updateStatus();
-  }
+  Uint32 timeout_ms = Uint32(1000 * timeout_s);
+
+  Guard g(m_node_id_mutex);
+
+  NodeBitmask connected_nodes;
+  get_connected_nodes(connected_nodes);
+
+  NodeBitmask nodes, exact_nodes;
+  Vector<struct nodeid_and_host> nodes_info;
 
-  BaseString type_string, type_c_string;
+  /* find all nodes with correct type */
+  if (find_node_type(*nodeId, type, client_addr, nodes, exact_nodes, nodes_info,
+                     error_code, error_string))
+    goto error;
+
+  // nodes_info.size() == 0 handled inside find_node_type
+  DBUG_ASSERT(nodes_info.size() != 0);
+
+  if (type == NDB_MGM_NODE_TYPE_MGM && nodes_info.size() > 1)
   {
-    const char *alias, *str;
-    alias= ndb_mgm_get_node_type_alias_string(type, &str);
-    type_string.assfmt("%s(%s)", alias, str);
-    alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)type_c,
-					      &str);
-    type_c_string.assfmt("%s(%s)", alias, str);
+    // mgmt server may only have one match
+    error_string.appfmt("Ambiguous node id's %d and %d.\n"
+                        "Suggest specifying node id in connectstring,\n"
+                        "or specifying unique host names in config file.",
+                        nodes_info[0].id, nodes_info[1].id);
+    error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+    goto error;
   }
 
-  if (*nodeId == 0)
+  /* remove connected and reserved nodes from possible nodes to allocate */
+  nodes.bitANDC(connected_nodes);
+  nodes.bitANDC(m_reserved_nodes);
+
+  /* first try all nodes with exact match of hostname */
+  for (Uint32 i = 0; i < nodes_info.size(); i++)
   {
-    if (found_matching_id)
+    unsigned id= nodes_info[i].id;
+    if (!nodes.get(id))
+      continue;
+
+    if (!exact_nodes.get(id))
+      continue;
+
+    const char *config_hostname= nodes_info[i].host.c_str();
+    if (!try_alloc(id, config_hostname, type, client_addr, timeout_ms))
     {
-      if (found_matching_type)
-      {
-	if (found_free_node)
-        {
-	  error_string.appfmt("Connection done from wrong host ip %s.",
-			      (client_addr)?
-                              inet_ntoa(((struct sockaddr_in *)
-					 (client_addr))->sin_addr):"");
-          error_code = NDB_MGM_ALLOCID_ERROR;
-        }
-	else
-        {
-	  error_string.appfmt("No free node id found for %s.",
-			      type_string.c_str());
-          error_code = NDB_MGM_ALLOCID_ERROR;
-        }
-      }
-      else
-      {
-	error_string.appfmt("No %s node defined in config file.",
-			    type_string.c_str());
-        error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
-      }
+      // success
+      *nodeId= id;
+      DBUG_RETURN(true);
     }
-    else
+  }
+
+  /* now try the open nodes */
+  for (Uint32 i = 0; i < nodes_info.size(); i++)
+  {
+    unsigned id= nodes_info[i].id;
+    if (!nodes.get(id))
+      continue;
+
+    /**
+     * exact node tried in loop above
+     */
+    if (exact_nodes.get(id))
+      continue;
+
+    if (!try_alloc(id, NULL, type, client_addr, timeout_ms))
     {
-      error_string.append("No nodes defined in config file.");
-      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+      // success
+      *nodeId= id;
+      DBUG_RETURN(true);
     }
   }
+
+  /*
+    there are nodes with correct type available but
+    allocation failed for some reason
+  */
+  if (*nodeId)
+  {
+    error_string.appfmt("Id %d already allocated by another node.",
+                        *nodeId);
+  }
   else
   {
-    if (found_matching_id)
-    {
-      if (found_matching_type)
-      {
-	if (found_free_node)
-        {
-	  // have to split these into two since inet_ntoa overwrites itself
-	  error_string.appfmt("Connection with id %d done from wrong host ip %s,",
-			      *nodeId, inet_ntoa(((struct sockaddr_in *)
-						  (client_addr))->sin_addr));
-	  error_string.appfmt(" expected %s(%s).", config_hostname,
-			      r_config_addr ?
-			      "lookup failed" : inet_ntoa(config_addr));
-          error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
-	}
-        else
-        {
-	  error_string.appfmt("Id %d already allocated by another node.",
-			      *nodeId);
-          error_code = NDB_MGM_ALLOCID_ERROR;
-        }
-      }
-      else
-      {
-	error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
-			    *nodeId, type_c_string.c_str(),
-			    type_string.c_str());
-        error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
-      }
-    }
-    else
-    {
-      error_string.appfmt("No node defined with id=%d in config file.",
-			  *nodeId);
-      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
-    }
+    const char *alias, *str;
+    alias= ndb_mgm_get_node_type_alias_string(type, &str);
+    error_string.appfmt("No free node id found for %s(%s).",
+                        alias, str);
+  }
+  error_code = NDB_MGM_ALLOCID_ERROR;
+
+ error:
+  if (error_code != NDB_MGM_ALLOCID_CONFIG_MISMATCH)
+  {
+    // we have a temporary error which might be due to that
+    // we have got the latest connect status from db-nodes.  Force update.
+    updateStatus();
   }
 
   if (log_event || error_code == NDB_MGM_ALLOCID_CONFIG_MISMATCH)
@@ -2548,27 +2596,35 @@ MgmtSrvr::alloc_node_id(NodeId * nodeId,
 			   : "<none>",
 			   error_string.c_str());
 
-    NodeBitmask connected_nodes2;
-    get_connected_nodes(connected_nodes2);
     BaseString tmp_connected, tmp_not_connected;
     for(Uint32 i = 0; i < MAX_NODES; i++)
     {
-      if (connected_nodes2.get(i))
+      if (connected_nodes.get(i))
       {
-	if (!m_reserved_nodes.get(i))
-	  tmp_connected.appfmt(" %d", i);
+        if (!m_reserved_nodes.get(i))
+        {
+          tmp_connected.appfmt("%d ", i);
+        }
       }
       else if (m_reserved_nodes.get(i))
       {
-	tmp_not_connected.appfmt(" %d", i);
+        tmp_not_connected.appfmt("%d ", i);
       }
     }
+
     if (tmp_connected.length() > 0)
-      g_eventLogger->info("Mgmt server state: node id's %s connected but not reserved", 
-			  tmp_connected.c_str());
+    {
+      g_eventLogger->info
+        ("Mgmt server state: node id's %sconnected but not reserved",
+         tmp_connected.c_str());
+    }
+
     if (tmp_not_connected.length() > 0)
-      g_eventLogger->info("Mgmt server state: node id's %s not connected but reserved",
-			  tmp_not_connected.c_str());
+    {
+      g_eventLogger->info
+        ("Mgmt server state: node id's %snot connected but reserved",
+         tmp_not_connected.c_str());
+    }
   }
   DBUG_RETURN(false);
 }
@@ -3112,3 +3168,4 @@ template class MutexVector<NodeId>;
 template class MutexVector<Ndb_mgmd_event_service::Event_listener>;
 template class Vector<EventSubscribeReq>;
 template class MutexVector<EventSubscribeReq>;
+template class Vector<MgmtSrvr::nodeid_and_host>;

=== modified file 'storage/ndb/src/mgmsrv/MgmtSrvr.hpp'
--- a/storage/ndb/src/mgmsrv/MgmtSrvr.hpp	2009-01-08 15:25:14 +0000
+++ b/storage/ndb/src/mgmsrv/MgmtSrvr.hpp	2009-02-18 14:33:50 +0000
@@ -399,10 +399,11 @@ public:
    */
   bool getNextNodeId(NodeId * _nodeId, enum ndb_mgm_node_type type) const ;
   bool alloc_node_id(NodeId * _nodeId, enum ndb_mgm_node_type type,
-		     struct sockaddr *client_addr,
+		     const struct sockaddr *client_addr,
                      SOCKET_SIZE_TYPE *client_addr_len,
 		     int &error_code, BaseString &error_string,
-                     int log_event = 1);
+                     int log_event = 1,
+                     int timeout_s = 20);
   
   /**
    *
@@ -491,7 +492,9 @@ private:
    */
   int getBlockNumber(const BaseString &blockName);
 
-  int alloc_node_id_req(NodeId free_node_id, enum ndb_mgm_node_type type);
+  int alloc_node_id_req(NodeId free_node_id,
+                        enum ndb_mgm_node_type type,
+                        Uint32 timeout_ms);
 
   int check_nodes_starting();
   int check_nodes_stopping();
@@ -644,6 +647,21 @@ private:
   Config *_props;
 
   ConfigRetriever *m_config_retriever;
+
+  struct nodeid_and_host
+  {
+    unsigned id;
+    BaseString host;
+  };
+  int find_node_type(unsigned node_id, enum ndb_mgm_node_type type,
+                     const struct sockaddr *client_addr,
+                     NodeBitmask &nodes,
+                     NodeBitmask &exact_nodes,
+                     Vector<nodeid_and_host> &nodes_info,
+                     int &error_code, BaseString &error_string);
+  int match_hostname(const struct sockaddr *, const char *) const;
+  int try_alloc(unsigned id,  const char *, enum ndb_mgm_node_type type,
+                const struct sockaddr *client_addr, Uint32 timeout_ms);
 };
 
 inline

=== modified file 'storage/ndb/src/mgmsrv/Services.cpp'
--- a/storage/ndb/src/mgmsrv/Services.cpp	2009-01-08 15:25:14 +0000
+++ b/storage/ndb/src/mgmsrv/Services.cpp	2009-02-18 14:33:50 +0000
@@ -510,7 +510,8 @@ MgmApiSession::get_nodeid(Parser_t::Cont
     while (!m_mgmsrv.alloc_node_id(&tmp, (enum ndb_mgm_node_type)nodetype, 
                                    (struct sockaddr*)&addr, &addrlen,
                                    error_code, error_string,
-                                   tick == 0 ? 0 : log_event))
+                                   tick == 0 ? 0 : log_event,
+                                   timeout))
     {
       /* NDB_MGM_ALLOCID_CONFIG_MISMATCH is a non retriable error */
       if (tick == 0 && error_code != NDB_MGM_ALLOCID_CONFIG_MISMATCH)

Thread
bzr commit into mysql-5.1-telco-6.3 branch (jonas:2860)Jonas Oreland18 Feb