List: Commits  « Previous Message  Next Message »
From: Stewart Smith  Date: June 7 2006 4:40pm
Subject: bk commit into 5.0 tree (stewart:1.2170)
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of stewart. When stewart does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2170 06/06/08 02:40:16 stewart@stripped +7 -0
  Merge mysql.com:/home/stewart/Documents/MySQL/5.0/jonas
  into  mysql.com:/home/stewart/Documents/MySQL/5.0/merge-queue

  ndb/src/mgmsrv/MgmtSrvr.cpp
    1.99 06/06/08 02:40:12 stewart@stripped +2 -2
    hand merge bug fix by tomas with my fix for stop/shutdown behaviour

  ndb/src/mgmsrv/Services.hpp
    1.20 06/06/08 02:35:36 stewart@stripped +0 -0
    Auto merged

  ndb/src/mgmsrv/Services.cpp
    1.65 06/06/08 02:35:36 stewart@stripped +0 -0
    Auto merged

  ndb/src/mgmsrv/MgmtSrvr.hpp
    1.44 06/06/08 02:35:36 stewart@stripped +0 -0
    Auto merged

  ndb/src/mgmclient/CommandInterpreter.cpp
    1.61 06/06/08 02:35:35 stewart@stripped +0 -0
    Auto merged

  ndb/src/mgmapi/mgmapi.cpp
    1.63 06/06/08 02:35:35 stewart@stripped +0 -0
    Auto merged

  ndb/include/mgmapi/mgmapi.h
    1.49 06/06/08 02:35:34 stewart@stripped +0 -0
    Auto merged

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	stewart
# Host:	willster.(none)
# Root:	/home/stewart/Documents/MySQL/5.0/merge-queue/RESYNC

--- 1.48/ndb/include/mgmapi/mgmapi.h	2006-04-26 23:55:24 +10:00
+++ 1.49/ndb/include/mgmapi/mgmapi.h	2006-06-08 02:35:34 +10:00
@@ -701,6 +701,28 @@
 		    const int * node_list, int abort);
 
   /**
+   * Stops cluster nodes
+   *
+   * @param   handle        Management handle.
+   * @param   no_of_nodes   Number of database nodes to stop<br>
+   *                         -1: All database and management nodes<br>
+   *                          0: All database nodes in cluster<br>
+   *                          n: Stop the <var>n</var> node(s) specified in
+   *                            the array node_list
+   * @param   node_list     List of node IDs of database nodes to be stopped
+   * @param   abort         Don't perform graceful stop,
+   *                        but rather stop immediately
+   * @param   disconnect    Returns true if you need to disconnect to apply
+   *                        the stop command (e.g. stopping the mgm server
+   *                        that handle is connected to)
+   *
+   * @return                Number of nodes stopped (-1 on error).
+   */
+  int ndb_mgm_stop3(NdbMgmHandle handle, int no_of_nodes,
+		    const int * node_list, int abort, int *disconnect);
+
+
+  /**
    * Restart database nodes
    *
    * @param   handle        Management handle.
@@ -740,6 +762,31 @@
 		       int nostart, int abort);
 
   /**
+   * Restart nodes
+   *
+   * @param   handle        Management handle.
+   * @param   no_of_nodes   Number of database nodes to be restarted:<br>
+   *                          0: Restart all database nodes in the cluster<br>
+   *                          n: Restart the <var>n</var> node(s) specified in the
+   *                            array node_list
+   * @param   node_list     List of node IDs of database nodes to be restarted
+   * @param   initial       Remove filesystem from restarting node(s)
+   * @param   nostart       Don't actually start node(s) but leave them
+   *                        waiting for start command
+   * @param   abort         Don't perform graceful restart,
+   *                        but rather restart immediately
+   * @param   disconnect    Returns true if mgmapi client must disconnect from
+   *                        server to apply the requested operation. (e.g.
+   *                        restart the management server)
+   *
+   *
+   * @return                Number of nodes restarted (-1 on error).
+   */
+  int ndb_mgm_restart3(NdbMgmHandle handle, int no_of_nodes,
+		       const int * node_list, int initial,
+		       int nostart, int abort, int *disconnect);
+
+  /**
    * Start database nodes
    *
    * @param   handle        Management handle.
@@ -1028,6 +1075,16 @@
    * Get the node id of the mgm server we're connected to
    */
   Uint32 ndb_mgm_get_mgmd_nodeid(NdbMgmHandle handle);
+
+  /**
+   * Get the version of the mgm server we're talking to.
+   * Designed to allow switching of protocol depending on version
+   * so that new clients can speak to old servers in a compat mode
+   */
+  int ndb_mgm_get_version(NdbMgmHandle handle,
+                          int *major, int *minor, int* build,
+                          int len, char* str);
+
 
   /**
    * Config iterator

--- 1.62/ndb/src/mgmapi/mgmapi.cpp	2006-06-08 02:33:20 +10:00
+++ 1.63/ndb/src/mgmapi/mgmapi.cpp	2006-06-08 02:35:35 +10:00
@@ -27,6 +27,7 @@
 #include <mgmapi_debug.h>
 #include "mgmapi_configuration.hpp"
 #include <socket_io.h>
+#include <version.h>
 
 #include <NdbOut.hpp>
 #include <SocketServer.hpp>
@@ -103,6 +104,9 @@
 #endif
   FILE *errstream;
   char *m_name;
+  int mgmd_version_major;
+  int mgmd_version_minor;
+  int mgmd_version_build;
 };
 
 #define SET_ERROR(h, e, s) setError(h, e, __LINE__, s)
@@ -168,6 +172,10 @@
   h->logfile = 0;
 #endif
 
+  h->mgmd_version_major= -1;
+  h->mgmd_version_minor= -1;
+  h->mgmd_version_build= -1;
+
   DBUG_PRINT("info", ("handle=0x%x", (UintPtr)h));
   DBUG_RETURN(h);
 }
@@ -851,37 +859,81 @@
   return ndb_mgm_stop2(handle, no_of_nodes, node_list, 0);
 }
 
-
 extern "C"
-int 
+int
 ndb_mgm_stop2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
 	      int abort)
 {
-  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_stop2");
-  const ParserRow<ParserDummy> stop_reply[] = {
+  int disconnect;
+  return ndb_mgm_stop3(handle, no_of_nodes, node_list, abort, &disconnect);
+}
+
+
+extern "C"
+int
+ndb_mgm_stop3(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
+	      int abort, int *disconnect)
+{
+  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_stop3");
+  const ParserRow<ParserDummy> stop_reply_v1[] = {
+    MGM_CMD("stop reply", NULL, ""),
+    MGM_ARG("stopped", Int, Optional, "No of stopped nodes"),
+    MGM_ARG("result", String, Mandatory, "Error message"),
+    MGM_END()
+  };
+  const ParserRow<ParserDummy> stop_reply_v2[] = {
     MGM_CMD("stop reply", NULL, ""),
     MGM_ARG("stopped", Int, Optional, "No of stopped nodes"),
     MGM_ARG("result", String, Mandatory, "Error message"),
+    MGM_ARG("disconnect", Int, Mandatory, "Need to disconnect"),
     MGM_END()
   };
+
   CHECK_HANDLE(handle, -1);
   CHECK_CONNECTED(handle, -1);
 
-  if(no_of_nodes < 0){
+  if(handle->mgmd_version_build==-1)
+  {
+    char verstr[50];
+    if(!ndb_mgm_get_version(handle,
+                        &(handle->mgmd_version_major),
+                        &(handle->mgmd_version_minor),
+                        &(handle->mgmd_version_build),
+                        sizeof(verstr),
+                            verstr))
+    {
+      return -1;
+    }
+  }
+  int use_v2= ((handle->mgmd_version_major==5)
+    && (
+        (handle->mgmd_version_minor==0 && handle->mgmd_version_build>=21)
+        ||(handle->mgmd_version_minor==1 && handle->mgmd_version_build>=12)
+        ||(handle->mgmd_version_minor>1)
+        )
+               )
+    || (handle->mgmd_version_major>5);
+
+  if(no_of_nodes < -1){
     SET_ERROR(handle, NDB_MGM_ILLEGAL_NUMBER_OF_NODES, 
 	      "Negative number of nodes requested to stop");
     return -1;
   }
 
   Uint32 stoppedNoOfNodes = 0;
-  if(no_of_nodes == 0){
+  if(no_of_nodes <= 0){
     /**
-     * All database nodes should be stopped
+     * All nodes should be stopped (all or just db)
      */
     Properties args;
     args.put("abort", abort);
+    if(use_v2)
+      args.put("stop", (no_of_nodes==-1)?"mgm,db":"db");
     const Properties *reply;
-    reply = ndb_mgm_call(handle, stop_reply, "stop all", &args);
+    if(use_v2)
+      reply = ndb_mgm_call(handle, stop_reply_v2, "stop all", &args);
+    else
+      reply = ndb_mgm_call(handle, stop_reply_v1, "stop all", &args);
     CHECK_REPLY(reply, -1);
 
     if(!reply->get("stopped", &stoppedNoOfNodes)){
@@ -890,6 +942,10 @@
       delete reply;
       return -1;
     }
+    if(use_v2)
+      reply->get("disconnect", (Uint32*)disconnect);
+    else
+      *disconnect= 0;
     BaseString result;
     reply->get("result", result);
     if(strcmp(result.c_str(), "Ok") != 0) {
@@ -915,7 +971,11 @@
   args.put("abort", abort);
 
   const Properties *reply;
-  reply = ndb_mgm_call(handle, stop_reply, "stop", &args);
+  if(use_v2)
+    reply = ndb_mgm_call(handle, stop_reply_v2, "stop v2", &args);
+  else
+    reply = ndb_mgm_call(handle, stop_reply_v1, "stop", &args);
+
   CHECK_REPLY(reply, stoppedNoOfNodes);
   if(!reply->get("stopped", &stoppedNoOfNodes)){
     SET_ERROR(handle, NDB_MGM_STOP_FAILED, 
@@ -923,6 +983,10 @@
     delete reply;
     return -1;
   }
+  if(use_v2)
+    reply->get("disconnect", (Uint32*)disconnect);
+  else
+    *disconnect= 0;
   BaseString result;
   reply->get("result", result);
   if(strcmp(result.c_str(), "Ok") != 0) {
@@ -936,20 +1000,69 @@
 
 extern "C"
 int
+ndb_mgm_restart(NdbMgmHandle handle, int no_of_nodes, const int *node_list) 
+{
+  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_restart");
+  return ndb_mgm_restart2(handle, no_of_nodes, node_list, 0, 0, 0);
+}
+
+extern "C"
+int
 ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
 		 int initial, int nostart, int abort)
 {
-  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_restart2");
+  int disconnect;
+
+  return ndb_mgm_restart3(handle, no_of_nodes, node_list, initial, nostart,
+                          abort, &disconnect);
+}
+
+extern "C"
+int
+ndb_mgm_restart3(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
+		 int initial, int nostart, int abort, int *disconnect)
+{
+  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_restart3");
   Uint32 restarted = 0;
-  const ParserRow<ParserDummy> restart_reply[] = {
+  const ParserRow<ParserDummy> restart_reply_v1[] = {
+    MGM_CMD("restart reply", NULL, ""),
+    MGM_ARG("result", String, Mandatory, "Error message"),
+    MGM_ARG("restarted", Int, Optional, "No of restarted nodes"),
+    MGM_END()
+  };
+  const ParserRow<ParserDummy> restart_reply_v2[] = {
     MGM_CMD("restart reply", NULL, ""),
     MGM_ARG("result", String, Mandatory, "Error message"),
     MGM_ARG("restarted", Int, Optional, "No of restarted nodes"),
+    MGM_ARG("disconnect", Int, Optional, "Disconnect to apply"),
     MGM_END()
   };
+
   CHECK_HANDLE(handle, -1);
   CHECK_CONNECTED(handle, -1);
-  
+
+  if(handle->mgmd_version_build==-1)
+  {
+    char verstr[50];
+    if(!ndb_mgm_get_version(handle,
+                        &(handle->mgmd_version_major),
+                        &(handle->mgmd_version_minor),
+                        &(handle->mgmd_version_build),
+                        sizeof(verstr),
+                            verstr))
+    {
+      return -1;
+    }
+  }
+  int use_v2= ((handle->mgmd_version_major==5)
+    && (
+        (handle->mgmd_version_minor==0 && handle->mgmd_version_build>=21)
+        ||(handle->mgmd_version_minor==1 && handle->mgmd_version_build>=12)
+        ||(handle->mgmd_version_minor>1)
+        )
+               )
+    || (handle->mgmd_version_major>5);
+
   if(no_of_nodes < 0){
     SET_ERROR(handle, NDB_MGM_RESTART_FAILED, 
 	      "Restart requested of negative number of nodes");
@@ -964,7 +1077,7 @@
     const Properties *reply;
     const int timeout = handle->read_timeout;
     handle->read_timeout= 5*60*1000; // 5 minutes
-    reply = ndb_mgm_call(handle, restart_reply, "restart all", &args);
+    reply = ndb_mgm_call(handle, restart_reply_v1, "restart all", &args);
     handle->read_timeout= timeout;
     CHECK_REPLY(reply, -1);
 
@@ -1000,7 +1113,10 @@
   const Properties *reply;
   const int timeout = handle->read_timeout;
   handle->read_timeout= 5*60*1000; // 5 minutes
-  reply = ndb_mgm_call(handle, restart_reply, "restart node", &args);
+  if(use_v2)
+    reply = ndb_mgm_call(handle, restart_reply_v2, "restart node v2", &args);
+  else
+    reply = ndb_mgm_call(handle, restart_reply_v1, "restart node", &args);
   handle->read_timeout= timeout;
   if(reply != NULL) {
     BaseString result;
@@ -1011,20 +1127,16 @@
       return -1;
     }
     reply->get("restarted", &restarted);
+    if(use_v2)
+      reply->get("disconnect", (Uint32*)disconnect);
+    else
+      *disconnect= 0;
     delete reply;
   } 
   
   return restarted;
 }
 
-extern "C"
-int
-ndb_mgm_restart(NdbMgmHandle handle, int no_of_nodes, const int *node_list) 
-{
-  SET_ERROR(handle, NDB_MGM_NO_ERROR, "Executing: ndb_mgm_restart");
-  return ndb_mgm_restart2(handle, no_of_nodes, node_list, 0, 0, 0);
-}
-
 static const char *clusterlog_severity_names[]=
   { "enabled", "debug", "info", "warning", "error", "critical", "alert" };
 
@@ -2381,6 +2493,58 @@
   in.gets(buf, sizeof(buf));
 
   DBUG_RETURN(0);
+}
+
+extern "C"
+int ndb_mgm_get_version(NdbMgmHandle handle,
+                        int *major, int *minor, int *build, int len, char* str)
+{
+  DBUG_ENTER("ndb_mgm_get_version");
+  CHECK_HANDLE(handle, 0);
+  CHECK_CONNECTED(handle, 0);
+
+  Properties args;
+
+  const ParserRow<ParserDummy> reply[]= {
+    MGM_CMD("version", NULL, ""),
+    MGM_ARG("id", Int, Mandatory, "ID"),
+    MGM_ARG("major", Int, Mandatory, "Major"),
+    MGM_ARG("minor", Int, Mandatory, "Minor"),
+    MGM_ARG("string", String, Mandatory, "String"),
+    MGM_END()
+  };
+
+  const Properties *prop;
+  prop = ndb_mgm_call(handle, reply, "get version", &args);
+  CHECK_REPLY(prop, 0);
+
+  Uint32 id;
+  if(!prop->get("id",&id)){
+    fprintf(handle->errstream, "Unable to get value\n");
+    return 0;
+  }
+  *build= getBuild(id);
+
+  if(!prop->get("major",(Uint32*)major)){
+    fprintf(handle->errstream, "Unable to get value\n");
+    return 0;
+  }
+
+  if(!prop->get("minor",(Uint32*)minor)){
+    fprintf(handle->errstream, "Unable to get value\n");
+    return 0;
+  }
+
+  BaseString result;
+  if(!prop->get("string", result)){
+    fprintf(handle->errstream, "Unable to get value\n");
+    return 0;
+  }
+
+  strncpy(str, result.c_str(), len);
+
+  delete prop;
+  DBUG_RETURN(1);
 }
 
 template class Vector<const ParserRow<ParserDummy>*>;

--- 1.60/ndb/src/mgmclient/CommandInterpreter.cpp	2006-05-23 16:24:19 +10:00
+++ 1.61/ndb/src/mgmclient/CommandInterpreter.cpp	2006-06-08 02:35:35 +10:00
@@ -1135,7 +1135,7 @@
 	  }
 	  if (node_state->node_group >= 0) {
 	    ndbout << ", Nodegroup: " << node_state->node_group;
-	    if (node_state->dynamic_id == master_id)
+	    if (master_id && node_state->dynamic_id == master_id)
 	      ndbout << ", Master";
 	  }
 	}

--- 1.98/ndb/src/mgmsrv/MgmtSrvr.cpp	2006-05-23 16:24:19 +10:00
+++ 1.99/ndb/src/mgmsrv/MgmtSrvr.cpp	2006-06-08 02:40:12 +10:00
@@ -504,9 +504,10 @@
   if (_ownNodeId == 0) // we did not get node id from other server
   {
     NodeId tmp= m_config_retriever->get_configuration_nodeid();
+    int error_code;
 
     if (!alloc_node_id(&tmp, NDB_MGM_NODE_TYPE_MGM,
-		       0, 0, error_string)){
+		       0, 0, error_code, error_string)){
       ndbout << "Unable to obtain requested nodeid: "
 	     << error_string.c_str() << endl;
       require(false);
@@ -1120,31 +1121,16 @@
       const NFCompleteRep * const rep =
 	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
 #ifdef VM_TRACE
-      ndbout_c("Node %d fail completed", rep->failedNodeId);
+      ndbout_c("sendSTOP_REQ Node %d fail completed", rep->failedNodeId);
 #endif
+      nodes.clear(rep->failedNodeId); // clear the failed node
+      if (singleUserNodeId == 0)
+        stoppedNodes.set(rep->failedNodeId);
       break;
     }
     case GSN_NODE_FAILREP:{
       const NodeFailRep * const rep =
 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
-      NodeBitmask failedNodes;
-      failedNodes.assign(NodeBitmask::Size, rep->theNodes);
-#ifdef VM_TRACE
-      {
-	ndbout << "Failed nodes:";
-	for (unsigned i = 0; i < 32*NodeBitmask::Size; i++)
-	  if(failedNodes.get(i))
-	    ndbout << " " << i;
-	ndbout << endl;
-      }
-#endif
-      failedNodes.bitAND(nodes);
-      if (!failedNodes.isclear())
-      {
-	nodes.bitANDC(failedNodes); // clear the failed nodes
-	if (singleUserNodeId == 0)
-	  stoppedNodes.bitOR(failedNodes);
-      }
       break;
     }
     default:
@@ -1293,12 +1279,48 @@
                         abort,
                         false,
                         true,
-                        nostart,
+                        true,
                         initialStart,
                         stopSelf);
+
+  if (ret)
+    return ret;
+
   if (stopCount)
     *stopCount = nodes.count();
-  return ret;
+  
+  // start up the nodes again
+  int waitTime = 12000;
+  NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
+  for (unsigned i = 0; i < node_ids.size(); i++)
+  {
+    NodeId nodeId= node_ids[i];
+    enum ndb_mgm_node_status s;
+    s = NDB_MGM_NODE_STATUS_NO_CONTACT;
+#ifdef VM_TRACE
+    ndbout_c("Waiting for %d not started", nodeId);
+#endif
+    while (s != NDB_MGM_NODE_STATUS_NOT_STARTED && waitTime > 0)
+    {
+      Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
+      Uint32 connectCount = 0;
+      bool system;
+      const char *address;
+      status(nodeId, &s, &version, &startPhase, 
+             &system, &dynamicId, &nodeGroup, &connectCount, &address);
+      NdbSleep_MilliSleep(100);  
+      waitTime = (maxTime - NdbTick_CurrentMillisecond());
+    }
+  }
+
+  if (nostart)
+    return 0;
+
+  for (unsigned i = 0; i < node_ids.size(); i++)
+  {
+    int result = start(node_ids[i]);
+  }
+  return 0;
 }
 
 /*
@@ -1952,7 +1974,8 @@
 			enum ndb_mgm_node_type type,
 			struct sockaddr *client_addr, 
 			SOCKET_SIZE_TYPE *client_addr_len,
-			BaseString &error_string)
+			int &error_code, BaseString &error_string,
+                        int log_event)
 {
   DBUG_ENTER("MgmtSrvr::alloc_node_id");
   DBUG_PRINT("enter", ("nodeid=%d, type=%d, client_addr=%d",
@@ -1961,6 +1984,7 @@
     if (*nodeId == 0) {
       error_string.appfmt("no-nodeid-checks set in management server.\n"
 			  "node id must be set explicitly in connectstring");
+      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
       DBUG_RETURN(false);
     }
     DBUG_RETURN(true);
@@ -1985,8 +2009,10 @@
 
   if(NdbMutex_Lock(m_configMutex))
   {
+    // should not happen
     error_string.appfmt("unable to lock configuration mutex");
-    return false;
+    error_code = NDB_MGM_ALLOCID_ERROR;
+    DBUG_RETURN(false);
   }
   ndb_mgm_configuration_iterator
     iter(* _config->m_configValues, CFG_SECTION_NODE);
@@ -2057,6 +2083,7 @@
 			  "or specifying unique host names in config file.",
 			  id_found, tmp);
       NdbMutex_Unlock(m_configMutex);
+      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
       DBUG_RETURN(false);
     }
     if (config_hostname == 0) {
@@ -2065,6 +2092,7 @@
 			  "or specifying unique host names in config file,\n"
 			  "or specifying just one mgmt server in config file.",
 			  tmp);
+      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
       DBUG_RETURN(false);
     }
     id_found= tmp; // mgmt server matched, check for more matches
@@ -2106,8 +2134,9 @@
     
     char tmp_str[128];
     m_reserved_nodes.getText(tmp_str);
-    g_eventLogger.info("Mgmt server state: nodeid %d reserved for ip %s, m_reserved_nodes %s.",
-		       id_found, get_connect_address(id_found), tmp_str);
+    g_eventLogger.info("Mgmt server state: nodeid %d reserved for ip %s, "
+                       "m_reserved_nodes %s.",
+                       id_found, get_connect_address(id_found), tmp_str);
     DBUG_RETURN(true);
   }
 
@@ -2127,26 +2156,48 @@
     type_c_string.assfmt("%s(%s)", alias, str);
   }
 
-  if (*nodeId == 0) {
+  if (*nodeId == 0)
+  {
     if (found_matching_id)
+    {
       if (found_matching_type)
+      {
 	if (found_free_node)
+        {
 	  error_string.appfmt("Connection done from wrong host ip %s.",
 			      (client_addr)?
-			        inet_ntoa(((struct sockaddr_in *)
+                              inet_ntoa(((struct sockaddr_in *)
 					 (client_addr))->sin_addr):"");
+          error_code = NDB_MGM_ALLOCID_ERROR;
+        }
 	else
+        {
 	  error_string.appfmt("No free node id found for %s.",
 			      type_string.c_str());
+          error_code = NDB_MGM_ALLOCID_ERROR;
+        }
+      }
       else
+      {
 	error_string.appfmt("No %s node defined in config file.",
 			    type_string.c_str());
+        error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+      }
+    }
     else
+    {
       error_string.append("No nodes defined in config file.");
-  } else {
+      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+    }
+  }
+  else
+  {
     if (found_matching_id)
+    {
       if (found_matching_type)
-	if (found_free_node) {
+      {
+	if (found_free_node)
+        {
 	  // have to split these into two since inet_ntoa overwrites itself
 	  error_string.appfmt("Connection with id %d done from wrong host ip %s,",
 			      *nodeId, inet_ntoa(((struct sockaddr_in *)
@@ -2154,27 +2205,44 @@
 	  error_string.appfmt(" expected %s(%s).", config_hostname,
 			      r_config_addr ?
 			      "lookup failed" : inet_ntoa(config_addr));
-	} else
+          error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+	}
+        else
+        {
 	  error_string.appfmt("Id %d already allocated by another node.",
 			      *nodeId);
+          error_code = NDB_MGM_ALLOCID_ERROR;
+        }
+      }
       else
+      {
 	error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
 			    *nodeId, type_c_string.c_str(),
 			    type_string.c_str());
+        error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+      }
+    }
     else
+    {
       error_string.appfmt("No node defined with id=%d in config file.",
 			  *nodeId);
+      error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
+    }
   }
 
-  g_eventLogger.warning("Allocate nodeid (%d) failed. Connection from ip %s. "
-			"Returned error string \"%s\"",
-			*nodeId,
-			client_addr != 0 ? inet_ntoa(((struct sockaddr_in *)(client_addr))->sin_addr) : "<none>",
-			error_string.c_str());
-
-  NodeBitmask connected_nodes2;
-  get_connected_nodes(connected_nodes2);
+  if (log_event || error_code == NDB_MGM_ALLOCID_CONFIG_MISMATCH)
   {
+    g_eventLogger.warning("Allocate nodeid (%d) failed. Connection from ip %s."
+                          " Returned error string \"%s\"",
+                          *nodeId,
+                          client_addr != 0
+                          ? inet_ntoa(((struct sockaddr_in *)
+                                       (client_addr))->sin_addr)
+                          : "<none>",
+                          error_string.c_str());
+
+    NodeBitmask connected_nodes2;
+    get_connected_nodes(connected_nodes2);
     BaseString tmp_connected, tmp_not_connected;
     for(Uint32 i = 0; i < MAX_NODES; i++)
     {
@@ -2374,6 +2442,7 @@
 MgmtSrvr::abortBackup(Uint32 backupId)
 {
   SignalSender ss(theFacade);
+  ss.lock(); // lock will be released on exit
 
   bool next;
   NodeId nodeId = 0;
@@ -2412,6 +2481,8 @@
 MgmtSrvr::Allocated_resources::Allocated_resources(MgmtSrvr &m)
   : m_mgmsrv(m)
 {
+  m_reserved_nodes.clear();
+  m_alloc_timeout= 0;
 }
 
 MgmtSrvr::Allocated_resources::~Allocated_resources()
@@ -2430,9 +2501,22 @@
 }
 
 void
-MgmtSrvr::Allocated_resources::reserve_node(NodeId id)
+MgmtSrvr::Allocated_resources::reserve_node(NodeId id, NDB_TICKS timeout)
 {
   m_reserved_nodes.set(id);
+  m_alloc_timeout= NdbTick_CurrentMillisecond() + timeout;
+}
+
+bool
+MgmtSrvr::Allocated_resources::is_timed_out(NDB_TICKS tick)
+{
+  if (m_alloc_timeout && tick > m_alloc_timeout)
+  {
+    g_eventLogger.info("Mgmt server state: nodeid %d timed out.",
+                       get_nodeid());
+    return true;
+  }
+  return false;
 }
 
 NodeId

--- 1.43/ndb/src/mgmsrv/MgmtSrvr.hpp	2006-05-23 16:24:19 +10:00
+++ 1.44/ndb/src/mgmsrv/MgmtSrvr.hpp	2006-06-08 02:35:36 +10:00
@@ -106,7 +106,8 @@
     ~Allocated_resources();
     // methods to reserve/allocate resources which
     // will be freed when running destructor
-    void reserve_node(NodeId id);
+    void reserve_node(NodeId id, NDB_TICKS timeout);
+    bool is_timed_out(NDB_TICKS tick);
     bool is_reserved(NodeId nodeId) { return m_reserved_nodes.get(nodeId); }
     bool is_reserved(NodeBitmask mask) { return !mask.bitAND(m_reserved_nodes).isclear(); }
     bool isclear() { return m_reserved_nodes.isclear(); }
@@ -114,6 +115,7 @@
   private:
     MgmtSrvr &m_mgmsrv;
     NodeBitmask m_reserved_nodes;
+    NDB_TICKS m_alloc_timeout;
   };
   NdbMutex *m_node_id_mutex;
 
@@ -435,8 +437,10 @@
    */
   bool getNextNodeId(NodeId * _nodeId, enum ndb_mgm_node_type type) const ;
   bool alloc_node_id(NodeId * _nodeId, enum ndb_mgm_node_type type,
-		     struct sockaddr *client_addr, SOCKET_SIZE_TYPE *client_addr_len,
-		     BaseString &error_string);
+		     struct sockaddr *client_addr,
+                     SOCKET_SIZE_TYPE *client_addr_len,
+		     int &error_code, BaseString &error_string,
+                     int log_event = 1);
   
   /**
    *

--- 1.64/ndb/src/mgmsrv/Services.cpp	2006-06-07 16:20:46 +10:00
+++ 1.65/ndb/src/mgmsrv/Services.cpp	2006-06-08 02:35:36 +10:00
@@ -138,6 +138,8 @@
     MGM_ARG("public key", String, Mandatory, "Public key"),
     MGM_ARG("endian", String, Optional, "Endianness"),
     MGM_ARG("name", String, Optional, "Name of connection"),
+    MGM_ARG("timeout", Int, Optional, "Timeout in seconds"),
+    MGM_ARG("log_event", Int, Optional, "Log failure in cluster log"),
 
   MGM_CMD("get version", &MgmApiSession::getVersion, ""),
   
@@ -271,6 +273,15 @@
   MGM_END()
 };
 
+struct PurgeStruct
+{
+  NodeBitmask free_nodes;/* free nodes as reported
+			  * by ndbd in apiRegReqConf
+			  */
+  BaseString *str;
+  NDB_TICKS tick;
+};
+
 MgmApiSession::MgmApiSession(class MgmtSrvr & mgm, NDB_SOCKET_TYPE sock)
   : SocketServer::Session(sock), m_mgmsrv(mgm)
 {
@@ -425,12 +436,15 @@
 {
   const char *cmd= "get nodeid reply";
   Uint32 version, nodeid= 0, nodetype= 0xff;
+  Uint32 timeout= 20;  // default seconds timeout
   const char * transporter;
   const char * user;
   const char * password;
   const char * public_key;
   const char * endian= NULL;
   const char * name= NULL;
+  Uint32 log_event= 1;
+  bool log_event_version;
   union { long l; char c[sizeof(long)]; } endian_check;
 
   args.get("version", &version);
@@ -442,6 +456,9 @@
   args.get("public key", &public_key);
   args.get("endian", &endian);
   args.get("name", &name);
+  args.get("timeout", &timeout);
+  /* for backwards compatibility, keep track of whether client uses new protocol */
+  log_event_version= args.get("log_event", &log_event);
 
   endian_check.l = 1;
   if(endian 
@@ -468,9 +485,9 @@
     return;
   }
 
-  struct sockaddr addr;
+  struct sockaddr_in addr;
   SOCKET_SIZE_TYPE addrlen= sizeof(addr);
-  int r = getpeername(m_socket, &addr, &addrlen);
+  int r = getpeername(m_socket, (struct sockaddr*)&addr, &addrlen);
   if (r != 0 ) {
     m_output->println(cmd);
     m_output->println("result: getpeername(%d) failed, err= %d", m_socket, r);
@@ -481,14 +498,39 @@
   NodeId tmp= nodeid;
   if(tmp == 0 || !m_allocated_resources->is_reserved(tmp)){
     BaseString error_string;
-    if (!m_mgmsrv.alloc_node_id(&tmp, (enum ndb_mgm_node_type)nodetype, 
-				&addr, &addrlen, error_string)){
+    int error_code;
+    NDB_TICKS tick= 0;
+    /* only report error on second attempt so as not to clog the cluster log */
+    while (!m_mgmsrv.alloc_node_id(&tmp, (enum ndb_mgm_node_type)nodetype, 
+                                   (struct sockaddr*)&addr, &addrlen, error_code, error_string,
+                                   tick == 0 ? 0 : log_event))
+    {
+      /* NDB_MGM_ALLOCID_CONFIG_MISMATCH is a non retriable error */
+      if (tick == 0 && error_code != NDB_MGM_ALLOCID_CONFIG_MISMATCH)
+      {
+        // attempt to free any timed out reservations
+        tick= NdbTick_CurrentMillisecond();
+        struct PurgeStruct ps;
+        m_mgmsrv.get_connected_nodes(ps.free_nodes);
+        // invert connected_nodes to get free nodes
+        ps.free_nodes.bitXORC(NodeBitmask());
+        ps.str= 0;
+        ps.tick= tick;
+        m_mgmsrv.get_socket_server()->
+          foreachSession(stop_session_if_timed_out,&ps);
+	m_mgmsrv.get_socket_server()->checkSessions();
+        error_string = "";
+        continue;
+      }
       const char *alias;
       const char *str;
       alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)
 						nodetype, &str);
       m_output->println(cmd);
       m_output->println("result: %s", error_string.c_str());
+      /* only use error_code protocol if client knows about it */
+      if (log_event_version)
+        m_output->println("error_code: %d", error_code);
       m_output->println("");
       return;
     }
@@ -508,7 +550,7 @@
   m_output->println("nodeid: %u", tmp);
   m_output->println("result: Ok");
   m_output->println("");
-  m_allocated_resources->reserve_node(tmp);
+  m_allocated_resources->reserve_node(tmp, timeout*1000);
   
   if (name)
     g_eventLogger.info("Node %d: %s", tmp, name);
@@ -1538,14 +1580,6 @@
   m_output->println("");
 }
 
-struct PurgeStruct
-{
-  NodeBitmask free_nodes;/* free nodes as reported
-			  * by ndbd in apiRegReqConf
-			  */
-  BaseString *str;
-};
-
 void
 MgmApiSession::stop_session_if_not_connected(SocketServer::Session *_s, void *data)
 {
@@ -1553,7 +1587,20 @@
   struct PurgeStruct &ps= *(struct PurgeStruct *)data;
   if (s->m_allocated_resources->is_reserved(ps.free_nodes))
   {
-    ps.str->appfmt(" %d", s->m_allocated_resources->get_nodeid());
+    if (ps.str)
+      ps.str->appfmt(" %d", s->m_allocated_resources->get_nodeid());
+    s->stopSession();
+  }
+}
+
+void
+MgmApiSession::stop_session_if_timed_out(SocketServer::Session *_s, void *data)
+{
+  MgmApiSession *s= (MgmApiSession *)_s;
+  struct PurgeStruct &ps= *(struct PurgeStruct *)data;
+  if (s->m_allocated_resources->is_reserved(ps.free_nodes) &&
+      s->m_allocated_resources->is_timed_out(ps.tick))
+  {
     s->stopSession();
   }
 }
@@ -1570,6 +1617,7 @@
   ps.free_nodes.bitXORC(NodeBitmask()); // invert connected_nodes to get free nodes
 
   m_mgmsrv.get_socket_server()->foreachSession(stop_session_if_not_connected,&ps);
+  m_mgmsrv.get_socket_server()->checkSessions();
 
   m_output->println("purge stale sessions reply");
   if (str.length() > 0)

--- 1.19/ndb/src/mgmsrv/Services.hpp	2006-06-07 16:20:46 +10:00
+++ 1.20/ndb/src/mgmsrv/Services.hpp	2006-06-08 02:35:36 +10:00
@@ -30,6 +30,7 @@
 
 class MgmApiSession : public SocketServer::Session
 {
+  static void stop_session_if_timed_out(SocketServer::Session *_s, void *data);
   static void stop_session_if_not_connected(SocketServer::Session *_s, void *data);
 private:
   typedef Parser<MgmApiSession> Parser_t;
Thread
bk commit into 5.0 tree (stewart:1.2170)  Stewart Smith  7 Jun