List:Commits« Previous MessageNext Message »
From:Frazer Clement Date:December 15 2010 1:47pm
Subject:bzr commit into mysql-5.1-telco-7.0 branch (frazer:4062)
View as plain text  
#At file:///home/frazer/bzr/mysql-5.1-telco-7.0-juniper/ based on revid:jonas@stripped

 4062 Frazer Clement	2010-12-15
      CluB commit of Connectivity Check in 7.0
      
      With default delay of 1500 millis
      
      Re-commit with Cmakelists change for Windows

    added:
      storage/ndb/include/kernel/signaldata/NodePing.hpp
      storage/ndb/src/common/debugger/signaldata/NodePing.cpp
    modified:
      storage/ndb/include/kernel/GlobalSignalNumbers.h
      storage/ndb/include/kernel/signaldata/FailRep.hpp
      storage/ndb/include/kernel/signaldata/SignalData.hpp
      storage/ndb/include/mgmapi/mgmapi_config_parameters.h
      storage/ndb/include/mgmapi/ndb_logevent.h
      storage/ndb/include/transporter/TransporterRegistry.hpp
      storage/ndb/src/common/debugger/EventLogger.cpp
      storage/ndb/src/common/debugger/signaldata/CMakeLists.txt
      storage/ndb/src/common/debugger/signaldata/Makefile.am
      storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp
      storage/ndb/src/common/debugger/signaldata/SignalNames.cpp
      storage/ndb/src/common/transporter/TransporterRegistry.cpp
      storage/ndb/src/kernel/blocks/ERROR_codes.txt
      storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
      storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
      storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
      storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
      storage/ndb/src/mgmsrv/ConfigInfo.cpp
      storage/ndb/test/ndbapi/testNodeRestart.cpp
=== modified file 'storage/ndb/include/kernel/GlobalSignalNumbers.h'
--- a/storage/ndb/include/kernel/GlobalSignalNumbers.h	2010-10-20 07:12:58 +0000
+++ b/storage/ndb/include/kernel/GlobalSignalNumbers.h	2010-12-15 13:47:15 +0000
@@ -25,7 +25,7 @@
  *
  * When adding a new signal, remember to update MAX_GSN and SignalNames.cpp
  */
-const GlobalSignalNumber MAX_GSN = 768;
+const GlobalSignalNumber MAX_GSN = 770;
 
 struct GsnName {
   GlobalSignalNumber gsn;
@@ -1075,4 +1075,8 @@ extern const GlobalSignalNumber NO_OF_SI
 #define GSN_RELEASE_PAGES_REQ           680
 #define GSN_RELEASE_PAGES_CONF          681
 
+/* NODE_PING signals */
+#define GSN_NODE_PING_REQ               769 /* distr. */
+#define GSN_NODE_PING_CONF              770 /* distr. */
+
 #endif

=== modified file 'storage/ndb/include/kernel/signaldata/FailRep.hpp'
--- a/storage/ndb/include/kernel/signaldata/FailRep.hpp	2010-12-13 16:43:31 +0000
+++ b/storage/ndb/include/kernel/signaldata/FailRep.hpp	2010-12-15 13:47:15 +0000
@@ -52,7 +52,8 @@ public:
     ZLINK_FAILURE=5,
     ZOTHERNODE_FAILED_DURING_START=6,
     ZMULTI_NODE_SHUTDOWN = 7,
-    ZPARTITIONED_CLUSTER = 8
+    ZPARTITIONED_CLUSTER = 8,
+    ZCONNECT_CHECK_FAILURE = 9
   };
 
   Uint32 getFailSourceNodeId(Uint32 sigLen) const

=== added file 'storage/ndb/include/kernel/signaldata/NodePing.hpp'
--- a/storage/ndb/include/kernel/signaldata/NodePing.hpp	1970-01-01 00:00:00 +0000
+++ b/storage/ndb/include/kernel/signaldata/NodePing.hpp	2010-12-15 13:47:15 +0000
@@ -0,0 +1,76 @@
+/*
+   Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
+*/
+
+#ifndef NODEPING_HPP
+#define NODEPING_HPP
+
+#include "SignalData.hpp"
+
+
+/*
+ * NodePingReq/Conf is sent between QMGR nodes to help determine the
+ * available connectivity in a cluster experiencing heartbeat problems
+ * 
+ * When a node detects that it has not received a heartbeat from a 
+ * connected node for the heartbeat period, it initiates a global
+ * connectivity check protocol by sending a NODE_PING_REQ signal to all
+ * nodes considered to be running.
+ *
+ * On receiving this signal, a node will respond with NODE_PING_CONF to
+ * the sender, and begin its own connectivity check, if it is not
+ * already involved in one.
+ *
+ * In this way, all nodes reachable within some latency n will begin
+ * a connectivity check.  If they do not receive a NODE_PING_CONF from a 
+ * peer node within some further latency m, then they consider it to
+ * be suspect, and after a further latency p they consider it failed.
+ *
+ * In environments where latency between nodes fluctuates, but 
+ * connectivity is maintained (for example where TCP connections observe 
+ * latency due to underlying IP re-routing/failover), the connectivity
+ * check allows nodes to arm themselves in preparation for the potential
+ * race of FAIL_REP signals that can arise in these situations, by marking
+ * connections experiencing latency as SUSPECT.  Once a node is marked as
+ * SUSPECT, FAIL_REP signals originating from it may not be trusted or
+ * acted upon.
+ */
+
+class NodePingReq {
+  /**
+   * Sender(s) / Receiver(s)
+   */
+  friend class Qmgr;
+public:
+  STATIC_CONST( SignalLength = 2 );
+
+  Uint32 senderData;
+  Uint32 senderRef;
+};
+
+class NodePingConf {
+  /**
+   * Sender(s) / Receiver(s)
+   */
+  friend class Qmgr;
+public:
+  STATIC_CONST( SignalLength = 2 );
+
+  Uint32 senderData;
+  Uint32 senderRef;
+};
+
+#endif

=== modified file 'storage/ndb/include/kernel/signaldata/SignalData.hpp'
--- a/storage/ndb/include/kernel/signaldata/SignalData.hpp	2010-08-17 09:54:53 +0000
+++ b/storage/ndb/include/kernel/signaldata/SignalData.hpp	2010-12-15 13:47:15 +0000
@@ -305,4 +305,7 @@ GSN_PRINT_SIGNATURE(printLOCAL_ROUTE_ORD
 GSN_PRINT_SIGNATURE(printDBINFO_SCAN);
 GSN_PRINT_SIGNATURE(printDBINFO_SCAN_REF);
 
+GSN_PRINT_SIGNATURE(printNODE_PING_REQ);
+GSN_PRINT_SIGNATURE(printNODE_PING_CONF);
+
 #endif

=== modified file 'storage/ndb/include/mgmapi/mgmapi_config_parameters.h'
--- a/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2010-12-02 11:02:29 +0000
+++ b/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2010-12-15 13:47:15 +0000
@@ -179,6 +179,8 @@
 #define CFG_DB_EVENTLOG_BUFFER_SIZE   613
 #define CFG_DB_NUMA                   614
 
+#define CFG_DB_CONNECT_CHECK_DELAY    615
+
 #define CFG_NODE_ARBIT_RANK           200
 #define CFG_NODE_ARBIT_DELAY          201
 #define CFG_RESERVED_SEND_BUFFER_MEMORY 202

=== modified file 'storage/ndb/include/mgmapi/ndb_logevent.h'
--- a/storage/ndb/include/mgmapi/ndb_logevent.h	2010-10-13 12:22:51 +0000
+++ b/storage/ndb/include/mgmapi/ndb_logevent.h	2010-12-15 13:47:15 +0000
@@ -118,6 +118,12 @@ extern "C" {
     NDB_LE_LCP_TakeoverStarted = 33,
     /** NDB_MGM_EVENT_CATEGORY_NODE_RESTART */
     NDB_LE_LCP_TakeoverCompleted = 34,
+    /** NDB_MGM_EVENT_CATEGORY_NODE_RESTART */
+    NDB_LE_ConnectCheckStarted = 82,
+    /** NDB_MGM_EVENT_CATEGORY_NODE_RESTART */
+    NDB_LE_ConnectCheckCompleted = 83,
+    /** NDB_MGM_EVENT_CATEGORY_NODE_RESTART */
+    NDB_LE_NodeFailRejected = 84,
 
     /** NDB_MGM_EVENT_CATEGORY_STATISTIC */
     NDB_LE_TransReportCounters = 35,
@@ -478,6 +484,21 @@ extern "C" {
   struct ndb_logevent_LCP_TakeoverCompleted {
     unsigned state;
   };
+  struct ndb_logevent_ConnectCheckStarted {
+    unsigned other_node_count;
+    unsigned reason;
+    unsigned causing_node;
+  };
+  struct ndb_logevent_ConnectCheckCompleted {
+    unsigned nodes_checked;
+    unsigned nodes_suspect;
+    unsigned nodes_failed;
+  };
+  struct ndb_logevent_NodeFailRejected {
+    unsigned reason;
+    unsigned failed_node;
+    unsigned source_node;
+  };
 
   /* STATISTIC */
   struct ndb_logevent_TransReportCounters {
@@ -834,6 +855,9 @@ extern "C" {
       struct ndb_logevent_GCP_TakeoverCompleted GCP_TakeoverCompleted;
       struct ndb_logevent_LCP_TakeoverStarted LCP_TakeoverStarted;
       struct ndb_logevent_LCP_TakeoverCompleted LCP_TakeoverCompleted;
+      struct ndb_logevent_ConnectCheckStarted ConnectCheckStarted;
+      struct ndb_logevent_ConnectCheckCompleted ConnectCheckCompleted;
+      struct ndb_logevent_NodeFailRejected NodeFailRejected;
 
       /* STATISTIC */
       struct ndb_logevent_TransReportCounters TransReportCounters;

=== modified file 'storage/ndb/include/transporter/TransporterRegistry.hpp'
--- a/storage/ndb/include/transporter/TransporterRegistry.hpp	2010-11-09 20:40:03 +0000
+++ b/storage/ndb/include/transporter/TransporterRegistry.hpp	2010-12-15 13:47:15 +0000
@@ -325,6 +325,13 @@ public:
 #ifdef DEBUG_TRANSPORTER
   void printState();
 #endif
+
+#ifdef ERROR_INSERT
+  /* Utils for testing latency issues */
+  bool isBlocked(NodeId nodeId);
+  void blockReceive(NodeId nodeId);
+  void unblockReceive(NodeId nodeId);
+#endif
   
   class Transporter_interface {
   public:
@@ -355,6 +362,13 @@ private:
   int nSCITransporters;
   int nSHMTransporters;
 
+#ifdef ERROR_INSERT
+  Bitmask<MAX_NTRANSPORTERS/32> m_blocked;
+  Bitmask<MAX_NTRANSPORTERS/32> m_blocked_with_data;
+  Bitmask<MAX_NTRANSPORTERS/32> m_blocked_disconnected;
+  int m_disconnect_errors[MAX_NTRANSPORTERS];
+#endif
+
   /**
    * Bitmask of transporters that has data "carried over" since
    *   last performReceive

=== modified file 'storage/ndb/src/common/debugger/EventLogger.cpp'
--- a/storage/ndb/src/common/debugger/EventLogger.cpp	2010-10-13 12:22:51 +0000
+++ b/storage/ndb/src/common/debugger/EventLogger.cpp	2010-12-15 13:47:15 +0000
@@ -24,6 +24,7 @@
 #include <NdbConfig.h>
 #include <kernel/BlockNumbers.h>
 #include <signaldata/ArbitSignalData.hpp>
+#include <signaldata/FailRep.hpp>
 #include <NodeState.hpp>
 #include <version.h>
 #include <ndb_version.h>
@@ -1181,6 +1182,141 @@ void getTextSavedEvent(QQQQ)
   abort();
 }
 
+void getTextConnectCheckStarted(QQQQ)
+{
+  /* EventReport format :
+   * 1 : other_node_count
+   * 2 : reason (FailRep causes or 0)
+   * 3 : causing_node (if from FailRep)
+   * 4 : bitmask wordsize
+   * 5 : bitmask[2]
+   */
+  Uint32 other_node_count = theData[1];
+  Uint32 reason = theData[2];
+  Uint32 causing_node = theData[3];
+  Uint32 bitmaskSz = theData[4];
+  char otherNodeMask[100];
+  char suspectNodeMask[100];
+  BitmaskImpl::getText(bitmaskSz, theData + 5 + (0 * bitmaskSz), otherNodeMask);
+  BitmaskImpl::getText(bitmaskSz, theData + 5 + (1 * bitmaskSz), suspectNodeMask);
+  Uint32 suspectCount = BitmaskImpl::count(bitmaskSz, theData + 5 + (1 * bitmaskSz));
+
+  if (reason)
+  {
+    /* Connect check started for specific reason */
+    const char * reasonText = NULL;
+    switch (reason)
+    {
+    case FailRep::ZHEARTBEAT_FAILURE:
+      reasonText = "Heartbeat failure";
+      break;
+    case FailRep::ZCONNECT_CHECK_FAILURE:
+      reasonText = "Connectivity check request";
+      break;
+    default:
+      reasonText = "UNKNOWN";
+      break;
+    }
+    
+    BaseString::snprintf(m_text, m_text_len,
+                         "Connectivity Check of %u other nodes (%s) started due to %s from node %u.",
+                         other_node_count,
+                         otherNodeMask,
+                         reasonText,
+                         causing_node);
+  }
+  else
+  {
+    /* Connect check restarted due to suspect nodes */
+    BaseString::snprintf(m_text, m_text_len,
+                         "Connectivity Check of %u nodes (%s) restarting due to %u suspect nodes (%s).",
+                         other_node_count,
+                         otherNodeMask,
+                         suspectCount,
+                         suspectNodeMask);
+  }
+}
+
+void getTextConnectCheckCompleted(QQQQ)
+{
+  /* EventReport format
+   * 1 : Nodes checked
+   * 2 : Suspect nodes
+   * 3 : Failed nodes
+   */
+  
+  Uint32 nodes_checked = theData[1];
+  Uint32 suspect_nodes = theData[2];
+  Uint32 failed_nodes = theData[3];
+  
+  if ((failed_nodes + suspect_nodes) == 0)
+  { 
+    /* All connectivity ok */
+    BaseString::snprintf(m_text, m_text_len,
+                         "Connectivity Check completed on %u nodes, connectivity ok",
+                         nodes_checked);
+  }
+  else
+  {
+    if (failed_nodes > 0)
+    {
+      if (suspect_nodes > 0)
+      {
+        BaseString::snprintf(m_text, m_text_len,
+                             "Connectivity Check completed on %u nodes.  %u nodes failed.  "
+                             "%u nodes still suspect, repeating check.", 
+                             nodes_checked,
+                             failed_nodes,
+                             suspect_nodes);
+      }
+      else
+      {
+        BaseString::snprintf(m_text, m_text_len,
+                             "Connectivity Check completed on %u nodes.  %u nodes failed.  "
+                             "Connectivity now OK",
+                             nodes_checked,
+                             failed_nodes);
+      }
+    }
+    else
+    {
+      /* Just suspect nodes */
+      BaseString::snprintf(m_text, m_text_len,
+                           "Connectivity Check completed on %u nodes.  %u nodes still suspect, "
+                           "repeating check.",
+                           nodes_checked,
+                           suspect_nodes);
+    }
+  }
+}
+
+void getTextNodeFailRejected(QQQQ)
+{
+  Uint32 reason = theData[1];
+  Uint32 failed_node = theData[2];
+  Uint32 source_node = theData[3];
+
+  const char* reasonText = "Unknown";
+  switch (reason)
+  {
+  case FailRep::ZCONNECT_CHECK_FAILURE:
+    reasonText = "Connect Check Failure";
+    break;
+  case FailRep::ZLINK_FAILURE:
+    reasonText = "Link Failure";
+    break;
+  }
+
+  BaseString::snprintf(m_text, m_text_len,
+                       "Received FAIL_REP (%s (%u)) for node %u sourced by suspect node %u.  "
+                       "Rejecting as failure of node %u.",
+                       reasonText,
+                       reason,
+                       failed_node,
+                       source_node,
+                       source_node);
+}
+
 #if 0
 BaseString::snprintf(m_text, 
 		     m_text_len, 
@@ -1254,6 +1390,10 @@ const EventLoggerBase::EventRepLogLevelM
   ROW(LCP_TakeoverStarted,     LogLevel::llNodeRestart, 7, Logger::LL_INFO ),
   ROW(LCP_TakeoverCompleted,   LogLevel::llNodeRestart, 7, Logger::LL_INFO ),
 
+  ROW(ConnectCheckStarted,     LogLevel::llNodeRestart, 6, Logger::LL_INFO ),
+  ROW(ConnectCheckCompleted,   LogLevel::llNodeRestart, 6, Logger::LL_INFO ),
+  ROW(NodeFailRejected,        LogLevel::llNodeRestart, 6, Logger::LL_ALERT ),
+
   // STATISTIC
   ROW(TransReportCounters,     LogLevel::llStatistic,   8, Logger::LL_INFO ),
   ROW(OperationReportCounters, LogLevel::llStatistic,   8, Logger::LL_INFO ), 

=== modified file 'storage/ndb/src/common/debugger/signaldata/CMakeLists.txt'
--- a/storage/ndb/src/common/debugger/signaldata/CMakeLists.txt	2010-11-17 11:35:04 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/CMakeLists.txt	2010-12-15 13:47:15 +0000
@@ -44,5 +44,5 @@ ADD_LIBRARY(ndbsignaldata STATIC
         LqhTrans.cpp ReadNodesConf.cpp CntrStart.cpp
         ScanFrag.cpp ApiVersion.cpp
         LocalRouteOrd.cpp
-	DbinfoScan.cpp)
+	DbinfoScan.cpp NodePing.cpp)
 

=== modified file 'storage/ndb/src/common/debugger/signaldata/Makefile.am'
--- a/storage/ndb/src/common/debugger/signaldata/Makefile.am	2010-08-06 08:19:19 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/Makefile.am	2010-12-15 13:47:15 +0000
@@ -47,7 +47,7 @@ libsignaldataprint_la_SOURCES = \
   	  CreateTrigImpl.cpp DropTrigImpl.cpp \
  	  CreateIndxImpl.cpp DropIndxImpl.cpp AlterIndxImpl.cpp \
  	  BuildIndx.cpp BuildIndxImpl.cpp ApiVersion.cpp \
-          LocalRouteOrd.cpp DbinfoScan.cpp
+          LocalRouteOrd.cpp DbinfoScan.cpp NodePing.cpp
 
 include $(top_srcdir)/storage/ndb/config/common.mk.am
 include $(top_srcdir)/storage/ndb/config/type_ndbapi.mk.am

=== added file 'storage/ndb/src/common/debugger/signaldata/NodePing.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/NodePing.cpp	1970-01-01 00:00:00 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/NodePing.cpp	2010-12-15 13:47:15 +0000
@@ -0,0 +1,38 @@
+/*
+   Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
+*/
+
+#include <signaldata/NodePing.hpp>
+
+bool
+printNODE_PING_REQ(FILE * output, const Uint32 * theData, 
+                   Uint32 len, Uint16 receiverBlockNo) {
+  const NodePingReq * const sig = CAST_CONSTPTR(NodePingReq, theData);
+  fprintf(output, " senderRef : %x round : %u\n", 
+          sig->senderRef,
+          sig->senderData);
+  return true;
+}
+
+bool
+printNODE_PING_CONF(FILE * output, const Uint32 * theData, 
+                    Uint32 len, Uint16 receiverBlockNo) {
+  const NodePingConf * const sig = CAST_CONSTPTR(NodePingConf, theData);
+  fprintf(output, " senderRef : %x round : %u\n", 
+          sig->senderRef,
+          sig->senderData);
+  return true;
+}

=== modified file 'storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp	2010-08-27 17:07:19 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp	2010-12-15 13:47:15 +0000
@@ -257,6 +257,9 @@ SignalDataPrintFunctions[] = {
   ,{ GSN_DBINFO_SCANCONF, printDBINFO_SCAN }
   ,{ GSN_DBINFO_SCANREF, printDBINFO_SCAN_REF }
 
+  ,{ GSN_NODE_PING_REQ, printNODE_PING_REQ }
+  ,{ GSN_NODE_PING_CONF, printNODE_PING_CONF }
+
   ,{ 0, 0 }
 };
 

=== modified file 'storage/ndb/src/common/debugger/signaldata/SignalNames.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp	2010-10-20 07:12:58 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp	2010-12-15 13:47:15 +0000
@@ -761,5 +761,8 @@ const GsnName SignalNames [] = {
 
   ,{ GSN_SYNC_PATH_REQ, "SYNC_PATH_REQ" }
   ,{ GSN_SYNC_PATH_CONF, "SYNC_PATH_CONF" }
+
+  ,{ GSN_NODE_PING_REQ, "NODE_PING_REQ" }
+  ,{ GSN_NODE_PING_CONF, "NODE_PING_CONF" }
 };
 const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);

=== modified file 'storage/ndb/src/common/transporter/TransporterRegistry.cpp'
--- a/storage/ndb/src/common/transporter/TransporterRegistry.cpp	2010-12-15 06:07:35 +0000
+++ b/storage/ndb/src/common/transporter/TransporterRegistry.cpp	2010-12-15 13:47:15 +0000
@@ -129,6 +129,11 @@ TransporterRegistry::TransporterRegistry
  }
 
 #endif
+#ifdef ERROR_INSERT
+  m_blocked.clear();
+  m_blocked_with_data.clear();
+  m_blocked_disconnected.clear();
+#endif
   // Initialize member variables
   nTransporters    = 0;
   nTCPTransporters = 0;
@@ -674,6 +679,16 @@ TransporterRegistry::prepareSend(Transpo
 	return SEND_MESSAGE_TOO_BIG;
       }
     } else {
+#ifdef ERROR_INSERT
+      if (m_blocked.get(nodeId))
+      {
+        /* Looks like it disconnected while blocked.  We'll pretend
+         * not to notice for now
+         */
+        WARNING("Signal to " << nodeId << " discarded as node blocked + disconnected");
+        return SEND_OK;
+      }
+#endif
       DEBUG("Signal to " << nodeId << " lost(disconnect) ");
       return SEND_DISCONNECTED;
     }
@@ -747,6 +762,16 @@ TransporterRegistry::prepareSend(Transpo
 	return SEND_MESSAGE_TOO_BIG;
       }
     } else {
+#ifdef ERROR_INSERT
+      if (m_blocked.get(nodeId))
+      {
+        /* Looks like it disconnected while blocked.  We'll pretend
+         * not to notice for now
+         */
+        WARNING("Signal to " << nodeId << " discarded as node blocked + disconnected");
+        return SEND_OK;
+      }
+#endif
       DEBUG("Signal to " << nodeId << " lost(disconnect) ");
       return SEND_DISCONNECTED;
     }
@@ -960,6 +985,15 @@ TransporterRegistry::pollReceive(Uint32 
     {
       for (int i = 0; i < num_socket_events; i++)
       {
+        Uint32 trpid = m_epoll_events[i].data.u32;
+#ifdef ERROR_INSERT
+        if (m_blocked.get(trpid))
+        {
+          /* Don't pull from socket now, wait till unblocked */
+          m_blocked_with_data.set(trpid);
+          continue;
+        }
+#endif
         mask.set(m_epoll_events[i].data.u32);
       }
     }
@@ -1099,6 +1133,14 @@ TransporterRegistry::poll_TCP(Uint32 tim
       if (idx[i] != MAX_NODES + 1)
       {
         Uint32 node_id = t->getRemoteNodeId();
+#ifdef ERROR_INSERT
+        if (m_blocked.get(i))
+        {
+          /* Don't pull from socket now, wait till unblocked */
+          m_blocked_with_data.set(i);
+          continue;
+        }
+#endif
         if (m_socket_poller.has_read(idx[i]))
           mask.set(node_id);
       }
@@ -1179,6 +1221,19 @@ TransporterRegistry::performReceive()
     consume_extra_sockets();
   }
 
+#ifdef ERROR_INSERT
+  if (!m_blocked.isclear())
+  {
+    if (m_has_data_transporters.isclear())
+    {
+        /* poll sees data, but we want to ignore for now
+         * sleep a little to avoid busy loop
+         */
+      NdbSleep_MilliSleep(1);
+    }
+  }
+#endif
+
 #ifdef NDB_TCP_TRANSPORTER
   Uint32 id = 0;
   while ((id = m_has_data_transporters.find(id + 1)) != BitmaskImpl::NotFound)
@@ -1369,6 +1424,59 @@ TransporterRegistry::printState(){
 }
 #endif
 
+#ifdef ERROR_INSERT
+bool
+TransporterRegistry::isBlocked(NodeId nodeId)
+{
+  return m_blocked.get(nodeId);
+}
+
+void
+TransporterRegistry::blockReceive(NodeId nodeId)
+{
+  /* Check that node is not already blocked?
+   * Stop pulling from its socket (but track received data etc)
+   */
+  /* Shouldn't already be blocked with data */
+  assert(!m_blocked.get(nodeId));
+
+  m_blocked.set(nodeId);
+
+  if (m_has_data_transporters.get(nodeId))
+  {
+    assert(!m_blocked_with_data.get(nodeId));
+    m_blocked_with_data.set(nodeId);
+    m_has_data_transporters.clear(nodeId);
+  }
+}
+
+void
+TransporterRegistry::unblockReceive(NodeId nodeId)
+{
+  /* Check that node is blocked?
+   * Resume pulling from its socket
+   * Ensure in-flight data is processed if there was some
+   */
+  assert(m_blocked.get(nodeId));
+  assert(!m_has_data_transporters.get(nodeId));
+
+  m_blocked.clear(nodeId);
+
+  if (m_blocked_with_data.get(nodeId))
+  {
+    m_has_data_transporters.set(nodeId);
+  }
+
+  if (m_blocked_disconnected.get(nodeId))
+  {
+    /* Process disconnect notification/handling now */
+    m_blocked_disconnected.clear(nodeId);
+
+    report_disconnect(nodeId, m_disconnect_errors[nodeId]);
+  }
+}
+#endif
+
 IOState
 TransporterRegistry::ioState(NodeId nodeId) { 
   return ioStates[nodeId]; 
@@ -1488,6 +1596,19 @@ TransporterRegistry::report_disconnect(N
 {
   DBUG_ENTER("TransporterRegistry::report_disconnect");
   DBUG_PRINT("info",("performStates[%d]=DISCONNECTED",node_id));
+
+#ifdef ERROR_INSERT
+  if (m_blocked.get(node_id))
+  {
+    /* We are simulating real latency, so control events experience
+     * it too
+     */
+    m_blocked_disconnected.set(node_id);
+    m_disconnect_errors[node_id] = errnum;
+    DBUG_VOID_RETURN;
+  }
+#endif
+
   performStates[node_id] = DISCONNECTED;
   m_has_data_transporters.clear(node_id);
   callbackObj->reportDisconnect(node_id, errnum);

=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2010-12-04 11:20:36 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2010-12-15 13:47:15 +0000
@@ -28,6 +28,8 @@ Crash president when he starts to run in
 935 : Crash master on node failure (delayed) 
       and skip sending GSN_COMMIT_FAILREQ to specified node
 
+938 : Resume communications (DUMP 9991) when > 25% nodes failed
+
 ERROR CODES FOR TESTING NODE FAILURE, GLOBAL CHECKPOINT HANDLING:
 -----------------------------------------------------------------
 

=== modified file 'storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp'
--- a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp	2010-11-10 10:14:04 +0000
+++ b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp	2010-12-15 13:47:15 +0000
@@ -1781,6 +1781,108 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal
 #endif
 #endif
 
+#ifdef ERROR_INSERT
+  /* <Target NodeId> dump 9992 <NodeId list>
+   * On Target NodeId, block receiving signals from NodeId list
+   *
+   * <Target NodeId> dump 9993 <NodeId list>
+   * On Target NodeId, resume receiving signals from NodeId list
+   * 
+   * <Target NodeId> dump 9991
+   * On Target NodeId, resume receiving signals from any blocked node
+   *
+   *
+   * See also code in QMGR for blocking receive from nodes based
+   * on HB roles.
+   * 
+   */
+  if((arg == 9993) ||  /* Unblock recv from nodeid */
+     (arg == 9992))    /* Block recv from nodeid */
+  {
+    bool block = (arg == 9992);
+    for (Uint32 n = 1; n < signal->getLength(); n++)
+    {
+      Uint32 nodeId = signal->theData[n];
+
+      if ((nodeId > 0) &&
+          (nodeId < MAX_NODES))
+      {
+        if (block)
+        {
+          ndbout_c("CMVMI : Blocking receive from node %u", nodeId);
+          
+          globalTransporterRegistry.blockReceive(nodeId);
+        }
+        else
+        {
+          ndbout_c("CMVMI : Unblocking receive from node %u", nodeId);
+          
+          globalTransporterRegistry.unblockReceive(nodeId);
+        }
+      }
+      else
+      {
+        ndbout_c("CMVMI : Ignoring dump %u for node %u",
+                 arg, nodeId);
+      }
+    }
+  }
+  if (arg == 9990) /* Block recv from all ndbd matching pattern */
+  {
+    Uint32 pattern = 0;
+    if (signal->getLength() > 1)
+    {
+      pattern = signal->theData[1];
+      ndbout_c("CMVMI : Blocking receive from all ndbds matching pattern -%s-",
+               ((pattern == 1)? "Other side":"Unknown"));
+    }
+    
+    for (Uint32 node = 1; node < MAX_NDB_NODES; node++)
+    {
+      if (globalTransporterRegistry.is_connected(node))
+      {
+        if (getNodeInfo(node).m_type == NodeInfo::DB)
+        {
+          if (!globalTransporterRegistry.isBlocked(node))
+          {
+            switch (pattern)
+            {
+            case 1:
+            {
+              /* Match if given node is on 'other side' of
+               * 2-replica cluster 
+               */
+              if ((getOwnNodeId() & 1) != (node & 1))
+              {
+                /* Node is on the 'other side', match */
+                break;
+              }
+              /* Node is on 'my side', don't match */
+              continue;
+            }
+            default:
+              break;
+            }
+            ndbout_c("CMVMI : Blocking receive from node %u", node);
+            globalTransporterRegistry.blockReceive(node);
+          }
+        }
+      }
+    }
+  }
+  if (arg == 9991) /* Unblock recv from all blocked */
+  {
+    for (Uint32 node = 0; node < MAX_NODES; node++)
+    {
+      if (globalTransporterRegistry.isBlocked(node))
+      {
+        ndbout_c("CMVMI : Unblocking receive from node %u", node);
+        globalTransporterRegistry.unblockReceive(node);
+      }
+    }
+  }
+#endif
+
   if (arg == 9999)
   {
     Uint32 delay = 1000;

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2010-12-13 15:34:50 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2010-12-15 13:47:15 +0000
@@ -147,7 +147,38 @@ public:
   NdbNodeBitmask c_readnodes_nodes;
 
   Uint32 c_maxDynamicId;
-  
+
+  struct ConnectCheckRec
+  {
+    bool m_active;                      // Connectivity check underway?
+    Timer m_timer;                      // Check timer object
+    Uint32 m_currentRound;              // Last round started
+    Uint32 m_tick;                      // Periods elapsed in current check
+    NdbNodeBitmask m_nodesPinged;       // Nodes sent a NodePingReq in round
+    NdbNodeBitmask m_nodesWaiting;      // Nodes which have not sent a response
+    NdbNodeBitmask m_nodesFailedDuring; // Nodes which failed during check
+    NdbNodeBitmask m_nodesSuspect;      // Nodes with suspect connectivity
+  
+    ConnectCheckRec()
+    {
+      m_active = false;
+      m_currentRound = 0;
+      m_tick = 0;
+      m_nodesPinged.clear();
+      m_nodesWaiting.clear();
+      m_nodesFailedDuring.clear();
+      m_nodesSuspect.clear();
+    }
+    
+    void reportNodeConnect(Uint32 nodeId);
+    /* reportNodeFailure.
+     * return code true means the connect check is completed
+     */
+    bool reportNodeFailure(Uint32 nodeId);
+  };
+
+  ConnectCheckRec m_connectivity_check;
+
   // Records
   struct NodeRec {
     /*
@@ -319,6 +350,10 @@ private:
 
   void execUPGRADE_PROTOCOL_ORD(Signal*);
   
+  // Connectivity check signals
+  void execNODE_PINGREQ(Signal* signal);
+  void execNODE_PINGCONF(Signal* signal);
+
   // Statement blocks
   void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
   Uint32 check_startup(Signal* signal);
@@ -377,6 +412,7 @@ private:
   void setHbDelay(UintR aHbDelay);
   void setHbApiDelay(UintR aHbApiDelay);
   void setArbitTimeout(UintR aArbitTimeout);
+  void setCCDelay(UintR aCCDelay);
 
   // Interface to arbitration module
   void handleArbitStart(Signal* signal);
@@ -400,6 +436,17 @@ private:
   void computeArbitNdbMask(NdbNodeBitmaskPOD& aMask);
   void reportArbitEvent(Signal* signal, Ndb_logevent_type type,
                         const NodeBitmask mask = NodeBitmask());
+
+  // Interface to Connectivity Check
+  void startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 node);
+  void checkConnectivityTimeSignal(Signal* signal);
+  void connectivityCheckCompleted(Signal* signal);
+  bool isNodeConnectivitySuspect(Uint32 nodeId) const;
+  void handleFailFromSuspect(Signal* signal,
+                             Uint32 reason,
+                             Uint16 aFailedNode,
+                             Uint16 sourceNode);
+
   // Initialisation
   void initData();
   void initRecords();
@@ -511,6 +558,10 @@ private:
   // user-defined hbOrder must set all values non-zero and distinct
   int check_hb_order_config();
   bool m_hb_order_config_used;
+
+#ifdef ERROR_INSERT
+  Uint32 nodeFailCount;
+#endif
 };
 
 #endif

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp	2010-06-16 16:56:34 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp	2010-12-15 13:47:15 +0000
@@ -70,6 +70,10 @@ void Qmgr::initData() 
   ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI);
   
   setHbApiDelay(hbDBAPI);
+
+#ifdef ERROR_INSERT
+  nodeFailCount = 0;
+#endif
 }//Qmgr::initData()
 
 void Qmgr::initRecords() 
@@ -146,6 +150,10 @@ Qmgr::Qmgr(Block_context& ctx)
 
   addRecSignal(GSN_UPGRADE_PROTOCOL_ORD, &Qmgr::execUPGRADE_PROTOCOL_ORD);
   
+  // Connectivity check signals
+  addRecSignal(GSN_NODE_PING_REQ, &Qmgr::execNODE_PINGREQ);
+  addRecSignal(GSN_NODE_PING_CONF, &Qmgr::execNODE_PINGCONF);
+
   initData();
 }//Qmgr::Qmgr()
 

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2010-12-13 16:52:27 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2010-12-15 13:47:15 +0000
@@ -39,6 +39,7 @@
 #include <signaldata/Upgrade.hpp>
 #include <signaldata/EnableCom.hpp>
 #include <signaldata/RouteOrd.hpp>
+#include <signaldata/NodePing.hpp>
 
 #include <ndb_version.h>
 
@@ -437,6 +438,21 @@ void Qmgr::setArbitTimeout(UintR aArbitT
   arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
 }
 
+void Qmgr::setCCDelay(UintR aCCDelay)
+{
+  NDB_TICKS now = NdbTick_CurrentMillisecond();
+  if (aCCDelay == 0)
+  {
+    /* Connectivity check disabled */
+    m_connectivity_check.m_timer.setDelay(0);
+  }
+  else
+  {
+    m_connectivity_check.m_timer.setDelay(aCCDelay < 10 ? 10 : aCCDelay);
+    m_connectivity_check.m_timer.reset(now);
+  }
+}
+
 void Qmgr::execCONNECT_REP(Signal* signal)
 {
   jamEntry();
@@ -1991,6 +2007,7 @@ void Qmgr::execCM_ADD(Signal* signal) 
     jam();
     ndbrequire(addNodePtr.p->phase == ZSTARTING);
     addNodePtr.p->phase = ZRUNNING;
+    m_connectivity_check.reportNodeConnect(addNodePtr.i);
     setNodeInfo(addNodePtr.i).m_heartbeat_cnt= 0;
     c_clusterNodes.set(addNodePtr.i);
     findNeighbours(signal, __LINE__);
@@ -2389,6 +2406,7 @@ void Qmgr::initData(Signal* signal) 
   Uint32 hbDBDB = 1500;
   Uint32 arbitTimeout = 1000;
   Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
+  Uint32 ccInterval = 0;
   c_restartPartialTimeout = 30000;
   c_restartPartionedTimeout = 60000;
   c_restartFailureTimeout = ~0;
@@ -2401,6 +2419,8 @@ void Qmgr::initData(Signal* signal) 
 			    &c_restartPartionedTimeout);
   ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
 			    &c_restartFailureTimeout);
+  ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
+                            &ccInterval);
  
   if(c_restartPartialTimeout == 0)
   {
@@ -2418,6 +2438,7 @@ void Qmgr::initData(Signal* signal) 
   }
 
   setHbDelay(hbDBDB);
+  setCCDelay(ccInterval);
   setArbitTimeout(arbitTimeout);
 
   arbitRec.method = (ArbitRec::Method)arbitMethod;
@@ -2536,10 +2557,22 @@ void Qmgr::timerHandlingLab(Signal* sign
       sendHeartbeat(signal);
       hb_send_timer.reset(TcurrentTime);
     }
-    if (hb_check_timer.check(TcurrentTime)) {
-      jam();
-      checkHeartbeat(signal);
-      hb_check_timer.reset(TcurrentTime);
+    if (likely(! m_connectivity_check.m_active))
+    {
+      if (hb_check_timer.check(TcurrentTime)) {
+        jam();
+        checkHeartbeat(signal);
+        hb_check_timer.reset(TcurrentTime);
+      }
+    }
+    else
+    {
+      /* Connectivity check */
+      if (m_connectivity_check.m_timer.check(TcurrentTime)) {
+        jam();
+        checkConnectivityTimeSignal(signal);
+        m_connectivity_check.m_timer.reset(TcurrentTime);
+      }
     }
   }
   
@@ -2644,16 +2677,26 @@ void Qmgr::checkHeartbeat(Signal* signal
 
   if (getNodeInfo(nodePtr.i).m_heartbeat_cnt > 4) {
     jam();
-    /**----------------------------------------------------------------------
-     * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT 
-     * PERIODS. THUS WE DECLARE HIM DOWN.
-     *----------------------------------------------------------------------*/
-    signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
-    signal->theData[1] = nodePtr.i;
-    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
-
-    failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
-    return;
+    if (m_connectivity_check.m_timer.getDelay() > 0)
+    {
+      jam();
+      /* Start connectivity check, indicating the cause */
+      startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
+      return;
+    }
+    else
+    {
+      /**----------------------------------------------------------------------
+       * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT 
+       * PERIODS. THUS WE DECLARE HIM DOWN.
+       *----------------------------------------------------------------------*/
+      signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
+      signal->theData[1] = nodePtr.i;
+      sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
+      
+      failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
+      return;
+    }
   }//if
 }//Qmgr::checkHeartbeat()
 
@@ -3574,6 +3617,24 @@ void Qmgr::failReportLab(Signal* signal,
     jam();
     return;
   }
+
+  if (isNodeConnectivitySuspect(sourceNode) &&
+      // (! isNodeConnectivitySuspect(aFailedNode)) &&  // TODO : Required?
+      ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
+       (aFailCause == FailRep::ZLINK_FAILURE)))
+  {
+    jam();
+    /* Connectivity related failure report from a node with suspect
+     * connectivity, handle differently
+     */
+    ndbrequire(sourceNode != getOwnNodeId());
+    
+    handleFailFromSuspect(signal,
+                          aFailCause,
+                          aFailedNode,
+                          sourceNode);
+    return;
+  }
   
   if (failedNodePtr.i == getOwnNodeId()) {
     jam();
@@ -3630,6 +3691,9 @@ void Qmgr::failReportLab(Signal* signal,
     case FailRep::ZMULTI_NODE_SHUTDOWN:
       msg = "Multi node shutdown";
       break;
+    case FailRep::ZCONNECT_CHECK_FAILURE:
+      msg = "Connectivity check failure";
+      break;
     default:
       msg = "<UNKNOWN>";
     }
@@ -4485,6 +4549,34 @@ void Qmgr::failReport(Signal* signal,
   ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
   if (failedNodePtr.p->phase == ZRUNNING) {
     jam();
+
+#ifdef ERROR_INSERT
+    if (ERROR_INSERTED(938))
+    {
+      nodeFailCount++;
+      ndbout_c("QMGR : execFAIL_REP : %u nodes have failed", nodeFailCount);
+      /* Count DB nodes */
+      Uint32 nodeCount = 0;
+      for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
+      {
+        if (getNodeInfo(i).getType() == NODE_TYPE_DB)
+          nodeCount++;
+      }
+      
+      /* When > 25% of cluster has failed, resume communications */
+      if (nodeFailCount > (nodeCount / 4))
+      {
+        ndbout_c("QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
+        Signal save = *signal;
+        signal->theData[0] = 9991;
+        sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
+        *signal = save;
+        nodeFailCount = 0;
+        CLEAR_ERROR_INSERT_VALUE;
+      }
+    }
+#endif
+
 /* WE ALSO NEED TO ADD HERE SOME CODE THAT GETS OUR NEW NEIGHBOURS. */
     if (cpresident == getOwnNodeId()) {
       jam();
@@ -4534,6 +4626,13 @@ void Qmgr::failReport(Signal* signal,
       jam();
       return;
     }//if
+    
+    if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
+    {
+      jam();
+      connectivityCheckCompleted(signal);
+    }
+
     failedNodePtr.p->ndynamicId = 0;
     findNeighbours(signal, __LINE__);
     if (failedNodePtr.i == cpresident) {
@@ -5810,6 +5909,55 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal)
     }
     ndbout << buf << endl;
   }
+
+#ifdef ERROR_INSERT
+  Uint32 dumpCode = signal->theData[0];
+  if ((dumpCode == 9992) ||
+      (dumpCode == 9993))
+  {
+    if (signal->getLength() == 2)
+    {
+      Uint32 nodeId = signal->theData[1];
+      Uint32& newNodeId = signal->theData[1];
+      Uint32 length = 2;
+      assert(257 > MAX_NODES);
+      if (nodeId > MAX_NODES)
+      {
+        const char* type = "None";
+        switch (nodeId)
+        {
+        case 257:
+        {
+          /* Left (lower) neighbour */
+          newNodeId = cneighbourl;
+          type = "Left neighbour";
+          break;
+        }
+        case 258:
+        {
+          /* Right (higher) neighbour */
+          newNodeId = cneighbourh;
+          type = "Right neighbour";
+          break;
+        }
+        case 259:
+        {
+          /* President */
+          newNodeId = cpresident;
+          type = "President";
+          break;
+        }
+        }
+        ndbout_c("QMGR : Mapping request on node id %u to node id %u (%s)",
+                 nodeId, newNodeId, type);
+        if (newNodeId != nodeId)
+        {
+          sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
+        }
+      }
+    }
+  }
+#endif
 }//Qmgr::execDUMP_STATE_ORD()
 
 
@@ -6206,3 +6354,491 @@ Qmgr::check_hb_order_config()
   m_hb_order_config_used = true;
   return 0;
 }
+
+static const Uint32 CC_SuspectTicks = 1;
+static const Uint32 CC_FailedTicks = 2;
+
+void 
+Qmgr::startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 causingNode)
+{
+  jam();
+  ndbrequire(m_connectivity_check.m_timer.getDelay() > 0);
+  
+  if (m_connectivity_check.m_active)
+  {
+    jam();
+    /* Connectivity check underway already
+     * do nothing
+     */
+    return;
+  }
+
+  
+  m_connectivity_check.m_nodesPinged.clear();
+
+  /* Send NODE_PINGREQ signal to all other running nodes, and 
+   * initialise connectivity check bitmasks.
+   * Note that nodes may already be considered suspect due to 
+   * a previous connectivity check round.
+   */
+  Uint32 ownId = getOwnNodeId();
+  NodePingReq* pingReq = CAST_PTR(NodePingReq, &signal->theData[0]);
+  pingReq->senderData = ++m_connectivity_check.m_currentRound;
+  pingReq->senderRef = reference();
+
+  for (Uint32 i=1; i < MAX_NDB_NODES; i++)
+  {
+    if (i != ownId)
+    {
+      NodeRec& node = nodeRec[i];
+      if (node.phase == ZRUNNING)
+      {
+        /* If connection was considered ok, treat as unknown,
+         * If it was considered slow, continue to treat
+         *   as slow
+         */
+        sendSignal(node.blockRef, 
+                   GSN_NODE_PING_REQ, 
+                   signal, 
+                   NodePingReq::SignalLength, 
+                   JBA);
+
+        m_connectivity_check.m_nodesPinged.set(i);
+      }
+    }
+  }
+  
+  /* Initialise result bitmasks */
+  m_connectivity_check.m_nodesWaiting.assign(m_connectivity_check.m_nodesPinged);
+  m_connectivity_check.m_nodesFailedDuring.clear();
+
+  /* Ensure only live nodes are considered suspect */
+  m_connectivity_check.m_nodesSuspect.bitAND(m_connectivity_check.m_nodesPinged);
+
+  const char* reasonText = "Unknown";
+  bool firstTime = true;
+
+  switch(reason)
+  {
+  case FailRep::ZHEARTBEAT_FAILURE:
+    reasonText = "Heartbeat failure";
+    break;
+  case FailRep::ZCONNECT_CHECK_FAILURE:
+    reasonText = "Connectivity check request";
+    break;
+  default:
+    firstTime = false;
+    ndbrequire(m_connectivity_check.m_nodesSuspect.count() > 0);
+    break;
+  }
+  
+  if (!m_connectivity_check.m_nodesPinged.isclear())
+  {
+    jam();
+    {
+      char buff[100];
+      m_connectivity_check.m_nodesPinged.getText(buff);
+      if (firstTime)
+      {
+        g_eventLogger->info("QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
+                            m_connectivity_check.m_nodesPinged.count(),
+                            buff,
+                            reasonText,
+                            causingNode);
+      }
+      else
+      {
+        char buff2[100];
+        m_connectivity_check.m_nodesSuspect.getText(buff2);
+        g_eventLogger->info("QMGR : Restarting connectivity check of %u other nodes (%s) due to %u syspect nodes (%s)",
+                            m_connectivity_check.m_nodesPinged.count(),
+                            buff,
+                            m_connectivity_check.m_nodesSuspect.count(),
+                            buff2);
+      }
+    }
+
+    /* Generate cluster log event */
+    Uint32 bitmaskSz = NdbNodeBitmask::Size;
+    signal->theData[0] = NDB_LE_ConnectCheckStarted;
+    signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
+    signal->theData[2] = reason;
+    signal->theData[3] = causingNode;
+    signal->theData[4] = bitmaskSz;
+    Uint32* sigPtr = &signal->theData[5];
+    m_connectivity_check.m_nodesPinged.copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
+    m_connectivity_check.m_nodesSuspect.copyto(bitmaskSz, sigPtr); 
+    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5 + (2 * bitmaskSz), JBB);
+        
+    m_connectivity_check.m_active = true;
+    m_connectivity_check.m_tick = 0;
+    NDB_TICKS now = NdbTick_CurrentMillisecond();
+    m_connectivity_check.m_timer.reset(now);
+  }
+  else
+  {
+    g_eventLogger->info("QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
+                        reasonText,
+                        causingNode);
+  }
+}
+
+void 
+Qmgr::execNODE_PINGREQ(Signal* signal)
+{
+  jamEntry();
+  Uint32 ownId = getOwnNodeId();
+  const NodePingReq* pingReq = CAST_CONSTPTR(NodePingReq, &signal->theData[0]);
+  Uint32 sendersRef = signal->getSendersBlockRef();
+  Uint32 sendersNodeId = refToNode(sendersRef);
+  Uint32 senderData = pingReq->senderData;
+
+  ndbrequire(sendersNodeId != ownId);
+  
+  /* We will start our own connectivity check if necessary
+   * before responding with PING_CONF to the requestor.
+   * This means that the sending node will receive our PING_REQ
+   * before our PING_CONF, which should avoid them starting an 
+   * unnecessary extra connectivity check round in some cases.
+   */
+  if (likely(m_connectivity_check.m_timer.getDelay() > 0))
+  {
+    jam();
+    /* We have connectivity checking configured */
+    if (! m_connectivity_check.m_active)
+    {
+      jam();
+
+      {
+        /* Don't start a new connectivity check if the requesting
+         * node has failed from our point of view 
+         */
+        NodeRecPtr nodePtr;
+        nodePtr.i = sendersNodeId;
+        ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
+        if (unlikely(nodePtr.p->phase != ZRUNNING))
+        {
+          jam();
+          
+          g_eventLogger->warning("QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
+                                 sendersNodeId, nodePtr.p->phase);
+          return;
+        }
+      }
+
+      /* Start our own Connectivity Check now indicating reason and causing node */
+      startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
+    }
+  }
+  else
+  {
+    jam();
+    g_eventLogger->warning("QMGR : NODE_PINGREQ received from node %u, but connectivity "
+                           "checking not configured on this node.  Ensure all "
+                           "nodes have the same configuration for parameter "
+                           "ConnectCheckIntervalMillis.",
+                           sendersNodeId);
+  }
+
+  /* Now respond with NODE_PINGCONF */
+  NodePingConf* pingConf = CAST_PTR(NodePingConf, &signal->theData[0]);
+  
+  pingConf->senderData = senderData;
+  pingConf->senderRef = reference();
+  
+  sendSignal(sendersRef,
+             GSN_NODE_PING_CONF,
+             signal,
+             NodePingConf::SignalLength,
+             JBA);
+}
+
+void
+Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
+{
+  /* Clear any suspicion */
+  m_nodesSuspect.clear(nodeId);
+}
+
+bool
+Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
+{
+  if (unlikely(m_active))
+  {
+    m_nodesFailedDuring.set(nodeId);
+    
+    if (m_nodesWaiting.get(nodeId))
+    {
+      /* We were waiting for a NODE_PING_CONF from this node,
+       * remove it from the set
+       */
+      m_nodesWaiting.clear(nodeId);
+      
+      return m_nodesWaiting.isclear();
+    }
+  }
+  return false;
+}
+
+void
+Qmgr::execNODE_PINGCONF(Signal* signal)
+{
+  jamEntry();
+  
+  ndbrequire(m_connectivity_check.m_timer.getDelay() > 0);
+
+  const NodePingConf* pingConf = CAST_CONSTPTR(NodePingConf, &signal->theData[0]);
+  Uint32 sendersBlockRef = signal->getSendersBlockRef();
+  Uint32 sendersNodeId = refToNode(sendersBlockRef);
+  Uint32 roundNumber = pingConf->senderData;
+
+  ndbrequire(sendersNodeId != getOwnNodeId());
+  ndbrequire((m_connectivity_check.m_active)                                || /* Normal */
+             (m_connectivity_check.m_nodesWaiting.get(sendersNodeId)          || /* We killed last round */
+              m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId)));     /* Someone killed */
+
+  if (unlikely((! m_connectivity_check.m_active) ||
+               (roundNumber != m_connectivity_check.m_currentRound)))
+  {
+    g_eventLogger->warning("QMGR : Received NODEPING_CONF from node %u for round %u, "
+                           "but we are %sactive on round %u.  Discarding.",
+                           sendersNodeId,
+                           roundNumber,
+                           ((m_connectivity_check.m_active)?"":"in"),
+                           m_connectivity_check.m_currentRound);
+    return;
+  }
+
+  /* Node must have been pinged, we must be waiting for the response,
+   * or the node must have already failed
+   */
+  ndbrequire(m_connectivity_check.m_nodesPinged.get(sendersNodeId));
+  ndbrequire(m_connectivity_check.m_nodesWaiting.get(sendersNodeId) || 
+             m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId));
+  
+  m_connectivity_check.m_nodesWaiting.clear(sendersNodeId);
+
+  if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
+  {
+    jam();
+    /* Node responded on time, clear any suspicion about it */
+    m_connectivity_check.m_nodesSuspect.clear(sendersNodeId);
+  }
+  
+  if (m_connectivity_check.m_nodesWaiting.isclear())
+  {
+    jam();
+    /* Connectivity check round is now finished */
+    connectivityCheckCompleted(signal);
+  }
+}
+
+void
+Qmgr::connectivityCheckCompleted(Signal* signal)
+{
+  jam();
+  
+  m_connectivity_check.m_active = false;
+  
+  /* Log the following :
+   * Nodes checked
+   * Nodes responded ok
+   * Nodes responded late (now suspect)
+   * Nodes failed to respond.
+   * Nodes failed during
+   */
+  char pinged[100];
+  char late[100];
+  char silent[100];
+  char failed[100];
+  
+  /* Any 'waiting' nodes have been killed
+   * Surviving suspects do not include them.
+   */
+  NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
+  survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);
+    
+  /* Nodes that failed during the check are also excluded */
+  survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);
+
+  m_connectivity_check.m_nodesPinged.getText(pinged);
+  survivingSuspects.getText(late);
+  m_connectivity_check.m_nodesWaiting.getText(silent);
+  m_connectivity_check.m_nodesFailedDuring.getText(failed);
+  
+  g_eventLogger->info("QMGR : Connectivity check completed, "
+                      "%u other nodes checked (%s), "
+                      "%u responded on time, "
+                      "%u responded late (%s), "
+                      "%u no response will be failed (%s), "
+                      "%u failed during check (%s)\n",
+                      m_connectivity_check.m_nodesPinged.count(),
+                      pinged,
+                      m_connectivity_check.m_nodesPinged.count() - 
+                      m_connectivity_check.m_nodesSuspect.count(),
+                      survivingSuspects.count(),
+                      late,
+                      m_connectivity_check.m_nodesWaiting.count(),
+                      silent,
+                      m_connectivity_check.m_nodesFailedDuring.count(),
+                      failed);
+  
+  /* Log in Cluster log */
+  signal->theData[0] = NDB_LE_ConnectCheckCompleted;
+  signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
+  signal->theData[2] = survivingSuspects.count();
+  signal->theData[3] = m_connectivity_check.m_nodesWaiting.count() + 
+    m_connectivity_check.m_nodesFailedDuring.count();
+  
+  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
+  
+  if (survivingSuspects.count() > 0)
+  {
+    jam();
+    /* Still suspect nodes, start another round */
+    g_eventLogger->info("QMGR : Starting new connectivity check due to suspect nodes.");
+    /* Restart connectivity check, no external reason or cause */
+    startConnectivityCheck(signal, 0, 0);
+  }
+  else
+  {
+    jam();
+    /* No suspect nodes, stop the protocol now */
+    
+    g_eventLogger->info("QMGR : All other nodes (%u) connectivity ok.",
+                        m_connectivity_check.m_nodesPinged.count() - 
+                        (m_connectivity_check.m_nodesWaiting.count() + 
+                         m_connectivity_check.m_nodesFailedDuring.count()));
+    
+    /* Send a heartbeat to our right neighbour at this point as a gesture
+     * of goodwill
+     */
+    sendHeartbeat(signal);
+    hb_send_timer.reset(NdbTick_CurrentMillisecond());
+  };
+}
+
+void
+Qmgr::checkConnectivityTimeSignal(Signal* signal)
+{
+  /* Executed periodically when a connectivity check is
+   * underway.
+   * After CC_SuspectTicks have elapsed, any nodes
+   * which have not responded are considered
+   * 'Suspect'.
+   * After CC_FailedTicks have elapsed, any nodes
+   * which have not responded are considered
+   * to have failed, and failure handling
+   * begins.
+   */
+  jam();
+  
+  /* Preconditions, otherwise we shouldn't have been called */
+  ndbrequire(m_connectivity_check.m_timer.getDelay() > 0);
+  ndbrequire(m_connectivity_check.m_active);
+  ndbrequire(!m_connectivity_check.m_nodesWaiting.isclear());
+  
+  m_connectivity_check.m_tick++;
+
+  switch (m_connectivity_check.m_tick)
+  {
+  case CC_SuspectTicks:
+  {
+    jam();
+    /* Still waiting to hear from some nodes, they are now
+     * suspect
+     */
+    m_connectivity_check.m_nodesSuspect.bitOR(m_connectivity_check.m_nodesWaiting);
+    return;
+  }
+  case CC_FailedTicks:
+  {
+    jam();
+    /* Still waiting to hear from some nodes, they will now
+     * be failed
+     */
+    m_connectivity_check.m_active = false;
+    Uint32 nodeId = 0;
+
+    while ((nodeId = m_connectivity_check.m_nodesWaiting.find(nodeId)) 
+           != BitmaskImpl::NotFound)
+    {
+      jam();
+      /* Log failure reason */
+      /* Todo : Connectivity Check specific failure log? */
+      signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
+      signal->theData[1] = nodeId;
+      
+      sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
+      
+      /* Fail the node */
+      /* TODO : Consider real time break here */
+      failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
+      nodeId++;
+    }
+  
+    /* Now handle the end of the Connectivity Check */
+    connectivityCheckCompleted(signal);
+  }
+  }
+}
+
+bool
+Qmgr::isNodeConnectivitySuspect(Uint32 nodeId) const
+{
+  return m_connectivity_check.m_nodesSuspect.get(nodeId);
+}
+
+void 
+Qmgr::handleFailFromSuspect(Signal* signal,
+                            Uint32 reason,
+                            Uint16 aFailedNode,
+                            Uint16 sourceNode)
+{
+  jam();
+  
+  const char* reasonText = "Unknown";
+
+  /* We have received a failure report about some node X from 
+   * some other node that we consider to have suspect connectivity
+   * which may have caused the report.
+   * 
+   * We will 'invert' the sense of this, and handle it as
+   * a failure report of the sender, with the same cause.
+   */
+  switch(reason)
+  {
+  case FailRep::ZCONNECT_CHECK_FAILURE:
+    jam();
+    /* Suspect says that connectivity check failed for another node.
+     * As suspect has bad connectivity from our point of view, we 
+     * blame him.
+     */
+    reasonText = "ZCONNECT_CHECK_FAILURE";
+    break;
+  case FailRep::ZLINK_FAILURE:
+    jam();
+    /* Suspect says that link failed for another node.
+     * As suspect has bad connectivity from our point of view, we
+     * blame her.
+     */
+    reasonText = "ZLINK_FAILURE";
+    break;
+  default:
+    ndbrequire(false);
+  }
+  
+  g_eventLogger->warning("QMGR : Received Connectivity failure notification about "
+                         "%u from suspect node %u with reason %s.  "
+                         "Mapping to failure of %u sourced by me.",
+                         aFailedNode, sourceNode, reasonText, sourceNode);
+  
+  signal->theData[0] = NDB_LE_NodeFailRejected;
+  signal->theData[1] = reason;
+  signal->theData[2] = aFailedNode;
+  signal->theData[3] = sourceNode;
+
+  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
+
+  failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());
+}

=== modified file 'storage/ndb/src/mgmsrv/ConfigInfo.cpp'
--- a/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2010-12-02 11:02:29 +0000
+++ b/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2010-12-15 13:47:15 +0000
@@ -987,6 +987,18 @@ const ConfigInfo::ParamInfo ConfigInfo::
     STR_VALUE(MAX_INT_RNIL) },
 
   {
+    CFG_DB_CONNECT_CHECK_DELAY,
+    "ConnectCheckIntervalDelay",
+    DB_TOKEN,
+    "Time between "DB_TOKEN_PRINT" connectivity check stages.  "DB_TOKEN_PRINT" considered suspect after 1 and dead after 2 intervals.",
+    ConfigInfo::CI_USED,
+    0,
+    ConfigInfo::CI_INT,
+    "1500",
+    "0",
+    STR_VALUE(MAX_INT_RNIL) },
+
+  {
     CFG_DB_API_HEARTBEAT_INTERVAL,
     "HeartbeatIntervalDbApi",
     DB_TOKEN,

=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp	2010-11-24 12:16:55 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp	2010-12-15 13:47:15 +0000
@@ -4180,6 +4180,513 @@ runBug58453(NDBT_Context* ctx, NDBT_Step
   return NDBT_OK;
 }
 
+
+
+int runRestartToDynamicOrder(NDBT_Context* ctx, NDBT_Step* step)
+{
+  /* Here we perform node restarts to get the various node's
+   * dynamic ids in a particular order
+   * This affects which nodes heartbeat which (low -> high)
+   * and which is the president (lowest).
+   * Each restarting node gets a higher dynamic id, so the
+   * first node restarted will eventually become president
+   * Note that we're assuming NoOfReplicas == 2 here.
+   */
+  /* TODO :
+   * Refactor into
+   *   1) Get current cluster dynorder info
+   *   2) Choose a dynorder info
+   *   3) Restart to given dynorder if necessary
+   */
+  Uint32 dynOrder = ctx->getProperty("DynamicOrder", Uint32(0));
+  NdbRestarter restarter;
+  Uint32 numNodes = restarter.getNumDbNodes();
+
+  Vector<Uint32> currOrder;
+  Vector<Uint32> newOrder;
+  Vector<Uint32> odds;
+  Vector<Uint32> evens;
+
+  if (numNodes == 2)
+  {
+    ndbout_c("No Dynamic reordering possible with 2 nodes");
+    return NDBT_OK;
+  }
+  if (numNodes & 1)
+  {
+    ndbout_c("Non multiple-of-2 number of nodes.  Not supported");
+    return NDBT_FAILED;
+  }
+
+  Uint32 master = restarter.getMasterNodeId();
+  
+  for (Uint32 n=0; n < numNodes; n++)
+  {
+    currOrder.push_back(master);
+    master = restarter.getNextMasterNodeId(master);
+  }
+
+  for (Uint32 n=0; n < numNodes; n++)
+  {
+    Uint32 nodeId = restarter.getDbNodeId(n);
+    if (nodeId & 1)
+    {
+      odds.push_back(nodeId);
+    }
+    else
+    {
+      evens.push_back(nodeId);
+    }
+  }
+  
+  if (odds.size() != evens.size())
+  {
+    ndbout_c("Failed - odds.size() (%u) != evens.size() (%u)",
+             odds.size(),
+             evens.size());
+    return NDBT_FAILED;
+  }
+
+  ndbout_c("Current dynamic ordering : ");
+  for (Uint32 n=0; n<numNodes; n++)
+  {
+    ndbout_c("  %u %s", currOrder[n], ((n==0)?"*":""));
+  }
+
+  if (dynOrder == 0)
+  {
+    ndbout_c("No change in dynamic order");
+    return NDBT_OK;
+  }
+
+  Uint32 control= dynOrder - 1;
+  
+  bool oddPresident = control & 1;
+  bool interleave = control & 2;
+  bool reverseSideA = control & 4;
+  bool reverseSideB = control & 8;
+
+  /*     Odds first    Interleave O/E  Reverse A  Reverse B
+   * 1       N              N              N         N
+   * 2       Y              N              N         N
+   * 3       N              Y              N         N
+   * 4       Y              Y              N         N
+   * 5       N              N              Y         N
+   * 6       Y              N              Y         N
+   * 7       N              Y              Y         N
+   * 8       Y              Y              Y         N
+   * 9       N              N              N         Y
+   * 10      Y              N              N         Y
+   * 11      N              Y              N         Y
+   * 12      Y              Y              N         Y
+   * 13      N              N              Y         Y
+   * 14      Y              N              Y         Y
+   * 15      N              Y              Y         Y
+   * 16      Y              Y              Y         Y
+   *
+   * Interesting values
+   *   1) Even first, no interleave, no reverse
+   *      e.g. 2->4->6->3->5->7
+   *   2) Odd first, no interleave, no reverse
+   *      e.g. 3->5->7->2->4->6
+   *   3) Even first, interleave, no reverse
+   *      e.g. 2->3->4->5->6->7
+   *   9) Even first, no interleave, reverse B
+   *      e.g. 2->4->6->7->5->3
+   * 
+   *  'First' node becomes president.
+   *  Which node(s) monitor president affects when 
+   *  arbitration may be required
+   */
+
+  ndbout_c("Generating ordering with %s president, sides %sinterleaved",
+           (oddPresident?"odd": "even"), 
+           (interleave?"":"not "));
+  if (reverseSideA)
+    ndbout_c("  %s reversed", (oddPresident?"odds": "evens"));
+
+    if (reverseSideB)
+    ndbout_c("  %s reversed", (oddPresident?"evens": "odds"));
+
+  Vector<Uint32>* sideA;
+  Vector<Uint32>* sideB;
+
+  if (oddPresident)
+  {
+    sideA = &odds;
+    sideB = &evens;
+  }
+  else
+  {
+    sideA = &evens;
+    sideB = &odds;
+  }
+  
+  if (interleave)
+  {
+    for (Uint32 n=0; n < sideA->size(); n++)
+    {
+      Uint32 indexA = reverseSideA? (sideA->size() - (n+1)) : n;
+      newOrder.push_back((*sideA)[indexA]);
+      Uint32 indexB = reverseSideB? (sideB->size() - (n+1)) : n;
+      newOrder.push_back((*sideB)[indexB]);
+    }
+  }
+  else
+  {
+    for (Uint32 n=0; n < sideA->size(); n++)
+    {
+      Uint32 indexA = reverseSideA? (sideA->size() - (n+1)) : n;
+      newOrder.push_back((*sideA)[indexA]);
+    }
+    for (Uint32 n=0; n < sideB->size(); n++)
+    {
+      Uint32 indexB = reverseSideB? (sideB->size() - (n+1)) : n;
+      newOrder.push_back((*sideB)[indexB]);
+    }
+  }
+
+
+  bool diff = false;
+  for (Uint32 n=0; n < newOrder.size(); n++)
+  {
+    ndbout_c("  %u %s",
+             newOrder[n], 
+             ((n==0)?"*":" "));
+    
+    diff |= (newOrder[n] != currOrder[n]);
+  }
+  
+  if (!diff)
+  {
+    ndbout_c("Cluster already in correct configuration");
+    return NDBT_OK;
+  }
+  
+  for (Uint32 n=0; n < newOrder.size(); n++)
+  {
+    ndbout_c("Now restarting node %u", newOrder[n]);
+    if (restarter.restartOneDbNode(newOrder[n],
+                                   false, // initial
+                                   true,  // nostart
+                                   true)  // abort
+        != NDBT_OK)
+    {
+      ndbout_c("Failed to restart node");
+      return NDBT_FAILED;
+    }
+    if (restarter.waitNodesNoStart((const int*) &newOrder[n], 1) != NDBT_OK)
+    {
+      ndbout_c("Failed waiting for node to enter NOSTART state");
+      return NDBT_FAILED;
+    }
+    if (restarter.startNodes((const int*) &newOrder[n], 1) != NDBT_OK)
+    {
+      ndbout_c("Failed to start node");
+      return NDBT_FAILED;
+    }
+    if (restarter.waitNodesStarted((const int*) &newOrder[n], 1) != NDBT_OK)
+    {
+      ndbout_c("Failed waiting for node to start");
+      return NDBT_FAILED;
+    }
+    ndbout_c("  Done.");
+  }
+  
+  ndbout_c("All restarts completed.  NdbRestarter says master is %u",
+           restarter.getMasterNodeId());
+  if (restarter.getMasterNodeId() != (int) newOrder[0])
+  {
+    ndbout_c("  Should be %u, failing", newOrder[0]);
+    return NDBT_FAILED;
+  }
+
+  return NDBT_OK;
+}
+
+struct NodeGroupMembers
+{
+  Uint32 ngid;
+  Uint32 membCount;
+  Uint32 members[4];
+};
+
+template class Vector<NodeGroupMembers>;
+
+int analyseDynamicOrder(NDBT_Context* ctx, NDBT_Step* step)
+{
+  NdbRestarter restarter;
+  Uint32 numNodes = restarter.getNumDbNodes();
+  Uint32 master = restarter.getMasterNodeId();
+  Vector<Uint32> dynamicOrder;
+  Vector<Uint32> nodeGroup;
+  Vector<Uint32> monitorsNode;
+  Vector<Uint32> monitoredByNode;
+  Vector<Uint32> monitorsRemote;
+  Vector<Uint32> remoteMonitored;
+  Vector<Uint32> sameNGMonitored;
+  Vector<Uint32> distanceToRemote;
+  Vector<Uint32> nodeIdToDynamicIndex;
+  Uint32 maxDistanceToRemoteLink = 0;
+
+  /* TODO :
+   * Refactor into :
+   *   1) Determine dynorder from running cluster
+   *   2) Analyse dynorder in general
+   *   3) Analyse dynorder from point of view of latency split
+   *
+   *   4) Support splits other than odd/even total
+   *      - Partial split
+   *      - Some link failures
+   */
+  
+  /* Determine dynamic order from running cluster */
+  for (Uint32 n=0; n < numNodes; n++)
+  {
+    dynamicOrder.push_back(master);
+    nodeGroup.push_back(restarter.getNodeGroup(master));
+    master = restarter.getNextMasterNodeId(master);
+    Uint32 zero=0;
+    nodeIdToDynamicIndex.set(n, master, zero);
+  }
+  
+  /* Look at implied HB links */
+  for (Uint32 n=0; n < numNodes; n++)
+  {
+    Uint32 nodeId = dynamicOrder[n];
+    Uint32 monitoredByIndex = (n+1) % numNodes;
+    Uint32 monitorsIndex = (n+ numNodes - 1) % numNodes;
+    monitoredByNode.push_back(dynamicOrder[monitoredByIndex]);
+    monitorsNode.push_back(dynamicOrder[monitorsIndex]);
+    remoteMonitored.push_back((nodeId & 1) != (monitoredByNode[n] & 1));
+    monitorsRemote.push_back((nodeId & 1) != (monitorsNode[n] & 1));
+    sameNGMonitored.push_back(nodeGroup[n] == nodeGroup[monitoredByIndex]);
+  }
+
+  /* Look at split implications */
+  for (Uint32 n=0; n < numNodes; n++)
+  {
+    Uint32 distanceToRemoteHBLink = 0;
+    for (Uint32 m=0; m < numNodes; m++)
+    {
+      if (remoteMonitored[n+m])
+        break;
+      distanceToRemoteHBLink++;
+    }
+    
+    distanceToRemote.push_back(distanceToRemoteHBLink);
+    maxDistanceToRemoteLink = MAX(maxDistanceToRemoteLink, distanceToRemoteHBLink);
+  }
+
+  
+  ndbout_c("Dynamic order analysis");
+  
+  for (Uint32 n=0; n < numNodes; n++)
+  {
+    ndbout_c("  %u %s %u%s%u%s%u \t Monitored by %s nodegroup, Dist to remote link : %u",
+             dynamicOrder[n],
+             ((n==0)?"*":" "),
+             monitorsNode[n],
+             ((monitorsRemote[n])?"  >":"-->"),
+             dynamicOrder[n],
+             ((remoteMonitored[n])?"  >":"-->"),
+             monitoredByNode[n],
+             ((sameNGMonitored[n])?"same":"other"),
+             distanceToRemote[n]);
+  }
+
+  ndbout_c("\n");
+
+  Vector<NodeGroupMembers> nodeGroupMembers;
+  
+  for (Uint32 n=0; n < numNodes; n++)
+  {
+    Uint32 ng = nodeGroup[n];
+
+    bool ngfound = false;
+    for (Uint32 m = 0; m < nodeGroupMembers.size(); m++)
+    {
+      if (nodeGroupMembers[m].ngid == ng)
+      {
+        NodeGroupMembers& ngInfo = nodeGroupMembers[m];
+        ngInfo.members[ngInfo.membCount++] = dynamicOrder[n];
+        ngfound = true;
+        break;
+      }
+    }
+
+    if (!ngfound)
+    {
+      NodeGroupMembers newGroupInfo;
+      newGroupInfo.ngid = ng;
+      newGroupInfo.membCount = 1;
+      newGroupInfo.members[0] = dynamicOrder[n];
+      nodeGroupMembers.push_back(newGroupInfo);
+    }
+  }
+    
+  ndbout_c("Nodegroups");
+  
+  for (Uint32 n=0; n < nodeGroupMembers.size(); n++)
+  {
+    ndbout << "  " << nodeGroupMembers[n].ngid << " (";
+    bool allRemoteMonitored = true;
+    for (Uint32 m=0; m < nodeGroupMembers[n].membCount; m++)
+    {
+      Uint32 nodeId = nodeGroupMembers[n].members[m];
+      ndbout << nodeId;
+      if ((m+1) < nodeGroupMembers[n].membCount)
+        ndbout << ",";
+      Uint32 dynamicIndex = nodeIdToDynamicIndex[nodeId];
+      allRemoteMonitored &= remoteMonitored[dynamicIndex];
+    }
+    ndbout << ") Entirely remote monitored NGs risk : " 
+           << (allRemoteMonitored?"Y":"N") << "\n";
+  }
+  ndbout_c("\n");
+
+  ndbout_c("Cluster-split latency behaviour");
+
+  Uint32 oddPresident = dynamicOrder[0];
+  Uint32 evenPresident = dynamicOrder[0];
+
+  for (Uint32 n=0; n <= maxDistanceToRemoteLink; n++)
+  {
+    Vector<Uint32> failedNodeGroups;
+    ndbout << "  " << n <<" HB latency period(s), nodes (";
+    bool useComma = false;
+    bool presidentFailed = false;
+    for (Uint32 m=0; m < numNodes; m++)
+    {
+      if (distanceToRemote[m] == n)
+      {
+        Uint32 failingNodeId = dynamicOrder[m];
+        if (useComma)
+          ndbout << ",";
+
+        useComma = true;
+        ndbout << failingNodeId;
+
+        if ((failingNodeId == evenPresident) ||
+            (failingNodeId == oddPresident))
+        {
+          ndbout << "*";
+          presidentFailed = true;
+        }
+        
+        {
+          Uint32 ng = nodeGroup[m];
+          for (Uint32 i=0; i< nodeGroupMembers.size(); i++)
+          {
+            if (nodeGroupMembers[i].ngid == ng)
+            {
+              if ((--nodeGroupMembers[i].membCount) == 0)
+              {
+                failedNodeGroups.push_back(ng);
+              }
+            }
+          }
+        }
+      }
+    }
+    ndbout << ") will be declared failed." << endl;
+    if (failedNodeGroups.size() != 0)
+    {
+      ndbout << "    NG failure risk on reconnect for nodegroups : ";
+      for (Uint32 i=0; i< failedNodeGroups.size(); i++)
+      {
+        if (i > 0)
+          ndbout << ",";
+        ndbout << failedNodeGroups[i];
+      }
+      ndbout << endl;
+    }
+    if (presidentFailed)
+    {
+      /* A president (even/odd/both) has failed, we should
+       * calculate the new president(s) from the p.o.v.
+       * of both sides
+       */
+      Uint32 newOdd=0;
+      Uint32 newEven=0;
+      for (Uint32 i=0; i< numNodes; i++)
+      {
+        /* Each side finds either the first node on their
+         * side, or the first node on the other side which
+         * is still 'alive' from their point of view
+         */
+        bool candidateIsOdd = dynamicOrder[i] & 1;
+        
+        if (!newOdd)
+        {
+          if (candidateIsOdd ||
+              (distanceToRemote[i] > n))
+          {
+            newOdd = dynamicOrder[i];
+          }
+        }
+        if (!newEven)
+        {
+          if ((!candidateIsOdd) ||
+              (distanceToRemote[i] > n))
+          {
+            newEven = dynamicOrder[i];
+          }
+        }
+      }
+      
+      bool oddPresidentFailed = (oddPresident != newOdd);
+      bool evenPresidentFailed = (evenPresident != newEven);
+      
+      if (oddPresidentFailed)
+      {
+        ndbout_c("    Odd president (%u) failed, new odd president : %u",
+                 oddPresident, newOdd);
+        oddPresident = newOdd;
+      }
+      if (evenPresidentFailed)
+      {
+        ndbout_c("    Even president (%u) failed, new even president : %u",
+                 evenPresident, newEven);
+        evenPresident = newEven;
+      }
+      
+      if (oddPresident != evenPresident)
+      {
+        ndbout_c("    President role duplicated, Odd (%u), Even (%u)",
+                 oddPresident, evenPresident);
+      }
+        
+    }
+  }
+
+  ndbout << endl << endl;
+
+  return NDBT_OK;
+}
+
+int runSplitLatency25PctFail(NDBT_Context* ctx, NDBT_Step* step)
+{
+  /* Use dump commands to inject artificial inter-node latency
+   * Use an error insert to cause latency to disappear when
+   * a node observes > 25% of nodes failed.
+   * This should trigger a race of FAIL_REQs from both sides
+   * of the cluster, and can result in cluster failure
+   */
+  NdbRestarter restarter;
+
+  /* First the error insert which will drop latency (QMGR) */
+  restarter.insertErrorInAllNodes(938);
+
+  /* Now the dump code which causes the system to experience
+   * latency along odd/even lines (CMVMI)
+   */
+  int dumpStateArgs[] = {9990, 1};
+  restarter.dumpStateAllNodes(dumpStateArgs, 2);
+
+  return NDBT_OK;
+}
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -4715,6 +5222,14 @@ TESTCASE("ForceStopAndRestart", "Test re
 {
   STEP(runForceStopAndRestart);
 }
+TESTCASE("ClusterSplitLatency",
+         "Test behaviour of 2-replica cluster with latency between halves")
+{
+  TC_PROPERTY("DynamicOrder", Uint32(9));
+  INITIALIZER(runRestartToDynamicOrder);
+  INITIALIZER(analyseDynamicOrder);
+  INITIALIZER(runSplitLatency25PctFail);
+}
 NDBT_TESTSUITE_END(testNodeRestart);
 
 int main(int argc, const char** argv){


Attachment: [text/bzr-bundle] bzr/frazer@mysql.com-20101215134715-ktx5r9r5hz7pyru7.bundle
Thread
bzr commit into mysql-5.1-telco-7.0 branch (frazer:4062) Frazer Clement15 Dec