4022 Frazer Clement 2010-12-15
Commit of Connect Check functionality
Default for ConnectCheckIntervalDelay set to 0.
Modify CMakelists.txt for Windows
added:
storage/ndb/include/kernel/signaldata/NodePing.hpp
storage/ndb/src/common/debugger/signaldata/NodePing.cpp
modified:
storage/ndb/include/kernel/GlobalSignalNumbers.h
storage/ndb/include/kernel/signaldata/FailRep.hpp
storage/ndb/include/kernel/signaldata/SignalData.hpp
storage/ndb/include/mgmapi/mgmapi_config_parameters.h
storage/ndb/include/mgmapi/ndb_logevent.h
storage/ndb/include/transporter/TransporterRegistry.hpp
storage/ndb/src/common/debugger/EventLogger.cpp
storage/ndb/src/common/debugger/signaldata/CMakeLists.txt
storage/ndb/src/common/debugger/signaldata/Makefile.am
storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp
storage/ndb/src/common/debugger/signaldata/SignalNames.cpp
storage/ndb/src/common/transporter/TransporterRegistry.cpp
storage/ndb/src/kernel/blocks/ERROR_codes.txt
storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
storage/ndb/src/mgmsrv/ConfigInfo.cpp
storage/ndb/test/ndbapi/testNodeRestart.cpp
4021 Craig L Russell 2010-12-14
Fix bug in varchar silently truncating data too long to fit in column
modified:
storage/ndb/clusterj/clusterj-tie/src/main/java/com/mysql/clusterj/tie/Utility.java
storage/ndb/clusterj/clusterj-tie/src/main/resources/com/mysql/clusterj/tie/Bundle.properties
=== modified file 'storage/ndb/include/kernel/GlobalSignalNumbers.h'
--- a/storage/ndb/include/kernel/GlobalSignalNumbers.h 2010-10-20 07:12:58 +0000
+++ b/storage/ndb/include/kernel/GlobalSignalNumbers.h 2010-12-15 13:50:17 +0000
@@ -25,7 +25,7 @@
*
* When adding a new signal, remember to update MAX_GSN and SignalNames.cpp
*/
-const GlobalSignalNumber MAX_GSN = 768;
+const GlobalSignalNumber MAX_GSN = 770;
struct GsnName {
GlobalSignalNumber gsn;
@@ -1075,4 +1075,8 @@ extern const GlobalSignalNumber NO_OF_SI
#define GSN_RELEASE_PAGES_REQ 680
#define GSN_RELEASE_PAGES_CONF 681
+/* NODE_PING signals */
+#define GSN_NODE_PING_REQ 769 /* distr. */
+#define GSN_NODE_PING_CONF 770 /* distr. */
+
#endif
=== modified file 'storage/ndb/include/kernel/signaldata/FailRep.hpp'
--- a/storage/ndb/include/kernel/signaldata/FailRep.hpp 2010-12-13 16:43:31 +0000
+++ b/storage/ndb/include/kernel/signaldata/FailRep.hpp 2010-12-15 13:50:17 +0000
@@ -52,7 +52,8 @@ public:
ZLINK_FAILURE=5,
ZOTHERNODE_FAILED_DURING_START=6,
ZMULTI_NODE_SHUTDOWN = 7,
- ZPARTITIONED_CLUSTER = 8
+ ZPARTITIONED_CLUSTER = 8,
+ ZCONNECT_CHECK_FAILURE = 9
};
Uint32 getFailSourceNodeId(Uint32 sigLen) const
=== added file 'storage/ndb/include/kernel/signaldata/NodePing.hpp'
--- a/storage/ndb/include/kernel/signaldata/NodePing.hpp 1970-01-01 00:00:00 +0000
+++ b/storage/ndb/include/kernel/signaldata/NodePing.hpp 2010-12-15 13:50:17 +0000
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef NODEPING_HPP
+#define NODEPING_HPP
+
+#include "SignalData.hpp"
+
+
+/*
+ * NodePingReq/Conf is sent between QMGR nodes to help determine the
+ * available connectivity in a cluster experiencing heartbeat problems
+ *
+ * When a node detects that it has not received a heartbeat from a
+ * connected node for the heartbeat period, it initiates a global
+ * connectivity check protocol by sending a NODE_PING_REQ signal to all
+ * nodes considered to be running.
+ *
+ * On receiving this signal, a node will respond with NODE_PING_CONF to
+ * the sender, and begin its own connectivity check, if it is not
+ * already involved in one.
+ *
+ * In this way, all nodes reachable within some latency n will begin
+ * a connectivity check. If they do not receive a NODE_PING_CONF from a
+ * peer node within some further latency m, then they consider it to
+ * be suspect, and after a further latency p they consider it failed.
+ *
+ * In environments where latency between nodes fluctuates, but
+ * connectivity is maintained (for example where TCP connections observe
+ * latency due to underlying IP re-routing/failover), the connectivity
+ * check allows nodes to arm themselves in preparation for the potential
+ * race of FAIL_REP signals that can arise in these situations, by marking
+ * connections experiencing latency as SUSPECT. Once a node is marked as
+ * SUSPECT, FAIL_REP signals originating from it may not be trusted or
+ * acted upon.
+ */
+
+class NodePingReq {
+ /**
+ * Sender(s) / Receiver(s)
+ */
+ friend class Qmgr;
+public:
+ STATIC_CONST( SignalLength = 2 );
+
+ Uint32 senderData;
+ Uint32 senderRef;
+};
+
+class NodePingConf {
+ /**
+ * Sender(s) / Receiver(s)
+ */
+ friend class Qmgr;
+public:
+ STATIC_CONST( SignalLength = 2 );
+
+ Uint32 senderData;
+ Uint32 senderRef;
+};
+
+#endif
=== modified file 'storage/ndb/include/kernel/signaldata/SignalData.hpp'
--- a/storage/ndb/include/kernel/signaldata/SignalData.hpp 2010-08-17 09:54:53 +0000
+++ b/storage/ndb/include/kernel/signaldata/SignalData.hpp 2010-12-15 13:50:17 +0000
@@ -305,4 +305,7 @@ GSN_PRINT_SIGNATURE(printLOCAL_ROUTE_ORD
GSN_PRINT_SIGNATURE(printDBINFO_SCAN);
GSN_PRINT_SIGNATURE(printDBINFO_SCAN_REF);
+GSN_PRINT_SIGNATURE(printNODE_PING_REQ);
+GSN_PRINT_SIGNATURE(printNODE_PING_CONF);
+
#endif
=== modified file 'storage/ndb/include/mgmapi/mgmapi_config_parameters.h'
--- a/storage/ndb/include/mgmapi/mgmapi_config_parameters.h 2010-12-02 11:02:29 +0000
+++ b/storage/ndb/include/mgmapi/mgmapi_config_parameters.h 2010-12-15 13:50:17 +0000
@@ -179,6 +179,8 @@
#define CFG_DB_EVENTLOG_BUFFER_SIZE 613
#define CFG_DB_NUMA 614
+#define CFG_DB_CONNECT_CHECK_DELAY 615
+
#define CFG_NODE_ARBIT_RANK 200
#define CFG_NODE_ARBIT_DELAY 201
#define CFG_RESERVED_SEND_BUFFER_MEMORY 202
=== modified file 'storage/ndb/include/mgmapi/ndb_logevent.h'
--- a/storage/ndb/include/mgmapi/ndb_logevent.h 2010-10-13 12:22:51 +0000
+++ b/storage/ndb/include/mgmapi/ndb_logevent.h 2010-12-15 13:50:17 +0000
@@ -118,6 +118,12 @@ extern "C" {
NDB_LE_LCP_TakeoverStarted = 33,
/** NDB_MGM_EVENT_CATEGORY_NODE_RESTART */
NDB_LE_LCP_TakeoverCompleted = 34,
+ /** NDB_MGM_EVENT_CATEGORY_NODE_RESTART */
+ NDB_LE_ConnectCheckStarted = 82,
+ /** NDB_MGM_EVENT_CATEGORY_NODE_RESTART */
+ NDB_LE_ConnectCheckCompleted = 83,
+ /** NDB_MGM_EVENT_CATEGORY_NODE_RESTART */
+ NDB_LE_NodeFailRejected = 84,
/** NDB_MGM_EVENT_CATEGORY_STATISTIC */
NDB_LE_TransReportCounters = 35,
@@ -478,6 +484,21 @@ extern "C" {
struct ndb_logevent_LCP_TakeoverCompleted {
unsigned state;
};
+ struct ndb_logevent_ConnectCheckStarted {
+ unsigned other_node_count;
+ unsigned reason;
+ unsigned causing_node;
+ };
+ struct ndb_logevent_ConnectCheckCompleted {
+ unsigned nodes_checked;
+ unsigned nodes_suspect;
+ unsigned nodes_failed;
+ };
+ struct ndb_logevent_NodeFailRejected {
+ unsigned reason;
+ unsigned failed_node;
+ unsigned source_node;
+ };
/* STATISTIC */
struct ndb_logevent_TransReportCounters {
@@ -834,6 +855,9 @@ extern "C" {
struct ndb_logevent_GCP_TakeoverCompleted GCP_TakeoverCompleted;
struct ndb_logevent_LCP_TakeoverStarted LCP_TakeoverStarted;
struct ndb_logevent_LCP_TakeoverCompleted LCP_TakeoverCompleted;
+ struct ndb_logevent_ConnectCheckStarted ConnectCheckStarted;
+ struct ndb_logevent_ConnectCheckCompleted ConnectCheckCompleted;
+ struct ndb_logevent_NodeFailRejected NodeFailRejected;
/* STATISTIC */
struct ndb_logevent_TransReportCounters TransReportCounters;
=== modified file 'storage/ndb/include/transporter/TransporterRegistry.hpp'
--- a/storage/ndb/include/transporter/TransporterRegistry.hpp 2010-11-09 20:40:03 +0000
+++ b/storage/ndb/include/transporter/TransporterRegistry.hpp 2010-12-15 13:50:17 +0000
@@ -325,6 +325,13 @@ public:
#ifdef DEBUG_TRANSPORTER
void printState();
#endif
+
+#ifdef ERROR_INSERT
+ /* Utils for testing latency issues */
+ bool isBlocked(NodeId nodeId);
+ void blockReceive(NodeId nodeId);
+ void unblockReceive(NodeId nodeId);
+#endif
class Transporter_interface {
public:
@@ -355,6 +362,13 @@ private:
int nSCITransporters;
int nSHMTransporters;
+#ifdef ERROR_INSERT
+ Bitmask<MAX_NTRANSPORTERS/32> m_blocked;
+ Bitmask<MAX_NTRANSPORTERS/32> m_blocked_with_data;
+ Bitmask<MAX_NTRANSPORTERS/32> m_blocked_disconnected;
+ int m_disconnect_errors[MAX_NTRANSPORTERS];
+#endif
+
/**
* Bitmask of transporters that has data "carried over" since
* last performReceive
=== modified file 'storage/ndb/src/common/debugger/EventLogger.cpp'
--- a/storage/ndb/src/common/debugger/EventLogger.cpp 2010-10-13 12:22:51 +0000
+++ b/storage/ndb/src/common/debugger/EventLogger.cpp 2010-12-15 13:50:17 +0000
@@ -24,6 +24,7 @@
#include <NdbConfig.h>
#include <kernel/BlockNumbers.h>
#include <signaldata/ArbitSignalData.hpp>
+#include <signaldata/FailRep.hpp>
#include <NodeState.hpp>
#include <version.h>
#include <ndb_version.h>
@@ -1181,6 +1182,141 @@ void getTextSavedEvent(QQQQ)
abort();
}
+void getTextConnectCheckStarted(QQQQ)
+{
+ /* EventReport format :
+ * 1 : other_node_count
+ * 2 : reason (FailRep causes or 0)
+ * 3 : causing_node (if from FailRep)
+ * 4 : bitmask wordsize
+ * 5 : bitmask[2]
+ */
+ Uint32 other_node_count = theData[1];
+ Uint32 reason = theData[2];
+ Uint32 causing_node = theData[3];
+ Uint32 bitmaskSz = theData[4];
+ char otherNodeMask[100];
+ char suspectNodeMask[100];
+ BitmaskImpl::getText(bitmaskSz, theData + 5 + (0 * bitmaskSz), otherNodeMask);
+ BitmaskImpl::getText(bitmaskSz, theData + 5 + (1 * bitmaskSz), suspectNodeMask);
+ Uint32 suspectCount = BitmaskImpl::count(bitmaskSz, theData + 5 + (1 * bitmaskSz));
+
+ if (reason)
+ {
+ /* Connect check started for specific reason */
+ const char * reasonText = NULL;
+ switch (reason)
+ {
+ case FailRep::ZHEARTBEAT_FAILURE:
+ reasonText = "Heartbeat failure";
+ break;
+ case FailRep::ZCONNECT_CHECK_FAILURE:
+ reasonText = "Connectivity check request";
+ break;
+ default:
+ reasonText = "UNKNOWN";
+ break;
+ }
+
+ BaseString::snprintf(m_text, m_text_len,
+ "Connectivity Check of %u other nodes (%s) started due to %s from node %u.",
+ other_node_count,
+ otherNodeMask,
+ reasonText,
+ causing_node);
+ }
+ else
+ {
+ /* Connect check restarted due to suspect nodes */
+ BaseString::snprintf(m_text, m_text_len,
+ "Connectivity Check of %u nodes (%s) restarting due to %u suspect nodes (%s).",
+ other_node_count,
+ otherNodeMask,
+ suspectCount,
+ suspectNodeMask);
+ }
+}
+
+void getTextConnectCheckCompleted(QQQQ)
+{
+ /* EventReport format
+ * 1 : Nodes checked
+ * 2 : Suspect nodes
+ * 3 : Failed nodes
+ */
+
+ Uint32 nodes_checked = theData[1];
+ Uint32 suspect_nodes = theData[2];
+ Uint32 failed_nodes = theData[3];
+
+ if ((failed_nodes + suspect_nodes) == 0)
+ {
+ /* All connectivity ok */
+ BaseString::snprintf(m_text, m_text_len,
+ "Connectivity Check completed on %u nodes, connectivity ok",
+ nodes_checked);
+ }
+ else
+ {
+ if (failed_nodes > 0)
+ {
+ if (suspect_nodes > 0)
+ {
+ BaseString::snprintf(m_text, m_text_len,
+ "Connectivity Check completed on %u nodes. %u nodes failed. "
+ "%u nodes still suspect, repeating check.",
+ nodes_checked,
+ failed_nodes,
+ suspect_nodes);
+ }
+ else
+ {
+ BaseString::snprintf(m_text, m_text_len,
+ "Connectivity Check completed on %u nodes. %u nodes failed. "
+ "Connectivity now OK",
+ nodes_checked,
+ failed_nodes);
+ }
+ }
+ else
+ {
+ /* Just suspect nodes */
+ BaseString::snprintf(m_text, m_text_len,
+ "Connectivity Check completed on %u nodes. %u nodes still suspect, "
+ "repeating check.",
+ nodes_checked,
+ suspect_nodes);
+ }
+ }
+}
+
+void getTextNodeFailRejected(QQQQ)
+{
+ Uint32 reason = theData[1];
+ Uint32 failed_node = theData[2];
+ Uint32 source_node = theData[3];
+
+ const char* reasonText = "Unknown";
+ switch (reason)
+ {
+ case FailRep::ZCONNECT_CHECK_FAILURE:
+ reasonText = "Connect Check Failure";
+ break;
+ case FailRep::ZLINK_FAILURE:
+ reasonText = "Link Failure";
+ break;
+ }
+
+ BaseString::snprintf(m_text, m_text_len,
+ "Received FAIL_REP (%s (%u)) for node %u sourced by suspect node %u. "
+ "Rejecting as failure of node %u.",
+ reasonText,
+ reason,
+ failed_node,
+ source_node,
+ source_node);
+}
+
#if 0
BaseString::snprintf(m_text,
m_text_len,
@@ -1254,6 +1390,10 @@ const EventLoggerBase::EventRepLogLevelM
ROW(LCP_TakeoverStarted, LogLevel::llNodeRestart, 7, Logger::LL_INFO ),
ROW(LCP_TakeoverCompleted, LogLevel::llNodeRestart, 7, Logger::LL_INFO ),
+ ROW(ConnectCheckStarted, LogLevel::llNodeRestart, 6, Logger::LL_INFO ),
+ ROW(ConnectCheckCompleted, LogLevel::llNodeRestart, 6, Logger::LL_INFO ),
+ ROW(NodeFailRejected, LogLevel::llNodeRestart, 6, Logger::LL_ALERT ),
+
// STATISTIC
ROW(TransReportCounters, LogLevel::llStatistic, 8, Logger::LL_INFO ),
ROW(OperationReportCounters, LogLevel::llStatistic, 8, Logger::LL_INFO ),
=== modified file 'storage/ndb/src/common/debugger/signaldata/CMakeLists.txt'
--- a/storage/ndb/src/common/debugger/signaldata/CMakeLists.txt 2010-11-17 11:35:04 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/CMakeLists.txt 2010-12-15 13:50:17 +0000
@@ -44,5 +44,5 @@ ADD_LIBRARY(ndbsignaldata STATIC
LqhTrans.cpp ReadNodesConf.cpp CntrStart.cpp
ScanFrag.cpp ApiVersion.cpp
LocalRouteOrd.cpp
- DbinfoScan.cpp)
+ DbinfoScan.cpp NodePing.cpp)
=== modified file 'storage/ndb/src/common/debugger/signaldata/Makefile.am'
--- a/storage/ndb/src/common/debugger/signaldata/Makefile.am 2010-08-06 08:19:19 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/Makefile.am 2010-12-15 13:50:17 +0000
@@ -47,7 +47,7 @@ libsignaldataprint_la_SOURCES = \
CreateTrigImpl.cpp DropTrigImpl.cpp \
CreateIndxImpl.cpp DropIndxImpl.cpp AlterIndxImpl.cpp \
BuildIndx.cpp BuildIndxImpl.cpp ApiVersion.cpp \
- LocalRouteOrd.cpp DbinfoScan.cpp
+ LocalRouteOrd.cpp DbinfoScan.cpp NodePing.cpp
include $(top_srcdir)/storage/ndb/config/common.mk.am
include $(top_srcdir)/storage/ndb/config/type_ndbapi.mk.am
=== added file 'storage/ndb/src/common/debugger/signaldata/NodePing.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/NodePing.cpp 1970-01-01 00:00:00 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/NodePing.cpp 2010-12-15 13:50:17 +0000
@@ -0,0 +1,38 @@
+/*
+ Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <signaldata/NodePing.hpp>
+
+bool
+printNODE_PING_REQ(FILE * output, const Uint32 * theData,
+ Uint32 len, Uint16 receiverBlockNo) {
+ const NodePingReq * const sig = CAST_CONSTPTR(NodePingReq, theData);
+ fprintf(output, " senderRef : %x round : %u\n",
+ sig->senderRef,
+ sig->senderData);
+ return true;
+}
+
+bool
+printNODE_PING_CONF(FILE * output, const Uint32 * theData,
+ Uint32 len, Uint16 receiverBlockNo) {
+ const NodePingConf * const sig = CAST_CONSTPTR(NodePingConf, theData);
+ fprintf(output, " senderRef : %x round : %u\n",
+ sig->senderRef,
+ sig->senderData);
+ return true;
+}
=== modified file 'storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp 2010-08-27 17:07:19 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp 2010-12-15 13:50:17 +0000
@@ -257,6 +257,9 @@ SignalDataPrintFunctions[] = {
,{ GSN_DBINFO_SCANCONF, printDBINFO_SCAN }
,{ GSN_DBINFO_SCANREF, printDBINFO_SCAN_REF }
+ ,{ GSN_NODE_PING_REQ, printNODE_PING_REQ }
+ ,{ GSN_NODE_PING_CONF, printNODE_PING_CONF }
+
,{ 0, 0 }
};
=== modified file 'storage/ndb/src/common/debugger/signaldata/SignalNames.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2010-10-20 07:12:58 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2010-12-15 13:50:17 +0000
@@ -761,5 +761,8 @@ const GsnName SignalNames [] = {
,{ GSN_SYNC_PATH_REQ, "SYNC_PATH_REQ" }
,{ GSN_SYNC_PATH_CONF, "SYNC_PATH_CONF" }
+
+ ,{ GSN_NODE_PING_REQ, "NODE_PING_REQ" }
+ ,{ GSN_NODE_PING_CONF, "NODE_PING_CONF" }
};
const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);
=== modified file 'storage/ndb/src/common/transporter/TransporterRegistry.cpp'
--- a/storage/ndb/src/common/transporter/TransporterRegistry.cpp 2010-11-09 20:40:03 +0000
+++ b/storage/ndb/src/common/transporter/TransporterRegistry.cpp 2010-12-15 13:50:17 +0000
@@ -128,6 +128,11 @@ TransporterRegistry::TransporterRegistry
}
#endif
+#ifdef ERROR_INSERT
+ m_blocked.clear();
+ m_blocked_with_data.clear();
+ m_blocked_disconnected.clear();
+#endif
// Initialize member variables
nTransporters = 0;
nTCPTransporters = 0;
@@ -665,6 +670,16 @@ TransporterRegistry::prepareSend(Transpo
return SEND_MESSAGE_TOO_BIG;
}
} else {
+#ifdef ERROR_INSERT
+ if (m_blocked.get(nodeId))
+ {
+ /* Looks like it disconnected while blocked. We'll pretend
+ * not to notice for now
+ */
+ WARNING("Signal to " << nodeId << " discarded as node blocked + disconnected");
+ return SEND_OK;
+ }
+#endif
DEBUG("Signal to " << nodeId << " lost(disconnect) ");
return SEND_DISCONNECTED;
}
@@ -738,6 +753,16 @@ TransporterRegistry::prepareSend(Transpo
return SEND_MESSAGE_TOO_BIG;
}
} else {
+#ifdef ERROR_INSERT
+ if (m_blocked.get(nodeId))
+ {
+ /* Looks like it disconnected while blocked. We'll pretend
+ * not to notice for now
+ */
+ WARNING("Signal to " << nodeId << " discarded as node blocked + disconnected");
+ return SEND_OK;
+ }
+#endif
DEBUG("Signal to " << nodeId << " lost(disconnect) ");
return SEND_DISCONNECTED;
}
@@ -951,6 +976,15 @@ TransporterRegistry::pollReceive(Uint32
{
for (int i = 0; i < num_socket_events; i++)
{
+ Uint32 trpid = m_epoll_events[i].data.u32;
+#ifdef ERROR_INSERT
+ if (m_blocked.get(trpid))
+ {
+ /* Don't pull from socket now, wait till unblocked */
+ m_blocked_with_data.set(trpid);
+ continue;
+ }
+#endif
mask.set(m_epoll_events[i].data.u32);
}
}
@@ -1090,6 +1124,14 @@ TransporterRegistry::poll_TCP(Uint32 tim
if (idx[i] != MAX_NODES + 1)
{
Uint32 node_id = t->getRemoteNodeId();
+#ifdef ERROR_INSERT
+ if (m_blocked.get(i))
+ {
+ /* Don't pull from socket now, wait till unblocked */
+ m_blocked_with_data.set(i);
+ continue;
+ }
+#endif
if (m_socket_poller.has_read(idx[i]))
mask.set(node_id);
}
@@ -1170,6 +1212,19 @@ TransporterRegistry::performReceive()
consume_extra_sockets();
}
+#ifdef ERROR_INSERT
+ if (!m_blocked.isclear())
+ {
+ if (m_has_data_transporters.isclear())
+ {
+ /* poll sees data, but we want to ignore for now
+ * sleep a little to avoid busy loop
+ */
+ NdbSleep_MilliSleep(1);
+ }
+ }
+#endif
+
#ifdef NDB_TCP_TRANSPORTER
Uint32 id = 0;
while ((id = m_has_data_transporters.find(id + 1)) != BitmaskImpl::NotFound)
@@ -1360,6 +1415,59 @@ TransporterRegistry::printState(){
}
#endif
+#ifdef ERROR_INSERT
+bool
+TransporterRegistry::isBlocked(NodeId nodeId)
+{
+ return m_blocked.get(nodeId);
+}
+
+void
+TransporterRegistry::blockReceive(NodeId nodeId)
+{
+ /* Check that node is not already blocked?
+ * Stop pulling from its socket (but track received data etc)
+ */
+ /* Shouldn't already be blocked with data */
+ assert(!m_blocked.get(nodeId));
+
+ m_blocked.set(nodeId);
+
+ if (m_has_data_transporters.get(nodeId))
+ {
+ assert(!m_blocked_with_data.get(nodeId));
+ m_blocked_with_data.set(nodeId);
+ m_has_data_transporters.clear(nodeId);
+ }
+}
+
+void
+TransporterRegistry::unblockReceive(NodeId nodeId)
+{
+ /* Check that node is blocked?
+ * Resume pulling from its socket
+ * Ensure in-flight data is processed if there was some
+ */
+ assert(m_blocked.get(nodeId));
+ assert(!m_has_data_transporters.get(nodeId));
+
+ m_blocked.clear(nodeId);
+
+ if (m_blocked_with_data.get(nodeId))
+ {
+ m_has_data_transporters.set(nodeId);
+ }
+
+ if (m_blocked_disconnected.get(nodeId))
+ {
+ /* Process disconnect notification/handling now */
+ m_blocked_disconnected.clear(nodeId);
+
+ report_disconnect(nodeId, m_disconnect_errors[nodeId]);
+ }
+}
+#endif
+
IOState
TransporterRegistry::ioState(NodeId nodeId) {
return ioStates[nodeId];
@@ -1479,6 +1587,19 @@ TransporterRegistry::report_disconnect(N
{
DBUG_ENTER("TransporterRegistry::report_disconnect");
DBUG_PRINT("info",("performStates[%d]=DISCONNECTED",node_id));
+
+#ifdef ERROR_INSERT
+ if (m_blocked.get(node_id))
+ {
+ /* We are simulating real latency, so control events experience
+ * it too
+ */
+ m_blocked_disconnected.set(node_id);
+ m_disconnect_errors[node_id] = errnum;
+ DBUG_VOID_RETURN;
+ }
+#endif
+
performStates[node_id] = DISCONNECTED;
m_has_data_transporters.clear(node_id);
callbackObj->reportDisconnect(node_id, errnum);
=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2010-12-04 11:20:36 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2010-12-15 13:50:17 +0000
@@ -28,6 +28,8 @@ Crash president when he starts to run in
935 : Crash master on node failure (delayed)
and skip sending GSN_COMMIT_FAILREQ to specified node
+938 : Resume communications (DUMP 9991) when > 25% nodes failed
+
ERROR CODES FOR TESTING NODE FAILURE, GLOBAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
=== modified file 'storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp'
--- a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp 2010-11-10 10:14:04 +0000
+++ b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp 2010-12-15 13:50:17 +0000
@@ -1781,6 +1781,108 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal
#endif
#endif
+#ifdef ERROR_INSERT
+ /* <Target NodeId> dump 9992 <NodeId list>
+ * On Target NodeId, block receiving signals from NodeId list
+ *
+ * <Target NodeId> dump 9993 <NodeId list>
+ * On Target NodeId, resume receiving signals from NodeId list
+ *
+ * <Target NodeId> dump 9991
+ * On Target NodeId, resume receiving signals from any blocked node
+ *
+ *
+ * See also code in QMGR for blocking receive from nodes based
+ * on HB roles.
+ *
+ */
+ if((arg == 9993) || /* Unblock recv from nodeid */
+ (arg == 9992)) /* Block recv from nodeid */
+ {
+ bool block = (arg == 9992);
+ for (Uint32 n = 1; n < signal->getLength(); n++)
+ {
+ Uint32 nodeId = signal->theData[n];
+
+ if ((nodeId > 0) &&
+ (nodeId < MAX_NODES))
+ {
+ if (block)
+ {
+ ndbout_c("CMVMI : Blocking receive from node %u", nodeId);
+
+ globalTransporterRegistry.blockReceive(nodeId);
+ }
+ else
+ {
+ ndbout_c("CMVMI : Unblocking receive from node %u", nodeId);
+
+ globalTransporterRegistry.unblockReceive(nodeId);
+ }
+ }
+ else
+ {
+ ndbout_c("CMVMI : Ignoring dump %u for node %u",
+ arg, nodeId);
+ }
+ }
+ }
+ if (arg == 9990) /* Block recv from all ndbd matching pattern */
+ {
+ Uint32 pattern = 0;
+ if (signal->getLength() > 1)
+ {
+ pattern = signal->theData[1];
+ ndbout_c("CMVMI : Blocking receive from all ndbds matching pattern -%s-",
+ ((pattern == 1)? "Other side":"Unknown"));
+ }
+
+ for (Uint32 node = 1; node < MAX_NDB_NODES; node++)
+ {
+ if (globalTransporterRegistry.is_connected(node))
+ {
+ if (getNodeInfo(node).m_type == NodeInfo::DB)
+ {
+ if (!globalTransporterRegistry.isBlocked(node))
+ {
+ switch (pattern)
+ {
+ case 1:
+ {
+ /* Match if given node is on 'other side' of
+ * 2-replica cluster
+ */
+ if ((getOwnNodeId() & 1) != (node & 1))
+ {
+ /* Node is on the 'other side', match */
+ break;
+ }
+ /* Node is on 'my side', don't match */
+ continue;
+ }
+ default:
+ break;
+ }
+ ndbout_c("CMVMI : Blocking receive from node %u", node);
+ globalTransporterRegistry.blockReceive(node);
+ }
+ }
+ }
+ }
+ }
+ if (arg == 9991) /* Unblock recv from all blocked */
+ {
+ for (Uint32 node = 0; node < MAX_NODES; node++)
+ {
+ if (globalTransporterRegistry.isBlocked(node))
+ {
+ ndbout_c("CMVMI : Unblocking receive from node %u", node);
+ globalTransporterRegistry.unblockReceive(node);
+ }
+ }
+ }
+#endif
+
if (arg == 9999)
{
Uint32 delay = 1000;
=== modified file 'storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2010-12-13 15:34:50 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2010-12-15 13:50:17 +0000
@@ -147,7 +147,38 @@ public:
NdbNodeBitmask c_readnodes_nodes;
Uint32 c_maxDynamicId;
-
+
+ struct ConnectCheckRec
+ {
+ bool m_active; // Connectivity check underway?
+ Timer m_timer; // Check timer object
+ Uint32 m_currentRound; // Last round started
+ Uint32 m_tick; // Periods elapsed in current check
+ NdbNodeBitmask m_nodesPinged; // Nodes sent a NodePingReq in round
+ NdbNodeBitmask m_nodesWaiting; // Nodes which have not sent a response
+ NdbNodeBitmask m_nodesFailedDuring; // Nodes which failed during check
+ NdbNodeBitmask m_nodesSuspect; // Nodes with suspect connectivity
+
+ ConnectCheckRec()
+ {
+ m_active = false;
+ m_currentRound = 0;
+ m_tick = 0;
+ m_nodesPinged.clear();
+ m_nodesWaiting.clear();
+ m_nodesFailedDuring.clear();
+ m_nodesSuspect.clear();
+ }
+
+ void reportNodeConnect(Uint32 nodeId);
+ /* reportNodeFailure.
+ * return code true means the connect check is completed
+ */
+ bool reportNodeFailure(Uint32 nodeId);
+ };
+
+ ConnectCheckRec m_connectivity_check;
+
// Records
struct NodeRec {
/*
@@ -319,6 +350,10 @@ private:
void execUPGRADE_PROTOCOL_ORD(Signal*);
+ // Connectivity check signals
+ void execNODE_PINGREQ(Signal* signal);
+ void execNODE_PINGCONF(Signal* signal);
+
// Statement blocks
void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
Uint32 check_startup(Signal* signal);
@@ -377,6 +412,7 @@ private:
void setHbDelay(UintR aHbDelay);
void setHbApiDelay(UintR aHbApiDelay);
void setArbitTimeout(UintR aArbitTimeout);
+ void setCCDelay(UintR aCCDelay);
// Interface to arbitration module
void handleArbitStart(Signal* signal);
@@ -400,6 +436,17 @@ private:
void computeArbitNdbMask(NdbNodeBitmaskPOD& aMask);
void reportArbitEvent(Signal* signal, Ndb_logevent_type type,
const NodeBitmask mask = NodeBitmask());
+
+ // Interface to Connectivity Check
+ void startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 node);
+ void checkConnectivityTimeSignal(Signal* signal);
+ void connectivityCheckCompleted(Signal* signal);
+ bool isNodeConnectivitySuspect(Uint32 nodeId) const;
+ void handleFailFromSuspect(Signal* signal,
+ Uint32 reason,
+ Uint16 aFailedNode,
+ Uint16 sourceNode);
+
// Initialisation
void initData();
void initRecords();
@@ -511,6 +558,10 @@ private:
// user-defined hbOrder must set all values non-zero and distinct
int check_hb_order_config();
bool m_hb_order_config_used;
+
+#ifdef ERROR_INSERT
+ Uint32 nodeFailCount;
+#endif
};
#endif
=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp 2010-06-16 16:56:34 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp 2010-12-15 13:50:17 +0000
@@ -70,6 +70,10 @@ void Qmgr::initData()
ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI);
setHbApiDelay(hbDBAPI);
+
+#ifdef ERROR_INSERT
+ nodeFailCount = 0;
+#endif
}//Qmgr::initData()
void Qmgr::initRecords()
@@ -146,6 +150,10 @@ Qmgr::Qmgr(Block_context& ctx)
addRecSignal(GSN_UPGRADE_PROTOCOL_ORD, &Qmgr::execUPGRADE_PROTOCOL_ORD);
+ // Connectivity check signals
+ addRecSignal(GSN_NODE_PING_REQ, &Qmgr::execNODE_PINGREQ);
+ addRecSignal(GSN_NODE_PING_CONF, &Qmgr::execNODE_PINGCONF);
+
initData();
}//Qmgr::Qmgr()
=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2010-12-13 16:52:27 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2010-12-15 13:50:17 +0000
@@ -39,6 +39,7 @@
#include <signaldata/Upgrade.hpp>
#include <signaldata/EnableCom.hpp>
#include <signaldata/RouteOrd.hpp>
+#include <signaldata/NodePing.hpp>
#include <ndb_version.h>
@@ -437,6 +438,21 @@ void Qmgr::setArbitTimeout(UintR aArbitT
arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
}
+void Qmgr::setCCDelay(UintR aCCDelay)
+{
+ NDB_TICKS now = NdbTick_CurrentMillisecond();
+ if (aCCDelay == 0)
+ {
+ /* Connectivity check disabled */
+ m_connectivity_check.m_timer.setDelay(0);
+ }
+ else
+ {
+ m_connectivity_check.m_timer.setDelay(aCCDelay < 10 ? 10 : aCCDelay);
+ m_connectivity_check.m_timer.reset(now);
+ }
+}
+
void Qmgr::execCONNECT_REP(Signal* signal)
{
jamEntry();
@@ -1991,6 +2007,7 @@ void Qmgr::execCM_ADD(Signal* signal)
jam();
ndbrequire(addNodePtr.p->phase == ZSTARTING);
addNodePtr.p->phase = ZRUNNING;
+ m_connectivity_check.reportNodeConnect(addNodePtr.i);
setNodeInfo(addNodePtr.i).m_heartbeat_cnt= 0;
c_clusterNodes.set(addNodePtr.i);
findNeighbours(signal, __LINE__);
@@ -2389,6 +2406,7 @@ void Qmgr::initData(Signal* signal)
Uint32 hbDBDB = 1500;
Uint32 arbitTimeout = 1000;
Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
+ Uint32 ccInterval = 0;
c_restartPartialTimeout = 30000;
c_restartPartionedTimeout = 60000;
c_restartFailureTimeout = ~0;
@@ -2401,6 +2419,8 @@ void Qmgr::initData(Signal* signal)
&c_restartPartionedTimeout);
ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
&c_restartFailureTimeout);
+ ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
+ &ccInterval);
if(c_restartPartialTimeout == 0)
{
@@ -2418,6 +2438,7 @@ void Qmgr::initData(Signal* signal)
}
setHbDelay(hbDBDB);
+ setCCDelay(ccInterval);
setArbitTimeout(arbitTimeout);
arbitRec.method = (ArbitRec::Method)arbitMethod;
@@ -2536,10 +2557,22 @@ void Qmgr::timerHandlingLab(Signal* sign
sendHeartbeat(signal);
hb_send_timer.reset(TcurrentTime);
}
- if (hb_check_timer.check(TcurrentTime)) {
- jam();
- checkHeartbeat(signal);
- hb_check_timer.reset(TcurrentTime);
+ if (likely(! m_connectivity_check.m_active))
+ {
+ if (hb_check_timer.check(TcurrentTime)) {
+ jam();
+ checkHeartbeat(signal);
+ hb_check_timer.reset(TcurrentTime);
+ }
+ }
+ else
+ {
+ /* Connectivity check */
+ if (m_connectivity_check.m_timer.check(TcurrentTime)) {
+ jam();
+ checkConnectivityTimeSignal(signal);
+ m_connectivity_check.m_timer.reset(TcurrentTime);
+ }
}
}
@@ -2644,16 +2677,26 @@ void Qmgr::checkHeartbeat(Signal* signal
if (getNodeInfo(nodePtr.i).m_heartbeat_cnt > 4) {
jam();
- /**----------------------------------------------------------------------
- * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
- * PERIODS. THUS WE DECLARE HIM DOWN.
- *----------------------------------------------------------------------*/
- signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
- signal->theData[1] = nodePtr.i;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
-
- failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
- return;
+ if (m_connectivity_check.m_timer.getDelay() > 0)
+ {
+ jam();
+ /* Start connectivity check, indicating the cause */
+ startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
+ return;
+ }
+ else
+ {
+ /**----------------------------------------------------------------------
+ * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
+ * PERIODS. THUS WE DECLARE HIM DOWN.
+ *----------------------------------------------------------------------*/
+ signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
+ signal->theData[1] = nodePtr.i;
+ sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
+
+ failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
+ return;
+ }
}//if
}//Qmgr::checkHeartbeat()
@@ -3574,6 +3617,24 @@ void Qmgr::failReportLab(Signal* signal,
jam();
return;
}
+
+ if (isNodeConnectivitySuspect(sourceNode) &&
+ // (! isNodeConnectivitySuspect(aFailedNode)) && // TODO : Required?
+ ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
+ (aFailCause == FailRep::ZLINK_FAILURE)))
+ {
+ jam();
+ /* Connectivity related failure report from a node with suspect
+ * connectivity, handle differently
+ */
+ ndbrequire(sourceNode != getOwnNodeId());
+
+ handleFailFromSuspect(signal,
+ aFailCause,
+ aFailedNode,
+ sourceNode);
+ return;
+ }
if (failedNodePtr.i == getOwnNodeId()) {
jam();
@@ -3630,6 +3691,9 @@ void Qmgr::failReportLab(Signal* signal,
case FailRep::ZMULTI_NODE_SHUTDOWN:
msg = "Multi node shutdown";
break;
+ case FailRep::ZCONNECT_CHECK_FAILURE:
+ msg = "Connectivity check failure";
+ break;
default:
msg = "<UNKNOWN>";
}
@@ -4485,6 +4549,34 @@ void Qmgr::failReport(Signal* signal,
ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
if (failedNodePtr.p->phase == ZRUNNING) {
jam();
+
+#ifdef ERROR_INSERT
+ if (ERROR_INSERTED(938))
+ {
+ nodeFailCount++;
+ ndbout_c("QMGR : execFAIL_REP : %u nodes have failed", nodeFailCount);
+ /* Count DB nodes */
+ Uint32 nodeCount = 0;
+ for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
+ {
+ if (getNodeInfo(i).getType() == NODE_TYPE_DB)
+ nodeCount++;
+ }
+
+ /* When > 25% of cluster has failed, resume communications */
+ if (nodeFailCount > (nodeCount / 4))
+ {
+ ndbout_c("QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
+ Signal save = *signal;
+ signal->theData[0] = 9991;
+ sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
+ *signal = save;
+ nodeFailCount = 0;
+ CLEAR_ERROR_INSERT_VALUE;
+ }
+ }
+#endif
+
/* WE ALSO NEED TO ADD HERE SOME CODE THAT GETS OUR NEW NEIGHBOURS. */
if (cpresident == getOwnNodeId()) {
jam();
@@ -4534,6 +4626,13 @@ void Qmgr::failReport(Signal* signal,
jam();
return;
}//if
+
+ if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
+ {
+ jam();
+ connectivityCheckCompleted(signal);
+ }
+
failedNodePtr.p->ndynamicId = 0;
findNeighbours(signal, __LINE__);
if (failedNodePtr.i == cpresident) {
@@ -5810,6 +5909,55 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal)
}
ndbout << buf << endl;
}
+
+#ifdef ERROR_INSERT
+ Uint32 dumpCode = signal->theData[0];
+ if ((dumpCode == 9992) ||
+ (dumpCode == 9993))
+ {
+ if (signal->getLength() == 2)
+ {
+ Uint32 nodeId = signal->theData[1];
+ Uint32& newNodeId = signal->theData[1];
+ Uint32 length = 2;
+ assert(257 > MAX_NODES);
+ if (nodeId > MAX_NODES)
+ {
+ const char* type = "None";
+ switch (nodeId)
+ {
+ case 257:
+ {
+ /* Left (lower) neighbour */
+ newNodeId = cneighbourl;
+ type = "Left neighbour";
+ break;
+ }
+ case 258:
+ {
+ /* Right (higher) neighbour */
+ newNodeId = cneighbourh;
+ type = "Right neighbour";
+ break;
+ }
+ case 259:
+ {
+ /* President */
+ newNodeId = cpresident;
+ type = "President";
+ break;
+ }
+ }
+ ndbout_c("QMGR : Mapping request on node id %u to node id %u (%s)",
+ nodeId, newNodeId, type);
+ if (newNodeId != nodeId)
+ {
+ sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
+ }
+ }
+ }
+ }
+#endif
}//Qmgr::execDUMP_STATE_ORD()
@@ -6206,3 +6354,491 @@ Qmgr::check_hb_order_config()
m_hb_order_config_used = true;
return 0;
}
+
+static const Uint32 CC_SuspectTicks = 1;
+static const Uint32 CC_FailedTicks = 2;
+
+void
+Qmgr::startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 causingNode)
+{
+ jam();
+ ndbrequire(m_connectivity_check.m_timer.getDelay() > 0);
+
+ if (m_connectivity_check.m_active)
+ {
+ jam();
+ /* Connectivity check underway already
+ * do nothing
+ */
+ return;
+ }
+
+
+ m_connectivity_check.m_nodesPinged.clear();
+
+ /* Send NODE_PINGREQ signal to all other running nodes, and
+ * initialise connectivity check bitmasks.
+ * Note that nodes may already be considered suspect due to
+ * a previous connectivity check round.
+ */
+ Uint32 ownId = getOwnNodeId();
+ NodePingReq* pingReq = CAST_PTR(NodePingReq, &signal->theData[0]);
+ pingReq->senderData = ++m_connectivity_check.m_currentRound;
+ pingReq->senderRef = reference();
+
+ for (Uint32 i=1; i < MAX_NDB_NODES; i++)
+ {
+ if (i != ownId)
+ {
+ NodeRec& node = nodeRec[i];
+ if (node.phase == ZRUNNING)
+ {
+ /* If connection was considered ok, treat as unknown,
+ * If it was considered slow, continue to treat
+ * as slow
+ */
+ sendSignal(node.blockRef,
+ GSN_NODE_PING_REQ,
+ signal,
+ NodePingReq::SignalLength,
+ JBA);
+
+ m_connectivity_check.m_nodesPinged.set(i);
+ }
+ }
+ }
+
+ /* Initialise result bitmasks */
+ m_connectivity_check.m_nodesWaiting.assign(m_connectivity_check.m_nodesPinged);
+ m_connectivity_check.m_nodesFailedDuring.clear();
+
+ /* Ensure only live nodes are considered suspect */
+ m_connectivity_check.m_nodesSuspect.bitAND(m_connectivity_check.m_nodesPinged);
+
+ const char* reasonText = "Unknown";
+ bool firstTime = true;
+
+ switch(reason)
+ {
+ case FailRep::ZHEARTBEAT_FAILURE:
+ reasonText = "Heartbeat failure";
+ break;
+ case FailRep::ZCONNECT_CHECK_FAILURE:
+ reasonText = "Connectivity check request";
+ break;
+ default:
+ firstTime = false;
+ ndbrequire(m_connectivity_check.m_nodesSuspect.count() > 0);
+ break;
+ }
+
+ if (!m_connectivity_check.m_nodesPinged.isclear())
+ {
+ jam();
+ {
+ char buff[100];
+ m_connectivity_check.m_nodesPinged.getText(buff);
+ if (firstTime)
+ {
+ g_eventLogger->info("QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
+ m_connectivity_check.m_nodesPinged.count(),
+ buff,
+ reasonText,
+ causingNode);
+ }
+ else
+ {
+ char buff2[100];
+ m_connectivity_check.m_nodesSuspect.getText(buff2);
+ g_eventLogger->info("QMGR : Restarting connectivity check of %u other nodes (%s) due to %u syspect nodes (%s)",
+ m_connectivity_check.m_nodesPinged.count(),
+ buff,
+ m_connectivity_check.m_nodesSuspect.count(),
+ buff2);
+ }
+ }
+
+ /* Generate cluster log event */
+ Uint32 bitmaskSz = NdbNodeBitmask::Size;
+ signal->theData[0] = NDB_LE_ConnectCheckStarted;
+ signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
+ signal->theData[2] = reason;
+ signal->theData[3] = causingNode;
+ signal->theData[4] = bitmaskSz;
+ Uint32* sigPtr = &signal->theData[5];
+ m_connectivity_check.m_nodesPinged.copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
+ m_connectivity_check.m_nodesSuspect.copyto(bitmaskSz, sigPtr);
+ sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5 + (2 * bitmaskSz), JBB);
+
+ m_connectivity_check.m_active = true;
+ m_connectivity_check.m_tick = 0;
+ NDB_TICKS now = NdbTick_CurrentMillisecond();
+ m_connectivity_check.m_timer.reset(now);
+ }
+ else
+ {
+ g_eventLogger->info("QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
+ reasonText,
+ causingNode);
+ }
+}
+
+void
+Qmgr::execNODE_PINGREQ(Signal* signal)
+{
+ jamEntry();
+ Uint32 ownId = getOwnNodeId();
+ const NodePingReq* pingReq = CAST_CONSTPTR(NodePingReq, &signal->theData[0]);
+ Uint32 sendersRef = signal->getSendersBlockRef();
+ Uint32 sendersNodeId = refToNode(sendersRef);
+ Uint32 senderData = pingReq->senderData;
+
+ ndbrequire(sendersNodeId != ownId);
+
+ /* We will start our own connectivity check if necessary
+ * before responding with PING_CONF to the requestor.
+ * This means that the sending node will receive our PING_REQ
+ * before our PING_CONF, which should avoid them starting an
+ * unnecessary extra connectivity check round in some cases.
+ */
+ if (likely(m_connectivity_check.m_timer.getDelay() > 0))
+ {
+ jam();
+ /* We have connectivity checking configured */
+ if (! m_connectivity_check.m_active)
+ {
+ jam();
+
+ {
+ /* Don't start a new connectivity check if the requesting
+ * node has failed from our point of view
+ */
+ NodeRecPtr nodePtr;
+ nodePtr.i = sendersNodeId;
+ ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
+ if (unlikely(nodePtr.p->phase != ZRUNNING))
+ {
+ jam();
+
+ g_eventLogger->warning("QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
+ sendersNodeId, nodePtr.p->phase);
+ return;
+ }
+ }
+
+ /* Start our own Connectivity Check now indicating reason and causing node */
+ startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
+ }
+ }
+ else
+ {
+ jam();
+ g_eventLogger->warning("QMGR : NODE_PINGREQ received from node %u, but connectivity "
+ "checking not configured on this node. Ensure all "
+ "nodes have the same configuration for parameter "
+ "ConnectCheckIntervalMillis.",
+ sendersNodeId);
+ }
+
+ /* Now respond with NODE_PINGCONF */
+ NodePingConf* pingConf = CAST_PTR(NodePingConf, &signal->theData[0]);
+
+ pingConf->senderData = senderData;
+ pingConf->senderRef = reference();
+
+ sendSignal(sendersRef,
+ GSN_NODE_PING_CONF,
+ signal,
+ NodePingConf::SignalLength,
+ JBA);
+}
+
+void
+Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
+{
+ /* Clear any suspicion */
+ m_nodesSuspect.clear(nodeId);
+}
+
+bool
+Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
+{
+ if (unlikely(m_active))
+ {
+ m_nodesFailedDuring.set(nodeId);
+
+ if (m_nodesWaiting.get(nodeId))
+ {
+ /* We were waiting for a NODE_PING_CONF from this node,
+ * remove it from the set
+ */
+ m_nodesWaiting.clear(nodeId);
+
+ return m_nodesWaiting.isclear();
+ }
+ }
+ return false;
+}
+
+void
+Qmgr::execNODE_PINGCONF(Signal* signal)
+{
+ jamEntry();
+
+ ndbrequire(m_connectivity_check.m_timer.getDelay() > 0);
+
+ const NodePingConf* pingConf = CAST_CONSTPTR(NodePingConf, &signal->theData[0]);
+ Uint32 sendersBlockRef = signal->getSendersBlockRef();
+ Uint32 sendersNodeId = refToNode(sendersBlockRef);
+ Uint32 roundNumber = pingConf->senderData;
+
+ ndbrequire(sendersNodeId != getOwnNodeId());
+ ndbrequire((m_connectivity_check.m_active) || /* Normal */
+ (m_connectivity_check.m_nodesWaiting.get(sendersNodeId) || /* We killed last round */
+ m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId))); /* Someone killed */
+
+ if (unlikely((! m_connectivity_check.m_active) ||
+ (roundNumber != m_connectivity_check.m_currentRound)))
+ {
+ g_eventLogger->warning("QMGR : Received NODEPING_CONF from node %u for round %u, "
+ "but we are %sactive on round %u. Discarding.",
+ sendersNodeId,
+ roundNumber,
+ ((m_connectivity_check.m_active)?"":"in"),
+ m_connectivity_check.m_currentRound);
+ return;
+ }
+
+ /* Node must have been pinged, we must be waiting for the response,
+ * or the node must have already failed
+ */
+ ndbrequire(m_connectivity_check.m_nodesPinged.get(sendersNodeId));
+ ndbrequire(m_connectivity_check.m_nodesWaiting.get(sendersNodeId) ||
+ m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId));
+
+ m_connectivity_check.m_nodesWaiting.clear(sendersNodeId);
+
+ if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
+ {
+ jam();
+ /* Node responded on time, clear any suspicion about it */
+ m_connectivity_check.m_nodesSuspect.clear(sendersNodeId);
+ }
+
+ if (m_connectivity_check.m_nodesWaiting.isclear())
+ {
+ jam();
+ /* Connectivity check round is now finished */
+ connectivityCheckCompleted(signal);
+ }
+}
+
+void
+Qmgr::connectivityCheckCompleted(Signal* signal)
+{
+ jam();
+
+ m_connectivity_check.m_active = false;
+
+ /* Log the following :
+ * Nodes checked
+ * Nodes responded ok
+ * Nodes responded late (now suspect)
+ * Nodes failed to respond.
+ * Nodes failed during
+ */
+ char pinged[100];
+ char late[100];
+ char silent[100];
+ char failed[100];
+
+ /* Any 'waiting' nodes have been killed
+ * Surviving suspects do not include them.
+ */
+ NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
+ survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);
+
+ /* Nodes that failed during the check are also excluded */
+ survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);
+
+ m_connectivity_check.m_nodesPinged.getText(pinged);
+ survivingSuspects.getText(late);
+ m_connectivity_check.m_nodesWaiting.getText(silent);
+ m_connectivity_check.m_nodesFailedDuring.getText(failed);
+
+ g_eventLogger->info("QMGR : Connectivity check completed, "
+ "%u other nodes checked (%s), "
+ "%u responded on time, "
+ "%u responded late (%s), "
+ "%u no response will be failed (%s), "
+ "%u failed during check (%s)\n",
+ m_connectivity_check.m_nodesPinged.count(),
+ pinged,
+ m_connectivity_check.m_nodesPinged.count() -
+ m_connectivity_check.m_nodesSuspect.count(),
+ survivingSuspects.count(),
+ late,
+ m_connectivity_check.m_nodesWaiting.count(),
+ silent,
+ m_connectivity_check.m_nodesFailedDuring.count(),
+ failed);
+
+ /* Log in Cluster log */
+ signal->theData[0] = NDB_LE_ConnectCheckCompleted;
+ signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
+ signal->theData[2] = survivingSuspects.count();
+ signal->theData[3] = m_connectivity_check.m_nodesWaiting.count() +
+ m_connectivity_check.m_nodesFailedDuring.count();
+
+ sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
+
+ if (survivingSuspects.count() > 0)
+ {
+ jam();
+ /* Still suspect nodes, start another round */
+ g_eventLogger->info("QMGR : Starting new connectivity check due to suspect nodes.");
+ /* Restart connectivity check, no external reason or cause */
+ startConnectivityCheck(signal, 0, 0);
+ }
+ else
+ {
+ jam();
+ /* No suspect nodes, stop the protocol now */
+
+ g_eventLogger->info("QMGR : All other nodes (%u) connectivity ok.",
+ m_connectivity_check.m_nodesPinged.count() -
+ (m_connectivity_check.m_nodesWaiting.count() +
+ m_connectivity_check.m_nodesFailedDuring.count()));
+
+ /* Send a heartbeat to our right neighbour at this point as a gesture
+ * of goodwill
+ */
+ sendHeartbeat(signal);
+ hb_send_timer.reset(NdbTick_CurrentMillisecond());
+ };
+}
+
+void
+Qmgr::checkConnectivityTimeSignal(Signal* signal)
+{
+ /* Executed periodically when a connectivity check is
+ * underway.
+ * After CC_SuspectTicks have elapsed, any nodes
+ * which have not responded are considered
+ * 'Suspect'.
+ * After CC_FailedTicks have elapsed, any nodes
+ * which have not responded are considered
+ * to have failed, and failure handling
+ * begins.
+ */
+ jam();
+
+ /* Preconditions, otherwise we shouldn't have been called */
+ ndbrequire(m_connectivity_check.m_timer.getDelay() > 0);
+ ndbrequire(m_connectivity_check.m_active);
+ ndbrequire(!m_connectivity_check.m_nodesWaiting.isclear());
+
+ m_connectivity_check.m_tick++;
+
+ switch (m_connectivity_check.m_tick)
+ {
+ case CC_SuspectTicks:
+ {
+ jam();
+ /* Still waiting to hear from some nodes, they are now
+ * suspect
+ */
+ m_connectivity_check.m_nodesSuspect.bitOR(m_connectivity_check.m_nodesWaiting);
+ return;
+ }
+ case CC_FailedTicks:
+ {
+ jam();
+ /* Still waiting to hear from some nodes, they will now
+ * be failed
+ */
+ m_connectivity_check.m_active = false;
+ Uint32 nodeId = 0;
+
+ while ((nodeId = m_connectivity_check.m_nodesWaiting.find(nodeId))
+ != BitmaskImpl::NotFound)
+ {
+ jam();
+ /* Log failure reason */
+ /* Todo : Connectivity Check specific failure log? */
+ signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
+ signal->theData[1] = nodeId;
+
+ sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
+
+ /* Fail the node */
+ /* TODO : Consider real time break here */
+ failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
+ nodeId++;
+ }
+
+ /* Now handle the end of the Connectivity Check */
+ connectivityCheckCompleted(signal);
+ }
+ }
+}
+
+bool
+Qmgr::isNodeConnectivitySuspect(Uint32 nodeId) const
+{
+ return m_connectivity_check.m_nodesSuspect.get(nodeId);
+}
+
+void
+Qmgr::handleFailFromSuspect(Signal* signal,
+ Uint32 reason,
+ Uint16 aFailedNode,
+ Uint16 sourceNode)
+{
+ jam();
+
+ const char* reasonText = "Unknown";
+
+ /* We have received a failure report about some node X from
+ * some other node that we consider to have suspect connectivity
+ * which may have caused the report.
+ *
+ * We will 'invert' the sense of this, and handle it as
+ * a failure report of the sender, with the same cause.
+ */
+ switch(reason)
+ {
+ case FailRep::ZCONNECT_CHECK_FAILURE:
+ jam();
+ /* Suspect says that connectivity check failed for another node.
+ * As suspect has bad connectivity from our point of view, we
+ * blame him.
+ */
+ reasonText = "ZCONNECT_CHECK_FAILURE";
+ break;
+ case FailRep::ZLINK_FAILURE:
+ jam();
+ /* Suspect says that link failed for another node.
+ * As suspect has bad connectivity from our point of view, we
+ * blame her.
+ */
+ reasonText = "ZLINK_FAILURE";
+ break;
+ default:
+ ndbrequire(false);
+ }
+
+ g_eventLogger->warning("QMGR : Received Connectivity failure notification about "
+ "%u from suspect node %u with reason %s. "
+ "Mapping to failure of %u sourced by me.",
+ aFailedNode, sourceNode, reasonText, sourceNode);
+
+ signal->theData[0] = NDB_LE_NodeFailRejected;
+ signal->theData[1] = reason;
+ signal->theData[2] = aFailedNode;
+ signal->theData[3] = sourceNode;
+
+ sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
+
+ failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());
+}
=== modified file 'storage/ndb/src/mgmsrv/ConfigInfo.cpp'
--- a/storage/ndb/src/mgmsrv/ConfigInfo.cpp 2010-12-02 11:02:29 +0000
+++ b/storage/ndb/src/mgmsrv/ConfigInfo.cpp 2010-12-15 13:50:17 +0000
@@ -987,6 +987,18 @@ const ConfigInfo::ParamInfo ConfigInfo::
STR_VALUE(MAX_INT_RNIL) },
{
+ CFG_DB_CONNECT_CHECK_DELAY,
+ "ConnectCheckIntervalDelay",
+ DB_TOKEN,
+ "Time between "DB_TOKEN_PRINT" connectivity check stages. "DB_TOKEN_PRINT" considered suspect after 1 and dead after 2 intervals.",
+ ConfigInfo::CI_USED,
+ 0,
+ ConfigInfo::CI_INT,
+ "0",
+ "0",
+ STR_VALUE(MAX_INT_RNIL) },
+
+ {
CFG_DB_API_HEARTBEAT_INTERVAL,
"HeartbeatIntervalDbApi",
DB_TOKEN,
=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp 2010-11-24 12:16:55 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp 2010-12-15 13:50:17 +0000
@@ -4180,6 +4180,513 @@ runBug58453(NDBT_Context* ctx, NDBT_Step
return NDBT_OK;
}
+
+
+int runRestartToDynamicOrder(NDBT_Context* ctx, NDBT_Step* step)
+{
+ /* Here we perform node restarts to get the various node's
+ * dynamic ids in a particular order
+ * This affects which nodes heartbeat which (low -> high)
+ * and which is the president (lowest).
+ * Each restarting node gets a higher dynamic id, so the
+ * first node restarted will eventually become president
+ * Note that we're assuming NoOfReplicas == 2 here.
+ */
+ /* TODO :
+ * Refactor into
+ * 1) Get current cluster dynorder info
+ * 2) Choose a dynorder info
+ * 3) Restart to given dynorder if necessary
+ */
+ Uint32 dynOrder = ctx->getProperty("DynamicOrder", Uint32(0));
+ NdbRestarter restarter;
+ Uint32 numNodes = restarter.getNumDbNodes();
+
+ Vector<Uint32> currOrder;
+ Vector<Uint32> newOrder;
+ Vector<Uint32> odds;
+ Vector<Uint32> evens;
+
+ if (numNodes == 2)
+ {
+ ndbout_c("No Dynamic reordering possible with 2 nodes");
+ return NDBT_OK;
+ }
+ if (numNodes & 1)
+ {
+ ndbout_c("Non multiple-of-2 number of nodes. Not supported");
+ return NDBT_FAILED;
+ }
+
+ Uint32 master = restarter.getMasterNodeId();
+
+ for (Uint32 n=0; n < numNodes; n++)
+ {
+ currOrder.push_back(master);
+ master = restarter.getNextMasterNodeId(master);
+ }
+
+ for (Uint32 n=0; n < numNodes; n++)
+ {
+ Uint32 nodeId = restarter.getDbNodeId(n);
+ if (nodeId & 1)
+ {
+ odds.push_back(nodeId);
+ }
+ else
+ {
+ evens.push_back(nodeId);
+ }
+ }
+
+ if (odds.size() != evens.size())
+ {
+ ndbout_c("Failed - odds.size() (%u) != evens.size() (%u)",
+ odds.size(),
+ evens.size());
+ return NDBT_FAILED;
+ }
+
+ ndbout_c("Current dynamic ordering : ");
+ for (Uint32 n=0; n<numNodes; n++)
+ {
+ ndbout_c(" %u %s", currOrder[n], ((n==0)?"*":""));
+ }
+
+ if (dynOrder == 0)
+ {
+ ndbout_c("No change in dynamic order");
+ return NDBT_OK;
+ }
+
+ Uint32 control= dynOrder - 1;
+
+ bool oddPresident = control & 1;
+ bool interleave = control & 2;
+ bool reverseSideA = control & 4;
+ bool reverseSideB = control & 8;
+
+ /* Odds first Interleave O/E Reverse A Reverse B
+ * 1 N N N N
+ * 2 Y N N N
+ * 3 N Y N N
+ * 4 Y Y N N
+ * 5 N N Y N
+ * 6 Y N Y N
+ * 7 N Y Y N
+ * 8 Y Y Y N
+ * 9 N N N Y
+ * 10 Y N N Y
+ * 11 N Y N Y
+ * 12 Y Y N Y
+ * 13 N N Y Y
+ * 14 Y N Y Y
+ * 15 N Y Y Y
+ * 16 Y Y Y Y
+ *
+ * Interesting values
+ * 1) Even first, no interleave, no reverse
+ * e.g. 2->4->6->3->5->7
+ * 2) Odd first, no interleave, no reverse
+ * e.g. 3->5->7->2->4->6
+ * 3) Even first, interleave, no reverse
+ * e.g. 2->3->4->5->6->7
+ * 9) Even first, no interleave, reverse B
+ * e.g. 2->4->6->7->5->3
+ *
+ * 'First' node becomes president.
+ * Which node(s) monitor president affects when
+ * arbitration may be required
+ */
+
+ ndbout_c("Generating ordering with %s president, sides %sinterleaved",
+ (oddPresident?"odd": "even"),
+ (interleave?"":"not "));
+ if (reverseSideA)
+ ndbout_c(" %s reversed", (oddPresident?"odds": "evens"));
+
+ if (reverseSideB)
+ ndbout_c(" %s reversed", (oddPresident?"evens": "odds"));
+
+ Vector<Uint32>* sideA;
+ Vector<Uint32>* sideB;
+
+ if (oddPresident)
+ {
+ sideA = &odds;
+ sideB = &evens;
+ }
+ else
+ {
+ sideA = &evens;
+ sideB = &odds;
+ }
+
+ if (interleave)
+ {
+ for (Uint32 n=0; n < sideA->size(); n++)
+ {
+ Uint32 indexA = reverseSideA? (sideA->size() - (n+1)) : n;
+ newOrder.push_back((*sideA)[indexA]);
+ Uint32 indexB = reverseSideB? (sideB->size() - (n+1)) : n;
+ newOrder.push_back((*sideB)[indexB]);
+ }
+ }
+ else
+ {
+ for (Uint32 n=0; n < sideA->size(); n++)
+ {
+ Uint32 indexA = reverseSideA? (sideA->size() - (n+1)) : n;
+ newOrder.push_back((*sideA)[indexA]);
+ }
+ for (Uint32 n=0; n < sideB->size(); n++)
+ {
+ Uint32 indexB = reverseSideB? (sideB->size() - (n+1)) : n;
+ newOrder.push_back((*sideB)[indexB]);
+ }
+ }
+
+
+ bool diff = false;
+ for (Uint32 n=0; n < newOrder.size(); n++)
+ {
+ ndbout_c(" %u %s",
+ newOrder[n],
+ ((n==0)?"*":" "));
+
+ diff |= (newOrder[n] != currOrder[n]);
+ }
+
+ if (!diff)
+ {
+ ndbout_c("Cluster already in correct configuration");
+ return NDBT_OK;
+ }
+
+ for (Uint32 n=0; n < newOrder.size(); n++)
+ {
+ ndbout_c("Now restarting node %u", newOrder[n]);
+ if (restarter.restartOneDbNode(newOrder[n],
+ false, // initial
+ true, // nostart
+ true) // abort
+ != NDBT_OK)
+ {
+ ndbout_c("Failed to restart node");
+ return NDBT_FAILED;
+ }
+ if (restarter.waitNodesNoStart((const int*) &newOrder[n], 1) != NDBT_OK)
+ {
+ ndbout_c("Failed waiting for node to enter NOSTART state");
+ return NDBT_FAILED;
+ }
+ if (restarter.startNodes((const int*) &newOrder[n], 1) != NDBT_OK)
+ {
+ ndbout_c("Failed to start node");
+ return NDBT_FAILED;
+ }
+ if (restarter.waitNodesStarted((const int*) &newOrder[n], 1) != NDBT_OK)
+ {
+ ndbout_c("Failed waiting for node to start");
+ return NDBT_FAILED;
+ }
+ ndbout_c(" Done.");
+ }
+
+ ndbout_c("All restarts completed. NdbRestarter says master is %u",
+ restarter.getMasterNodeId());
+ if (restarter.getMasterNodeId() != (int) newOrder[0])
+ {
+ ndbout_c(" Should be %u, failing", newOrder[0]);
+ return NDBT_FAILED;
+ }
+
+ return NDBT_OK;
+}
+
+struct NodeGroupMembers
+{
+ Uint32 ngid;
+ Uint32 membCount;
+ Uint32 members[4];
+};
+
+template class Vector<NodeGroupMembers>;
+
+int analyseDynamicOrder(NDBT_Context* ctx, NDBT_Step* step)
+{
+ NdbRestarter restarter;
+ Uint32 numNodes = restarter.getNumDbNodes();
+ Uint32 master = restarter.getMasterNodeId();
+ Vector<Uint32> dynamicOrder;
+ Vector<Uint32> nodeGroup;
+ Vector<Uint32> monitorsNode;
+ Vector<Uint32> monitoredByNode;
+ Vector<Uint32> monitorsRemote;
+ Vector<Uint32> remoteMonitored;
+ Vector<Uint32> sameNGMonitored;
+ Vector<Uint32> distanceToRemote;
+ Vector<Uint32> nodeIdToDynamicIndex;
+ Uint32 maxDistanceToRemoteLink = 0;
+
+ /* TODO :
+ * Refactor into :
+ * 1) Determine dynorder from running cluster
+ * 2) Analyse dynorder in general
+ * 3) Analyse dynorder from point of view of latency split
+ *
+ * 4) Support splits other than odd/even total
+ * - Partial split
+ * - Some link failures
+ */
+
+ /* Determine dynamic order from running cluster */
+ for (Uint32 n=0; n < numNodes; n++)
+ {
+ dynamicOrder.push_back(master);
+ nodeGroup.push_back(restarter.getNodeGroup(master));
+ master = restarter.getNextMasterNodeId(master);
+ Uint32 zero=0;
+ nodeIdToDynamicIndex.set(n, master, zero);
+ }
+
+ /* Look at implied HB links */
+ for (Uint32 n=0; n < numNodes; n++)
+ {
+ Uint32 nodeId = dynamicOrder[n];
+ Uint32 monitoredByIndex = (n+1) % numNodes;
+ Uint32 monitorsIndex = (n+ numNodes - 1) % numNodes;
+ monitoredByNode.push_back(dynamicOrder[monitoredByIndex]);
+ monitorsNode.push_back(dynamicOrder[monitorsIndex]);
+ remoteMonitored.push_back((nodeId & 1) != (monitoredByNode[n] & 1));
+ monitorsRemote.push_back((nodeId & 1) != (monitorsNode[n] & 1));
+ sameNGMonitored.push_back(nodeGroup[n] == nodeGroup[monitoredByIndex]);
+ }
+
+ /* Look at split implications */
+ for (Uint32 n=0; n < numNodes; n++)
+ {
+ Uint32 distanceToRemoteHBLink = 0;
+ for (Uint32 m=0; m < numNodes; m++)
+ {
+ if (remoteMonitored[n+m])
+ break;
+ distanceToRemoteHBLink++;
+ }
+
+ distanceToRemote.push_back(distanceToRemoteHBLink);
+ maxDistanceToRemoteLink = MAX(maxDistanceToRemoteLink, distanceToRemoteHBLink);
+ }
+
+
+ ndbout_c("Dynamic order analysis");
+
+ for (Uint32 n=0; n < numNodes; n++)
+ {
+ ndbout_c(" %u %s %u%s%u%s%u \t Monitored by %s nodegroup, Dist to remote link : %u",
+ dynamicOrder[n],
+ ((n==0)?"*":" "),
+ monitorsNode[n],
+ ((monitorsRemote[n])?" >":"-->"),
+ dynamicOrder[n],
+ ((remoteMonitored[n])?" >":"-->"),
+ monitoredByNode[n],
+ ((sameNGMonitored[n])?"same":"other"),
+ distanceToRemote[n]);
+ }
+
+ ndbout_c("\n");
+
+ Vector<NodeGroupMembers> nodeGroupMembers;
+
+ for (Uint32 n=0; n < numNodes; n++)
+ {
+ Uint32 ng = nodeGroup[n];
+
+ bool ngfound = false;
+ for (Uint32 m = 0; m < nodeGroupMembers.size(); m++)
+ {
+ if (nodeGroupMembers[m].ngid == ng)
+ {
+ NodeGroupMembers& ngInfo = nodeGroupMembers[m];
+ ngInfo.members[ngInfo.membCount++] = dynamicOrder[n];
+ ngfound = true;
+ break;
+ }
+ }
+
+ if (!ngfound)
+ {
+ NodeGroupMembers newGroupInfo;
+ newGroupInfo.ngid = ng;
+ newGroupInfo.membCount = 1;
+ newGroupInfo.members[0] = dynamicOrder[n];
+ nodeGroupMembers.push_back(newGroupInfo);
+ }
+ }
+
+ ndbout_c("Nodegroups");
+
+ for (Uint32 n=0; n < nodeGroupMembers.size(); n++)
+ {
+ ndbout << " " << nodeGroupMembers[n].ngid << " (";
+ bool allRemoteMonitored = true;
+ for (Uint32 m=0; m < nodeGroupMembers[n].membCount; m++)
+ {
+ Uint32 nodeId = nodeGroupMembers[n].members[m];
+ ndbout << nodeId;
+ if ((m+1) < nodeGroupMembers[n].membCount)
+ ndbout << ",";
+ Uint32 dynamicIndex = nodeIdToDynamicIndex[nodeId];
+ allRemoteMonitored &= remoteMonitored[dynamicIndex];
+ }
+ ndbout << ") Entirely remote monitored NGs risk : "
+ << (allRemoteMonitored?"Y":"N") << "\n";
+ }
+ ndbout_c("\n");
+
+ ndbout_c("Cluster-split latency behaviour");
+
+ Uint32 oddPresident = dynamicOrder[0];
+ Uint32 evenPresident = dynamicOrder[0];
+
+ for (Uint32 n=0; n <= maxDistanceToRemoteLink; n++)
+ {
+ Vector<Uint32> failedNodeGroups;
+ ndbout << " " << n <<" HB latency period(s), nodes (";
+ bool useComma = false;
+ bool presidentFailed = false;
+ for (Uint32 m=0; m < numNodes; m++)
+ {
+ if (distanceToRemote[m] == n)
+ {
+ Uint32 failingNodeId = dynamicOrder[m];
+ if (useComma)
+ ndbout << ",";
+
+ useComma = true;
+ ndbout << failingNodeId;
+
+ if ((failingNodeId == evenPresident) ||
+ (failingNodeId == oddPresident))
+ {
+ ndbout << "*";
+ presidentFailed = true;
+ }
+
+ {
+ Uint32 ng = nodeGroup[m];
+ for (Uint32 i=0; i< nodeGroupMembers.size(); i++)
+ {
+ if (nodeGroupMembers[i].ngid == ng)
+ {
+ if ((--nodeGroupMembers[i].membCount) == 0)
+ {
+ failedNodeGroups.push_back(ng);
+ }
+ }
+ }
+ }
+ }
+ }
+ ndbout << ") will be declared failed." << endl;
+ if (failedNodeGroups.size() != 0)
+ {
+ ndbout << " NG failure risk on reconnect for nodegroups : ";
+ for (Uint32 i=0; i< failedNodeGroups.size(); i++)
+ {
+ if (i > 0)
+ ndbout << ",";
+ ndbout << failedNodeGroups[i];
+ }
+ ndbout << endl;
+ }
+ if (presidentFailed)
+ {
+ /* A president (even/odd/both) has failed, we should
+ * calculate the new president(s) from the p.o.v.
+ * of both sides
+ */
+ Uint32 newOdd=0;
+ Uint32 newEven=0;
+ for (Uint32 i=0; i< numNodes; i++)
+ {
+ /* Each side finds either the first node on their
+ * side, or the first node on the other side which
+ * is still 'alive' from their point of view
+ */
+ bool candidateIsOdd = dynamicOrder[i] & 1;
+
+ if (!newOdd)
+ {
+ if (candidateIsOdd ||
+ (distanceToRemote[i] > n))
+ {
+ newOdd = dynamicOrder[i];
+ }
+ }
+ if (!newEven)
+ {
+ if ((!candidateIsOdd) ||
+ (distanceToRemote[i] > n))
+ {
+ newEven = dynamicOrder[i];
+ }
+ }
+ }
+
+ bool oddPresidentFailed = (oddPresident != newOdd);
+ bool evenPresidentFailed = (evenPresident != newEven);
+
+ if (oddPresidentFailed)
+ {
+ ndbout_c(" Odd president (%u) failed, new odd president : %u",
+ oddPresident, newOdd);
+ oddPresident = newOdd;
+ }
+ if (evenPresidentFailed)
+ {
+ ndbout_c(" Even president (%u) failed, new even president : %u",
+ evenPresident, newEven);
+ evenPresident = newEven;
+ }
+
+ if (oddPresident != evenPresident)
+ {
+ ndbout_c(" President role duplicated, Odd (%u), Even (%u)",
+ oddPresident, evenPresident);
+ }
+
+ }
+ }
+
+ ndbout << endl << endl;
+
+ return NDBT_OK;
+}
+
+int runSplitLatency25PctFail(NDBT_Context* ctx, NDBT_Step* step)
+{
+ /* Use dump commands to inject artificial inter-node latency
+ * Use an error insert to cause latency to disappear when
+ * a node observes > 25% of nodes failed.
+ * This should trigger a race of FAIL_REQs from both sides
+ * of the cluster, and can result in cluster failure
+ */
+ NdbRestarter restarter;
+
+ /* First the error insert which will drop latency (QMGR) */
+ restarter.insertErrorInAllNodes(938);
+
+ /* Now the dump code which causes the system to experience
+ * latency along odd/even lines (CMVMI)
+ */
+ int dumpStateArgs[] = {9990, 1};
+ restarter.dumpStateAllNodes(dumpStateArgs, 2);
+
+ return NDBT_OK;
+}
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -4715,6 +5222,14 @@ TESTCASE("ForceStopAndRestart", "Test re
{
STEP(runForceStopAndRestart);
}
+TESTCASE("ClusterSplitLatency",
+ "Test behaviour of 2-replica cluster with latency between halves")
+{
+ TC_PROPERTY("DynamicOrder", Uint32(9));
+ INITIALIZER(runRestartToDynamicOrder);
+ INITIALIZER(analyseDynamicOrder);
+ INITIALIZER(runSplitLatency25PctFail);
+}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.1-telco-7.1 branch (frazer.clement:4021 to 4022) | Frazer Clement | 18 Oct |