From: Frazer Clement Date: October 17 2011 1:59pm Subject: bzr push into mysql-5.1-telco-7.0 branch (frazer.clement:4602 to 4603) List-Archive: http://lists.mysql.com/commits/141477 Message-Id: <201110171359.p9HDx6RB025438@acsmt356.oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 4603 Frazer Clement 2011-10-17 Assist debugging occasional failure of testNodeRestart -n ClusterSplitLatency Improve debugging output for error insert 938. modified: storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp storage/ndb/test/ndbapi/testNodeRestart.cpp 4602 Jonas Oreland 2011-10-17 ndb - add information_schema table exposing map between mysql connection id and ndb transid modified: sql/ha_ndb_index_stat.cc sql/ha_ndbcluster.cc sql/ha_ndbcluster.h sql/ha_ndbcluster_binlog.cc sql/ha_ndbcluster_connection.cc sql/ha_ndbinfo.cc sql/ha_ndbinfo.h storage/ndb/include/ndbapi/Ndb.hpp storage/ndb/src/ndbapi/Ndb.cpp storage/ndb/src/ndbapi/NdbImpl.hpp storage/ndb/src/ndbapi/Ndbinit.cpp === modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp' --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2011-09-14 11:32:24 +0000 +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2011-10-17 13:54:09 +0000 @@ -4788,7 +4788,9 @@ void Qmgr::failReport(Signal* signal, if (ERROR_INSERTED(938)) { nodeFailCount++; - ndbout_c("QMGR : execFAIL_REP : %u nodes have failed", nodeFailCount); + ndbout_c("QMGR : execFAIL_REP(Failed : %u Source : %u Cause : %u) : " + "%u nodes have failed", + aFailedNode, sourceNode, aFailCause, nodeFailCount); /* Count DB nodes */ Uint32 nodeCount = 0; for (Uint32 i = 1; i < MAX_NDB_NODES; i++) @@ -6877,6 +6879,12 @@ Qmgr::execNODE_PINGCONF(Signal* signal) return; } + if (ERROR_INSERTED(938)) + { + ndbout_c("QMGR : execNODE_PING_CONF() from %u in tick %u", + sendersNodeId, m_connectivity_check.m_tick); + } + /* Node must have been pinged, we must be waiting for the response, * or the node must have already failed */ === modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp' --- a/storage/ndb/test/ndbapi/testNodeRestart.cpp 2011-06-30 15:59:25 +0000 +++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp 2011-10-17 13:54:09 +0000 @@ -4726,17 +4726,23 @@ int runSplitLatency25PctFail(NDBT_Contex /** * Now wait for half of cluster to die... */ - ndbout_c("Waiting for half of cluster to die"); - int not_started = 0; const int node_count = restarter.getNumDbNodes(); + ndbout_c("Waiting for half of cluster (%u/%u) to die", node_count/2, node_count); + int not_started = 0; do { not_started = 0; for (int i = 0; i < node_count; i++) { - if (restarter.getNodeStatus(restarter.getDbNodeId(i)) == NDB_MGM_NODE_STATUS_NOT_STARTED) + int nodeId = restarter.getDbNodeId(i); + int status = restarter.getNodeStatus(nodeId); + ndbout_c("Node %u status %u", nodeId, status); + if (status == NDB_MGM_NODE_STATUS_NOT_STARTED) not_started++; } + NdbSleep_MilliSleep(2000); + ndbout_c("%u / %u in state NDB_MGM_NODE_STATUS_NOT_STARTED(%u)", + not_started, node_count, NDB_MGM_NODE_STATUS_NOT_STARTED); } while (2 * not_started != node_count); ndbout_c("Restarting cluster"); No bundle (reason: useless for push emails).