3578 Ole John Aske 2011-10-31 [merge]
Merge telco-7.0 -> telco-7.0-spj-scan-scan
modified:
storage/ndb/src/kernel/blocks/ERROR_codes.txt
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp
storage/ndb/test/ndbapi/testNodeRestart.cpp
storage/ndb/test/run-test/daily-basic-tests.txt
3577 Ole John Aske 2011-10-28 [merge]
merge telco-7.0 -> 7.0-spj-scan-scan
added:
storage/ndb/include/kernel/statedesc.hpp
storage/ndb/src/kernel/blocks/dblqh/DblqhStateDesc.cpp
storage/ndb/src/kernel/blocks/dbtc/DbtcStateDesc.cpp
modified:
mysql-test/suite/ndb/r/ndbinfo.result
mysql-test/suite/ndb/t/ndbinfo.test
sql/ha_ndbcluster_connection.cc
storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
storage/ndb/src/ndbapi/NdbQueryOperation.cpp
storage/ndb/tools/CMakeLists.txt
storage/ndb/tools/Makefile.am
storage/ndb/tools/ndbinfo_sql.cpp
=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2011-06-07 12:19:47 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2011-10-31 10:01:23 +0000
@@ -18,7 +18,7 @@ Next NDBCNTR 1002
Next NDBFS 2000
Next DBACC 3002
Next DBTUP 4035
-Next DBLQH 5072
+Next DBLQH 5074
Next DBDICT 6026
Next DBDIH 7229
Next DBTC 8092
=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2011-10-28 09:04:10 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2011-10-31 10:01:23 +0000
@@ -2372,6 +2372,17 @@ void Dbdih::execSTART_PERMREQ(Signal* si
CRASH_INSERTION(7122);
ndbrequire(isMaster());
ndbrequire(refToNode(retRef) == nodeId);
+ if (c_lcpMasterTakeOverState.state != LMTOS_IDLE)
+ {
+ jam();
+ infoEvent("DIH : Denied request for start permission from %u "
+ "while LCP Master takeover in progress.",
+ nodeId);
+ signal->theData[0] = nodeId;
+ signal->theData[1] = StartPermRef::ZNODE_START_DISALLOWED_ERROR;
+ sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
+ return;
+ }
if ((c_nodeStartMaster.activeState) ||
(c_nodeStartMaster.wait != ZFALSE) ||
ERROR_INSERTED_CLEAR(7175)) {
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2011-10-20 12:51:03 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2011-10-31 10:01:23 +0000
@@ -13788,6 +13788,15 @@ void Dblqh::execBACKUP_FRAGMENT_REF(Sign
void Dblqh::execBACKUP_FRAGMENT_CONF(Signal* signal)
{
jamEntry();
+
+ if (ERROR_INSERTED(5073))
+ {
+ ndbout_c("Delaying BACKUP_FRAGMENT_CONF");
+ sendSignalWithDelay(reference(), GSN_BACKUP_FRAGMENT_CONF, signal, 500,
+ signal->getLength());
+ return;
+ }
+
//BackupFragmentConf* conf= (BackupFragmentConf*)signal->getDataPtr();
lcpPtr.i = 0;
=== modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2011-10-28 09:04:10 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2011-10-31 10:01:23 +0000
@@ -2705,6 +2705,23 @@ Dbspj::lookup_build(Build_context& ctx,
const QN_LookupParameters * param = (const QN_LookupParameters*)qp;
do
{
+ err = DbspjErr::InvalidTreeNodeSpecification;
+ if (unlikely(node->len < QN_LookupNode::NodeSize))
+ {
+ jam();
+ DEBUG_CRASH();
+ break;
+ }
+
+ err = DbspjErr::InvalidTreeParametersSpecification;
+ DEBUG("param len: " << param->len);
+ if (unlikely(param->len < QN_LookupParameters::NodeSize))
+ {
+ jam();
+ DEBUG_CRASH();
+ break;
+ }
+
err = createNode(ctx, requestPtr, treeNodePtr);
if (unlikely(err != 0))
{
@@ -2756,13 +2773,6 @@ Dbspj::lookup_build(Build_context& ctx,
dst->requestInfo = requestInfo;
}
- err = DbspjErr::InvalidTreeNodeSpecification;
- if (unlikely(node->len < QN_LookupNode::NodeSize))
- {
- DEBUG_CRASH();
- break;
- }
-
if (treeBits & QN_LookupNode::L_UNIQUE_INDEX)
{
jam();
@@ -2775,14 +2785,6 @@ Dbspj::lookup_build(Build_context& ctx,
Uint32 tableSchemaVersion = tableId + ((schemaVersion << 16) & 0xFFFF0000);
dst->tableSchemaVersion = tableSchemaVersion;
- err = DbspjErr::InvalidTreeParametersSpecification;
- DEBUG("param len: " << param->len);
- if (unlikely(param->len < QN_LookupParameters::NodeSize))
- {
- DEBUG_CRASH();
- break;
- }
-
ctx.m_resultData = param->resultData;
treeNodePtr.p->m_lookup_data.m_api_resultRef = ctx.m_resultRef;
treeNodePtr.p->m_lookup_data.m_api_resultData = param->resultData;
@@ -3765,6 +3767,24 @@ Dbspj::scanFrag_build(Build_context& ctx
do
{
+ err = DbspjErr::InvalidTreeNodeSpecification;
+ DEBUG("scanFrag_build: len=" << node->len);
+ if (unlikely(node->len < QN_ScanFragNode::NodeSize))
+ {
+ jam();
+ DEBUG_CRASH();
+ break;
+ }
+
+ err = DbspjErr::InvalidTreeParametersSpecification;
+ DEBUG("param len: " << param->len);
+ if (unlikely(param->len < QN_ScanFragParameters::NodeSize))
+ {
+ jam();
+ DEBUG_CRASH();
+ break;
+ }
+
err = createNode(ctx, requestPtr, treeNodePtr);
if (unlikely(err != 0))
break;
@@ -3810,24 +3830,9 @@ Dbspj::scanFrag_build(Build_context& ctx
(treeBits & DABits::NI_LINKED_DISK) == 0 &&
(paramBits & DABits::PI_DISK_ATTR) == 0);
dst->requestInfo = requestInfo;
-
- err = DbspjErr::InvalidTreeNodeSpecification;
- DEBUG("scanFrag_build: len=" << node->len);
- if (unlikely(node->len < QN_ScanFragNode::NodeSize))
- break;
-
dst->tableId = node->tableId;
dst->schemaVersion = node->tableVersion;
- err = DbspjErr::InvalidTreeParametersSpecification;
- DEBUG("param len: " << param->len);
- if (unlikely(param->len < QN_ScanFragParameters::NodeSize))
- {
- jam();
- DEBUG_CRASH();
- break;
- }
-
ctx.m_resultData = param->resultData;
/**
@@ -4315,6 +4320,24 @@ Dbspj::scanIndex_build(Build_context& ct
do
{
+ err = DbspjErr::InvalidTreeNodeSpecification;
+ DEBUG("scanIndex_build: len=" << node->len);
+ if (unlikely(node->len < QN_ScanIndexNode::NodeSize))
+ {
+ jam();
+ DEBUG_CRASH();
+ break;
+ }
+
+ err = DbspjErr::InvalidTreeParametersSpecification;
+ DEBUG("param len: " << param->len);
+ if (unlikely(param->len < QN_ScanIndexParameters::NodeSize))
+ {
+ jam();
+ DEBUG_CRASH();
+ break;
+ }
+
err = createNode(ctx, requestPtr, treeNodePtr);
if (unlikely(err != 0))
break;
@@ -4355,24 +4378,9 @@ Dbspj::scanIndex_build(Build_context& ct
(paramBits & DABits::PI_DISK_ATTR) == 0);
ScanFragReq::setCorrFactorFlag(requestInfo, 1);
dst->requestInfo = requestInfo;
-
- err = DbspjErr::InvalidTreeNodeSpecification;
- DEBUG("scanIndex_build: len=" << node->len);
- if (unlikely(node->len < QN_ScanIndexNode::NodeSize))
- break;
-
dst->tableId = node->tableId;
dst->schemaVersion = node->tableVersion;
- err = DbspjErr::InvalidTreeParametersSpecification;
- DEBUG("param len: " << param->len);
- if (unlikely(param->len < QN_ScanIndexParameters::NodeSize))
- {
- jam();
- DEBUG_CRASH();
- break;
- }
-
ctx.m_resultData = param->resultData;
/**
@@ -5735,7 +5743,7 @@ Dbspj::scanIndex_execSCAN_NEXTREQ(Signal
DEBUG("scanIndex_execSCAN_NEXTREQ to: " << hex
<< treeNodePtr.p->m_send.m_ref
- << ", m_node_no=" << treeNodePtr.p->m_node_no
+ << ", m_node_no=" << treeNodePtr.p->m_node_no
<< ", senderData: " << req->senderData);
#ifdef DEBUG_SCAN_FRAGREQ
=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp 2011-10-17 13:54:09 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp 2011-10-28 14:17:25 +0000
@@ -4757,6 +4757,125 @@ int runSplitLatency25PctFail(NDBT_Contex
return NDBT_OK;
}
+int
+runMasterFailSlowLCP(NDBT_Context* ctx, NDBT_Step* step)
+{
+ /* Regression test motivated by bug# 13323589: kill the master and one other node while an LCP is held up by a slow node, then check that the cluster recovers. Returns NDBT_OK / NDBT_FAILED. */
+ NdbRestarter res;
+
+ if (res.getNumDbNodes() < 4) // fewer than 4 data nodes: nothing is exercised, report success
+ {
+ return NDBT_OK;
+ }
+
+ int master = res.getMasterNodeId();
+ int otherVictim = res.getRandomNodeOtherNodeGroup(master, rand()); // second node to kill, taken from a different node group than master
+ int nextMaster = res.getNextMasterNodeId(master);
+ nextMaster = (nextMaster == otherVictim) ? res.getNextMasterNodeId(otherVictim) : // otherVictim is killed too, so look one step further
+ nextMaster;
+ assert(nextMaster != master);
+ assert(nextMaster != otherVictim);
+
+ /* Pick a slow node that is none of: current master, next master, other victim */
+ int slowNode= nextMaster; // seeded with an excluded id so the loop body runs at least once
+ while ((slowNode == nextMaster) ||
+ (slowNode == otherVictim) ||
+ (slowNode == master))
+ {
+ slowNode = res.getRandomNotMasterNodeId(rand());
+ }
+
+ ndbout_c("master: %d otherVictim : %d nextMaster: %d slowNode: %d",
+ master,
+ otherVictim,
+ nextMaster,
+ slowNode);
+
+ /* Steps :
+ * 1. Insert slow LCP frag error in slowNode
+ * 2. Start LCP
+ * 3. Wait for LCP to start
+ * 4. Kill at least two nodes including Master
+ * 5. Wait for killed nodes to attempt to rejoin
+ * 6. Remove slow LCP error
+ * 7. Allow system to stabilise + check no errors
+ */
+ // 5073 = Delay on handling BACKUP_FRAGMENT_CONF in LQH
+ if (res.insertErrorInNode(slowNode, 5073)) // step 1
+ {
+ return NDBT_FAILED;
+ }
+
+ {
+ int req[1] = {DumpStateOrd::DihStartLcpImmediately}; // step 2: dump-state request sent to the master
+ if (res.dumpStateOneNode(master, req, 1))
+ {
+ return NDBT_FAILED;
+ }
+ }
+
+ ndbout_c("Giving LCP time to start...");
+
+ NdbSleep_SecSleep(10); // step 3: fixed wait, no explicit check that the LCP has started
+
+ ndbout_c("Killing other victim node (%u)...", otherVictim);
+
+ if (res.restartOneDbNode(otherVictim, false, false, true)) // step 4 (first victim); NOTE(review): flag meanings not visible here - confirm against NdbRestarter
+ {
+ return NDBT_FAILED;
+ }
+
+ ndbout_c("Killing Master node (%u)...", master);
+
+ if (res.restartOneDbNode(master, false, false, true)) // step 4 (master), same flags as above
+ {
+ return NDBT_FAILED;
+ }
+
+ /*
+ ndbout_c("Waiting for old Master node to enter NoStart state...");
+ if (res.waitNodesNoStart(&master, 1, 10))
+ return NDBT_FAILED;
+
+ ndbout_c("Starting old Master...");
+ if (res.startNodes(&master, 1))
+ return NDBT_FAILED;
+
+ */
+ ndbout_c("Waiting for some progress on old Master and other victim restart");
+ NdbSleep_SecSleep(15); // step 5: fixed wait
+
+ ndbout_c("Now removing error insert on slow node (%u)", slowNode);
+
+ if (res.insertErrorInNode(slowNode, 0)) // step 6: error code 0 is used here to remove the 5073 insert
+ {
+ return NDBT_FAILED;
+ }
+
+ ndbout_c("Now wait a while to check stability...");
+ NdbSleep_SecSleep(30); // step 7: fixed wait
+
+ if (res.getNodeStatus(master) == NDB_MGM_NODE_STATUS_NOT_STARTED) // old master did not come back by itself
+ {
+ ndbout_c("Old Master needs kick to restart");
+ if (res.startNodes(&master, 1))
+ {
+ return NDBT_FAILED;
+ }
+ }
+
+ ndbout_c("Wait for cluster recovery...");
+ if (res.waitClusterStarted()) // non-zero result is treated as failure
+ {
+ return NDBT_FAILED;
+ }
+
+
+ ndbout_c("Done");
+ return NDBT_OK;
+}
+
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -5288,6 +5407,11 @@ TESTCASE("Bug57522", "")
{
INITIALIZER(runBug57522);
}
+TESTCASE("MasterFailSlowLCP",
+ "DIH Master failure during a slow LCP can cause a crash.")
+{
+ INITIALIZER(runMasterFailSlowLCP);
+}
TESTCASE("ForceStopAndRestart", "Test restart and stop -with force flag")
{
STEP(runForceStopAndRestart);
=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt 2011-10-20 12:51:03 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt 2011-10-31 10:01:23 +0000
@@ -1835,3 +1835,8 @@ max-time 1800
cmd: testNdbApi
args: -n TestFragmentedSend T1
+max-time: 300
+cmd: testNodeRestart
+args: -nMasterFailSlowLCP T1
+
+
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.1-telco-7.0-spj-scan-vs-scan branch(ole.john.aske:3577 to 3578) | Ole John Aske | 1 Nov |