List: Commits « Previous Message | Next Message »
From: Ole John Aske  Date: October 31 2011 10:02am
Subject: bzr push into mysql-5.1-telco-7.0-spj-scan-vs-scan branch
(ole.john.aske:3577 to 3578)
View as plain text  
 3578 Ole John Aske	2011-10-31 [merge]
      Merge telco-7.0 -> telco-7.0-spj-scan-scan

    modified:
      storage/ndb/src/kernel/blocks/ERROR_codes.txt
      storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
      storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
      storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp
      storage/ndb/test/ndbapi/testNodeRestart.cpp
      storage/ndb/test/run-test/daily-basic-tests.txt
 3577 Ole John Aske	2011-10-28 [merge]
      merge telco-7.0 -> 7.0-spj-scan-scan

    added:
      storage/ndb/include/kernel/statedesc.hpp
      storage/ndb/src/kernel/blocks/dblqh/DblqhStateDesc.cpp
      storage/ndb/src/kernel/blocks/dbtc/DbtcStateDesc.cpp
    modified:
      mysql-test/suite/ndb/r/ndbinfo.result
      mysql-test/suite/ndb/t/ndbinfo.test
      sql/ha_ndbcluster_connection.cc
      storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
      storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
      storage/ndb/src/ndbapi/NdbQueryOperation.cpp
      storage/ndb/tools/CMakeLists.txt
      storage/ndb/tools/Makefile.am
      storage/ndb/tools/ndbinfo_sql.cpp
=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2011-06-07 12:19:47 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2011-10-31 10:01:23 +0000
@@ -18,7 +18,7 @@ Next NDBCNTR 1002
 Next NDBFS 2000
 Next DBACC 3002
 Next DBTUP 4035
-Next DBLQH 5072
+Next DBLQH 5074
 Next DBDICT 6026
 Next DBDIH 7229
 Next DBTC 8092

=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2011-10-28 09:04:10 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2011-10-31 10:01:23 +0000
@@ -2372,6 +2372,17 @@ void Dbdih::execSTART_PERMREQ(Signal* si
   CRASH_INSERTION(7122);
   ndbrequire(isMaster());
   ndbrequire(refToNode(retRef) == nodeId);
+  if (c_lcpMasterTakeOverState.state != LMTOS_IDLE)
+  {
+    jam();
+    infoEvent("DIH : Denied request for start permission from %u "
+              "while LCP Master takeover in progress.",
+              nodeId);
+    signal->theData[0] = nodeId;
+    signal->theData[1] = StartPermRef::ZNODE_START_DISALLOWED_ERROR;
+    sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
+    return;
+  }
   if ((c_nodeStartMaster.activeState) ||
       (c_nodeStartMaster.wait != ZFALSE) ||
       ERROR_INSERTED_CLEAR(7175)) {

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2011-10-20 12:51:03 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2011-10-31 10:01:23 +0000
@@ -13788,6 +13788,15 @@ void Dblqh::execBACKUP_FRAGMENT_REF(Sign
 void Dblqh::execBACKUP_FRAGMENT_CONF(Signal* signal) 
 {
   jamEntry();
+
+  if (ERROR_INSERTED(5073))
+  {
+    ndbout_c("Delaying BACKUP_FRAGMENT_CONF");
+    sendSignalWithDelay(reference(), GSN_BACKUP_FRAGMENT_CONF, signal, 500,
+                        signal->getLength());
+    return;
+  }
+
   //BackupFragmentConf* conf= (BackupFragmentConf*)signal->getDataPtr();
 
   lcpPtr.i = 0;

=== modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp	2011-10-28 09:04:10 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp	2011-10-31 10:01:23 +0000
@@ -2705,6 +2705,23 @@ Dbspj::lookup_build(Build_context& ctx,
   const QN_LookupParameters * param = (const QN_LookupParameters*)qp;
   do
   {
+    err = DbspjErr::InvalidTreeNodeSpecification;
+    if (unlikely(node->len < QN_LookupNode::NodeSize))
+    {
+      jam();
+      DEBUG_CRASH();
+      break;
+    }
+
+    err = DbspjErr::InvalidTreeParametersSpecification;
+    DEBUG("param len: " << param->len);
+    if (unlikely(param->len < QN_LookupParameters::NodeSize))
+    {
+      jam();
+      DEBUG_CRASH();
+      break;
+    }
+
     err = createNode(ctx, requestPtr, treeNodePtr);
     if (unlikely(err != 0))
     {
@@ -2756,13 +2773,6 @@ Dbspj::lookup_build(Build_context& ctx,
       dst->requestInfo = requestInfo;
     }
 
-    err = DbspjErr::InvalidTreeNodeSpecification;
-    if (unlikely(node->len < QN_LookupNode::NodeSize))
-    {
-      DEBUG_CRASH();
-      break;
-    }
-
     if (treeBits & QN_LookupNode::L_UNIQUE_INDEX)
     {
       jam();
@@ -2775,14 +2785,6 @@ Dbspj::lookup_build(Build_context& ctx,
     Uint32 tableSchemaVersion = tableId + ((schemaVersion << 16) & 0xFFFF0000);
     dst->tableSchemaVersion = tableSchemaVersion;
 
-    err = DbspjErr::InvalidTreeParametersSpecification;
-    DEBUG("param len: " << param->len);
-    if (unlikely(param->len < QN_LookupParameters::NodeSize))
-    {
-      DEBUG_CRASH();
-      break;
-    }
-
     ctx.m_resultData = param->resultData;
     treeNodePtr.p->m_lookup_data.m_api_resultRef = ctx.m_resultRef;
     treeNodePtr.p->m_lookup_data.m_api_resultData = param->resultData;
@@ -3765,6 +3767,24 @@ Dbspj::scanFrag_build(Build_context& ctx
 
   do
   {
+    err = DbspjErr::InvalidTreeNodeSpecification;
+    DEBUG("scanFrag_build: len=" << node->len);
+    if (unlikely(node->len < QN_ScanFragNode::NodeSize))
+    {
+      jam();
+      DEBUG_CRASH();
+      break;
+    }
+
+    err = DbspjErr::InvalidTreeParametersSpecification;
+    DEBUG("param len: " << param->len);
+    if (unlikely(param->len < QN_ScanFragParameters::NodeSize))
+    {
+      jam();
+      DEBUG_CRASH();
+      break;
+    }
+
     err = createNode(ctx, requestPtr, treeNodePtr);
     if (unlikely(err != 0))
       break;
@@ -3810,24 +3830,9 @@ Dbspj::scanFrag_build(Build_context& ctx
                                (treeBits & DABits::NI_LINKED_DISK) == 0 &&
                                (paramBits & DABits::PI_DISK_ATTR) == 0);
     dst->requestInfo = requestInfo;
-
-    err = DbspjErr::InvalidTreeNodeSpecification;
-    DEBUG("scanFrag_build: len=" << node->len);
-    if (unlikely(node->len < QN_ScanFragNode::NodeSize))
-      break;
-
     dst->tableId = node->tableId;
     dst->schemaVersion = node->tableVersion;
 
-    err = DbspjErr::InvalidTreeParametersSpecification;
-    DEBUG("param len: " << param->len);
-    if (unlikely(param->len < QN_ScanFragParameters::NodeSize))
-    {
-      jam();
-      DEBUG_CRASH();
-      break;
-    }
-
     ctx.m_resultData = param->resultData;
 
     /**
@@ -4315,6 +4320,24 @@ Dbspj::scanIndex_build(Build_context& ct
 
   do
   {
+    err = DbspjErr::InvalidTreeNodeSpecification;
+    DEBUG("scanIndex_build: len=" << node->len);
+    if (unlikely(node->len < QN_ScanIndexNode::NodeSize))
+    {
+      jam();
+      DEBUG_CRASH();
+      break;
+    }
+
+    err = DbspjErr::InvalidTreeParametersSpecification;
+    DEBUG("param len: " << param->len);
+    if (unlikely(param->len < QN_ScanIndexParameters::NodeSize))
+    {
+      jam();
+      DEBUG_CRASH();
+      break;
+    }
+
     err = createNode(ctx, requestPtr, treeNodePtr);
     if (unlikely(err != 0))
       break;
@@ -4355,24 +4378,9 @@ Dbspj::scanIndex_build(Build_context& ct
                                (paramBits & DABits::PI_DISK_ATTR) == 0);
     ScanFragReq::setCorrFactorFlag(requestInfo, 1);
     dst->requestInfo = requestInfo;
-
-    err = DbspjErr::InvalidTreeNodeSpecification;
-    DEBUG("scanIndex_build: len=" << node->len);
-    if (unlikely(node->len < QN_ScanIndexNode::NodeSize))
-      break;
-
     dst->tableId = node->tableId;
     dst->schemaVersion = node->tableVersion;
 
-    err = DbspjErr::InvalidTreeParametersSpecification;
-    DEBUG("param len: " << param->len);
-    if (unlikely(param->len < QN_ScanIndexParameters::NodeSize))
-    {
-      jam();
-      DEBUG_CRASH();
-      break;
-    }
-
     ctx.m_resultData = param->resultData;
 
     /**
@@ -5735,7 +5743,7 @@ Dbspj::scanIndex_execSCAN_NEXTREQ(Signal
 
       DEBUG("scanIndex_execSCAN_NEXTREQ to: " << hex
             << treeNodePtr.p->m_send.m_ref
-              << ", m_node_no=" << treeNodePtr.p->m_node_no
+            << ", m_node_no=" << treeNodePtr.p->m_node_no
             << ", senderData: " << req->senderData);
 
 #ifdef DEBUG_SCAN_FRAGREQ

=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp	2011-10-17 13:54:09 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp	2011-10-28 14:17:25 +0000
@@ -4757,6 +4757,125 @@ int runSplitLatency25PctFail(NDBT_Contex
   return NDBT_OK;
 }
 
+int
+runMasterFailSlowLCP(NDBT_Context* ctx, NDBT_Step* step) // ctx and step are not referenced in this body
+{
+  /* Motivated by bug# 13323589: kill the master plus one more node while an LCP is held up on a third node, then check the cluster recovers. Returns NDBT_OK or NDBT_FAILED. */
+  NdbRestarter res;
+
+  if (res.getNumDbNodes() < 4) // fewer than 4 data nodes: scenario is not run, reported as NDBT_OK
+  {
+    return NDBT_OK;
+  }
+
+  int master = res.getMasterNodeId();
+  int otherVictim = res.getRandomNodeOtherNodeGroup(master, rand()); // second node to kill; presumably in a different node group than master (per helper name) - TODO confirm
+  int nextMaster = res.getNextMasterNodeId(master);
+  nextMaster = (nextMaster == otherVictim) ? res.getNextMasterNodeId(otherVictim) :
+    nextMaster; // if the successor of master is itself a victim, use the successor of that victim instead
+  assert(nextMaster != master); // NOTE(review): assert() is compiled out when NDEBUG is defined
+  assert(nextMaster != otherVictim);
+
+  /* Get a node which is not the current master, the next master or the other victim */
+  int slowNode= nextMaster; // start on an excluded value so the loop below runs at least once
+  while ((slowNode == nextMaster) ||
+         (slowNode == otherVictim) ||
+         (slowNode == master))
+  {
+    slowNode = res.getRandomNotMasterNodeId(rand()); // retry with a new random pick until none of the three excluded nodes is hit
+  }
+
+  ndbout_c("master: %d otherVictim : %d nextMaster: %d slowNode: %d",
+           master,
+           otherVictim,
+           nextMaster,
+           slowNode);
+
+  /* Steps :
+   * 1. Insert slow LCP frag error in slowNode
+   * 2. Start LCP
+   * 3. Wait for LCP to start
+   * 4. Kill at least two nodes including Master
+   * 5. Wait for killed nodes to attempt to rejoin
+   * 6. Remove slow LCP error
+   * 7. Allow system to stabilise + check no errors
+   */
+  // 5073 = Delay on handling BACKUP_FRAGMENT_CONF in LQH
+  if (res.insertErrorInNode(slowNode, 5073)) // step 1; a non-zero result is treated as failure
+  {
+    return NDBT_FAILED;
+  }
+
+  {
+    int req[1] = {DumpStateOrd::DihStartLcpImmediately}; // step 2: single-word dump request
+    if (res.dumpStateOneNode(master, req, 1)) // sent to the master node only
+    {
+      return NDBT_FAILED;
+    }
+  }
+
+  ndbout_c("Giving LCP time to start...");
+
+  NdbSleep_SecSleep(10); // step 3: fixed wait; nothing here verifies that the LCP actually started
+
+  ndbout_c("Killing other victim node (%u)...", otherVictim); // NOTE(review): %u is used with an int argument
+
+  if (res.restartOneDbNode(otherVictim, false, false, true)) // step 4; NOTE(review): meaning of the three flags is not visible here - confirm against NdbRestarter
+  {
+    return NDBT_FAILED;
+  }
+
+  ndbout_c("Killing Master node (%u)...", master); // NOTE(review): %u is used with an int argument
+
+  if (res.restartOneDbNode(master, false, false, true)) // same flags as for the other victim
+  {
+    return NDBT_FAILED;
+  }
+
+  /*
+     ndbout_c("Waiting for old Master node to enter NoStart state...");
+     if (res.waitNodesNoStart(&master, 1, 10))
+     return NDBT_FAILED;
+
+     ndbout_c("Starting old Master...");
+     if (res.startNodes(&master, 1))
+     return NDBT_FAILED;
+
+  */
+  ndbout_c("Waiting for some progress on old Master and other victim restart");
+  NdbSleep_SecSleep(15); // step 5: fixed wait, no state is checked
+
+  ndbout_c("Now removing error insert on slow node (%u)", slowNode); // NOTE(review): %u is used with an int argument
+
+  if (res.insertErrorInNode(slowNode, 0)) // step 6: error code 0 is what this test uses to remove the insert
+  {
+    return NDBT_FAILED;
+  }
+
+  ndbout_c("Now wait a while to check stability...");
+  NdbSleep_SecSleep(30); // step 7: fixed wait before inspecting node state
+
+  if (res.getNodeStatus(master) == NDB_MGM_NODE_STATUS_NOT_STARTED) // old master did not restart by itself
+  {
+    ndbout_c("Old Master needs kick to restart");
+    if (res.startNodes(&master, 1)) // explicitly start just that one node
+    {
+      return NDBT_FAILED;
+    }
+  }
+
+  ndbout_c("Wait for cluster recovery...");
+  if (res.waitClusterStarted()) // a non-zero result is treated as failure
+  {
+    return NDBT_FAILED;
+  }
+
+
+  ndbout_c("Done");
+  return NDBT_OK;
+}
+
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -5288,6 +5407,11 @@ TESTCASE("Bug57522", "")
 {
   INITIALIZER(runBug57522);
 }
+TESTCASE("MasterFailSlowLCP",
+         "DIH Master failure during a slow LCP can cause a crash.")
+{
+  INITIALIZER(runMasterFailSlowLCP); // the only entry registered for this test case
+}
 TESTCASE("ForceStopAndRestart", "Test restart and stop -with force flag")
 {
   STEP(runForceStopAndRestart);

=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt	2011-10-20 12:51:03 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt	2011-10-31 10:01:23 +0000
@@ -1835,3 +1835,8 @@ max-time 1800
 cmd: testNdbApi
 args: -n TestFragmentedSend T1
 
+max-time: 300
+cmd: testNodeRestart
+args: -nMasterFailSlowLCP T1
+
+

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-5.1-telco-7.0-spj-scan-vs-scan branch (ole.john.aske:3577 to 3578) - Ole John Aske, 1 Nov