List:Commits« Previous MessageNext Message »
From:Jan Wedvik Date:August 22 2012 10:50am
Subject:bzr push into mysql-5.1-telco-7.0 branch (jan.wedvik:4959 to 4960)
Bug#14190114
View as plain text  
 4960 Jan Wedvik	2012-08-22
      Bug#14190114: CLUSTER CRASH DUE TO NDBREQUIRE IN ./LOCALPROXY.HPP DBLQH (LINE: 234)
      
      This patch fixes a set of errors that causes node failure (or block new 
      dictionary operations) if the master node crashes in certain states of a drop 
      table operation. This covers bug 14190114 and some related errors that showed 
      up when running the regression test:
      
      1)
      This patch fixes the direct cause of bug 14190114. The patch ensures that
      the master will reject SCHEMA_TRANS_BEGIN_REQ and SCHEMA_TRANS_END_REQ messages
      while there are outstanding DICT_TAKEOVER_REQs. If SCHEMA_TRANS_BEGIN_REQ was
      allowed, the system could end up in a situation where two transactions had
      outstanding DROP_TAB_REQs at the same time. This caused bug 14190114.
      Likewise, SCHEMA_TRANS_END_REQ cannot be processed before the new master knows
      the state of the transaction (i.e. after it has receibed the 
      DICT_TAKEOVER_CONFs).
      
      2)
      This patch fixes an error in the construction of the CONTINUB message that
      DICT sends to itself if it receives a DICT_TAKEOVER_REQ while it still
      has active operations.
      
      This patch also substitutes sendSignal with sendSignalWithDelay. This is done
      for two reasons:
      I) To avoid waisting CPU cycles by doing busy wait.
      II) To prevent CONTINUB messages from filling the jam trace buffer (this made
      the error report fro the customer harder to analyze.)
      
      3)
      This patch disables counting of SCHEMA_TRANS_IMPL_CONF and 
      SCHEMA_TRANS_IMPL_REF messages during takeover. Normally the master counts 
      these to know when all participants have completed an operation. But during
      a takeover, the new master will not know the number of outstanding messages
      until it has received DICT_TAKEOVER_CONF.
      
      4)
      This patch ensures that drop table operations are set to start OS_COMPLETED
      after finishing RT_COMPLETE requests. As it was, these would remain in state
      OS_COMPLETING. This meant that DICT could never send DICT_TAKEOVER_CONF, since
      this can only be done when all operations are in 'passive' states.

    modified:
      storage/ndb/include/kernel/signaldata/SchemaTrans.hpp
      storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp
      storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp
      storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
      storage/ndb/test/ndbapi/testDict.cpp
      storage/ndb/test/run-test/daily-basic-tests.txt
 4959 Martin Skold	2012-08-21
      ndb - bump version to 7.0.36

    modified:
      configure.in
      storage/ndb/ndb_configure.m4
=== modified file 'storage/ndb/include/kernel/signaldata/SchemaTrans.hpp'
--- a/storage/ndb/include/kernel/signaldata/SchemaTrans.hpp	2011-10-07 13:15:08 +0000
+++ b/storage/ndb/include/kernel/signaldata/SchemaTrans.hpp	2012-08-22 10:49:42 +0000
@@ -81,6 +81,7 @@ struct SchemaTransEndConf {
 struct SchemaTransEndRef {
   enum ErrorCode {
     NoError = 0,
+    Busy = 701,
     NotMaster = 702,
     InvalidTransKey = 781,
     InvalidTransId = 782,

=== modified file 'storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp	2012-06-21 12:35:31 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp	2012-08-22 10:49:42 +0000
@@ -2225,6 +2225,7 @@ void Dbdict::initCommonData()
   c_systemRestart = false;
   c_initialNodeRestart = false;
   c_nodeRestart = false;
+  c_takeOverInProgress = false;
 
   c_outstanding_sub_startstop = 0;
   c_sub_startstop_lock.clear();
@@ -4619,6 +4620,7 @@ void Dbdict::handle_master_takeover(Sign
   }
   DictTakeoverReq* req = (DictTakeoverReq*)signal->getDataPtrSend();
   req->senderRef = reference();
+  c_takeOverInProgress = true;
   sendSignal(rg, GSN_DICT_TAKEOVER_REQ, signal,
                DictTakeoverReq::SignalLength, JBB);
 }
@@ -7827,8 +7829,8 @@ Dbdict::dropTable_complete_done(Signal*
     sendSignal(SUMA_REF, GSN_DROP_TAB_CONF, signal,
                DropTabConf::SignalLength, JBB);
   }
-
-  sendTransConf(signal, trans_ptr);
+  ndbassert(op_ptr.p->m_state == SchemaOp::OS_COMPLETING);
+  sendTransConf(signal, op_ptr);
 }
 
 // DropTable: ABORT
@@ -19745,12 +19747,16 @@ Dbdict::execDICT_TAKEOVER_REQ(Signal* si
        case SchemaOp::OS_COMMITTING:
        case SchemaOp::OS_COMPLETING:
        {
+         /**
+          * Wait 100ms and check again. This delay is there to save CPU cycles
+          * and to avoid filling the jam trace buffer.
+          */
          jam();
          Uint32* data = &signal->theData[0];
-         data[0] = ZDICT_TAKEOVER_REQ;
          memmove(&data[1], &data[0], DictTakeoverReq::SignalLength << 2);
-         sendSignal(reference(), GSN_CONTINUEB, signal,
-                    1 + DictTakeoverReq::SignalLength, JBB);
+         data[0] = ZDICT_TAKEOVER_REQ;
+         sendSignalWithDelay(reference(), GSN_CONTINUEB, signal,
+                             100, 1 + DictTakeoverReq::SignalLength);
          return;
        }
        default:
@@ -19873,6 +19879,7 @@ Dbdict::execDICT_TAKEOVER_REF(Signal* si
       return;
     }
   }
+  c_takeOverInProgress = false;
   check_takeover_replies(signal);
 }
 
@@ -19918,6 +19925,7 @@ Dbdict::execDICT_TAKEOVER_CONF(Signal* s
       return;
     }
   }
+  c_takeOverInProgress = false;
   check_takeover_replies(signal);
 }
 
@@ -24813,6 +24821,18 @@ Dbdict::execSCHEMA_TRANS_BEGIN_REQ(Signa
       break;
     }
 
+    if (c_takeOverInProgress)
+    {
+      /**
+       * There is a dict takeover in progress. There may thus another
+       * transaction that should be rolled backward or forward before we
+       * can allow another transaction to start.
+       */
+      jam();
+      setError(error, SchemaTransBeginRef::Busy, __LINE__);
+      break;
+    }
+
     if (!check_ndb_versions() && !localTrans)
     {
       jam();
@@ -25010,6 +25030,18 @@ Dbdict::execSCHEMA_TRANS_END_REQ(Signal*
       setError(error, SchemaTransEndRef::NotMaster, __LINE__);
       break;
     }
+
+    if (c_takeOverInProgress)
+    {
+      /**
+       * There is a dict takeover in progress, and the transaction may thus
+       * be in an inconsistent state. Therefore we cannot process this request
+       * now.
+       */
+      jam();
+      setError(error, SchemaTransEndRef::Busy, __LINE__);
+      break;
+    }
 #ifdef MARTIN
     ndbout_c("Dbdict::execSCHEMA_TRANS_END_REQ: trans %u, state %u", trans_ptr.i, trans_ptr.p->m_state);
 #endif
@@ -25222,6 +25254,17 @@ Dbdict::execSCHEMA_TRANS_IMPL_CONF(Signa
   jamEntry();
   ndbrequire(signal->getNoOfSections() == 0);
 
+  if (c_takeOverInProgress)
+  {
+    /**
+     * The new master will rebuild the transaction state from the
+     * DICT_TAKEOVER_CONF signals. Therefore we ignore this signal during 
+     * takeover.
+     */
+    jam();
+    return;
+  }
+
   const SchemaTransImplConf* conf =
     (const SchemaTransImplConf*)signal->getDataPtr();
 
@@ -25248,6 +25291,17 @@ Dbdict::execSCHEMA_TRANS_IMPL_REF(Signal
   jamEntry();
   ndbrequire(signal->getNoOfSections() == 0);
 
+  if (c_takeOverInProgress)
+  {
+    /**
+     * The new master will rebuild the transaction state from the
+     * DICT_TAKEOVER_CONF signals. Therefore we ignore this signal during 
+     * takeover.
+     */
+    jam();
+    return;
+  }
+
   SchemaTransImplRef refCopy =
     *(SchemaTransImplRef*)signal->getDataPtr();
   SchemaTransImplRef * ref = &refCopy;

=== modified file 'storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp'
--- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp	2012-01-23 08:00:45 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp	2012-08-22 10:49:42 +0000
@@ -1319,6 +1319,13 @@ private:
   bool   c_nodeRestart;
   bool   c_initialNodeRestart;
   Uint32 c_tabinfoReceived;
+  /**
+   * This flag indicates that a dict takeover is in progress, specifically 
+   * that the new master has outstanding DICT_TAKEOVER_REQ messages. The flag
+   * is used to prevent client from starting (or ending) transactions during
+   * takeover.
+   */
+  bool   c_takeOverInProgress;
 
   /**
    * Temporary structure used when parsing table info

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2012-08-13 15:04:51 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2012-08-22 10:49:42 +0000
@@ -2513,7 +2513,31 @@ Dblqh::dropTab_wait_usage(Signal* signal
 void
 Dblqh::execDROP_TAB_REQ(Signal* signal){
   jamEntry();
-
+  if (ERROR_INSERTED(5076))
+  {
+    /**
+     * This error insert simulates a situation where it takes a long time
+     * to execute DROP_TAB_REQ, such that we can crash the (dict) master
+     * while there is an outstanding DROP_TAB_REQ.
+     */
+    jam();
+    sendSignalWithDelay(reference(), GSN_DROP_TAB_REQ, signal, 1000,
+                        signal->getLength());
+    return;
+  }
+  if (ERROR_INSERTED(5077))
+  {
+    jam();
+    CLEAR_ERROR_INSERT_VALUE;
+    /** 
+     * Kill this node 2 seconds from now. We wait for two seconds to make sure
+     * that DROP_TAB_REQ messages have reached other nodes before this one
+     * dies.
+     */
+    signal->theData[0] = 9999;
+    sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 2000, 1);
+    return;
+  }
   DropTabReq reqCopy = * (DropTabReq*)signal->getDataPtr();
   DropTabReq* req = &reqCopy;
   

=== modified file 'storage/ndb/test/ndbapi/testDict.cpp'
--- a/storage/ndb/test/ndbapi/testDict.cpp	2011-12-15 08:28:12 +0000
+++ b/storage/ndb/test/ndbapi/testDict.cpp	2012-08-22 10:49:42 +0000
@@ -776,6 +776,144 @@ int runUseTableUntilStopped3(NDBT_Contex
   return NDBT_OK;
 }
 
+/**
+ * This is a regression test for bug 14190114 
+ * "CLUSTER CRASH DUE TO NDBREQUIRE IN ./LOCALPROXY.HPP DBLQH (LINE: 234)".
+ * This bug occurs if there is a takeover (i.e. the master node crashes) 
+ * while an LQH block is executing a DROP_TAB_REQ signal. It only affects
+ * multi-threaded ndb.
+ */
+static int
+runDropTakeoverTest(NDBT_Context* ctx, NDBT_Step* step)
+{
+  NdbRestarter restarter;
+  if (restarter.getNumDbNodes() == 1)
+  {
+    g_info << "Cannot do this test with just one datanode." << endl;
+    return NDBT_OK;
+  }
+
+  Ndb* const ndb = GETNDB(step);
+  NdbDictionary::Dictionary* const dict = ndb->getDictionary();
+
+  // First we create a table that is a copy of ctx->getTab().
+  NdbDictionary::Table copyTab(*ctx->getTab());
+  const char* copyName = "copyTab";
+
+  copyTab.setName(copyName);
+  if (dict->createTable(copyTab) != 0)
+  {
+    g_err << "Failed to create table " << copyName << endl
+          << dict->getNdbError() << endl;
+    return NDBT_FAILED;
+  }
+
+  /**
+   * Find the node id of the master node and another data node that is not 
+   * the master.
+   */
+  const int masterNodeId = restarter.getMasterNodeId();
+  const int nonMasterNodeId =
+    masterNodeId == restarter.getDbNodeId(0) ?
+    restarter.getDbNodeId(1) : 
+    restarter.getDbNodeId(0);
+
+  /**
+   * This error insert makes LQH resend the DROP_TAB_REQ to itself (with a
+   * delay) rather than executing it, until the error insert is reset.
+   * This makes it appear as if though the LQH block spends a long time 
+   * executing the DROP_TAB_REQ signal.
+   */
+  g_info << "Insert error 5076 in node " << nonMasterNodeId << endl;
+  require(restarter.insertErrorInNode(nonMasterNodeId, 5076) == 0);
+  /**
+   * This error insert makes the master node crash when one of its LQH 
+   * blocks tries to execute a DROP_TAB_REQ signal. This will then trigger
+   * a takeover.
+   */
+  g_info << "Insert error 5077 in node " << masterNodeId << endl;
+  require(restarter.insertErrorInNode(masterNodeId, 5077) == 0);
+
+  // This dropTable should fail, since the master node dies.
+  g_info << "Trying to drop table " << copyName << endl;
+  if (dict->dropTable(copyName) == 0)
+  {
+    g_err << "Unexpectedly managed to drop table " << copyName << endl;
+    return NDBT_FAILED;
+  }
+
+  /** 
+   * Check that only old master is dead. Bug 14190114 would cause other nodes
+   * to die as well.
+   */
+  const int deadNodeId = restarter.checkClusterAlive(&masterNodeId, 1);
+  if (deadNodeId != 0)
+  {
+    g_err << "NodeId " << deadNodeId << " is down." << endl;
+    return NDBT_FAILED;
+  }
+  
+  // Reset error insert.
+  g_info << "insert error 0 in node " << nonMasterNodeId << endl;
+  require(restarter.insertErrorInNode(nonMasterNodeId, 0) == 0);
+
+  // Verify that old master comes back up, and that no other node crashed.
+  g_info << "Waiting for all nodes to be up." << endl;
+  if (restarter.waitClusterStarted() != 0)
+  {
+    g_err << "One or more cluster nodes are not up." << endl;
+    return NDBT_FAILED;
+  }
+
+  /**
+   * The 'drop table' operation should have been rolled forward, since the
+   * node crash happened in the complete phase. Verify that the table is 
+   * gone.
+   */
+  g_info << "Verifying that table " << copyName << " was deleted." << endl;
+  if (dict->getTable(copyName) == NULL)
+  {
+    if (dict->getNdbError().code != 723) // 723 = no such table existed.
+    {
+      g_err << "dict->getTable() for " << copyName 
+            << " failed in unexpedted way:" << endl
+            << dict->getNdbError() << endl;
+      return NDBT_FAILED;
+    }
+  }
+  else
+  {
+    g_err << "Transaction dropping " << copyName << " was not rolled forward"
+          << endl;
+    return NDBT_FAILED;
+  }
+  
+  /** 
+   * Do another dictionary transaction, to verify that the cluster allows that.
+   */
+  NdbDictionary::Table extraTab(*ctx->getTab());
+  const char* extraName = "extraTab";
+
+  extraTab.setName(extraName);
+  g_info << "Trying to create table " << extraName << endl;
+  if (dict->createTable(extraTab) != 0)
+  {
+    g_err << "Failed to create table " << extraName << endl
+          << dict->getNdbError() << endl;
+    return NDBT_FAILED;
+  }
+
+  // Clean up by dropping extraTab.
+  g_info << "Trying to drop table " << extraName << endl;
+  if (dict->dropTable(extraName) != 0)
+  {
+    g_err << "Failed to drop table " << extraName << endl
+          << dict->getNdbError() << endl;
+    return NDBT_FAILED;
+  }
+  
+  return NDBT_OK;
+}
 
 int
 runCreateMaxTables(NDBT_Context* ctx, NDBT_Step* step)
@@ -9076,6 +9214,9 @@ TESTCASE("CreateAndDropDuring",
   STEP(runCreateAndDropDuring);
   STEP(runUseTableUntilStopped);
 }
+TESTCASE("DropWithTakeover","bug 14190114"){
+  INITIALIZER(runDropTakeoverTest);
+}
 TESTCASE("CreateInvalidTables", 
 	 "Try to create the invalid tables we have defined\n"){ 
   INITIALIZER(runCreateInvalidTables);

=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt	2012-08-13 13:03:29 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt	2012-08-22 10:49:42 +0000
@@ -1868,6 +1868,10 @@ cmd: testDict
 args: -n IndexStatCreate T1
 
 max-time: 300
+cmd: testDict
+args: -n DropWithTakeover T1
+
+max-time: 300
 cmd: testBasic
 args: -n LeakApiConnectObjects T1
 

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-5.1-telco-7.0 branch (jan.wedvik:4959 to 4960)Bug#14190114Jan Wedvik22 Aug