From: Jan Wedvik Date: August 22 2012 10:50am Subject: bzr push into mysql-5.1-telco-7.0 branch (jan.wedvik:4959 to 4960) Bug#14190114 List-Archive: http://lists.mysql.com/commits/144594 X-Bug: 14190114 Message-Id: <20120822105014.14235.72264.4960@atum17.no.oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 4960 Jan Wedvik 2012-08-22 Bug#14190114: CLUSTER CRASH DUE TO NDBREQUIRE IN ./LOCALPROXY.HPP DBLQH (LINE: 234) This patch fixes a set of errors that causes node failure (or block new dictionary operations) if the master node crashes in certain states of a drop table operation. This covers bug 14190114 and some related errors that showed up when running the regression test: 1) This patch fixes the direct cause of bug 14190114. The patch ensures that the master will reject SCHEMA_TRANS_BEGIN_REQ and SCHEMA_TRANS_END_REQ messages while there are outstanding DICT_TAKEOVER_REQs. If SCHEMA_TRANS_BEGIN_REQ was allowed, the system could end up in a situation where two transactions had outstanding DROP_TAB_REQs at the same time. This caused bug 14190114. Likewise, SCHEMA_TRANS_END_REQ cannot be processed before the new master knows the state of the transaction (i.e. after it has receibed the DICT_TAKEOVER_CONFs). 2) This patch fixes an error in the construction of the CONTINUB message that DICT sends to itself if it receives a DICT_TAKEOVER_REQ while it still has active operations. This patch also substitutes sendSignal with sendSignalWithDelay. This is done for two reasons: I) To avoid waisting CPU cycles by doing busy wait. II) To prevent CONTINUB messages from filling the jam trace buffer (this made the error report fro the customer harder to analyze.) 3) This patch disables counting of SCHEMA_TRANS_IMPL_CONF and SCHEMA_TRANS_IMPL_REF messages during takeover. Normally the master counts these to know when all participants have completed an operation. But during a takeover, the new master will not know the number of outstanding messages until it has received DICT_TAKEOVER_CONF. 4) This patch ensures that drop table operations are set to start OS_COMPLETED after finishing RT_COMPLETE requests. As it was, these would remain in state OS_COMPLETING. This meant that DICT could never send DICT_TAKEOVER_CONF, since this can only be done when all operations are in 'passive' states. modified: storage/ndb/include/kernel/signaldata/SchemaTrans.hpp storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp storage/ndb/test/ndbapi/testDict.cpp storage/ndb/test/run-test/daily-basic-tests.txt 4959 Martin Skold 2012-08-21 ndb - bump version to 7.0.36 modified: configure.in storage/ndb/ndb_configure.m4 === modified file 'storage/ndb/include/kernel/signaldata/SchemaTrans.hpp' --- a/storage/ndb/include/kernel/signaldata/SchemaTrans.hpp 2011-10-07 13:15:08 +0000 +++ b/storage/ndb/include/kernel/signaldata/SchemaTrans.hpp 2012-08-22 10:49:42 +0000 @@ -81,6 +81,7 @@ struct SchemaTransEndConf { struct SchemaTransEndRef { enum ErrorCode { NoError = 0, + Busy = 701, NotMaster = 702, InvalidTransKey = 781, InvalidTransId = 782, === modified file 'storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp' --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp 2012-06-21 12:35:31 +0000 +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp 2012-08-22 10:49:42 +0000 @@ -2225,6 +2225,7 @@ void Dbdict::initCommonData() c_systemRestart = false; c_initialNodeRestart = false; c_nodeRestart = false; + c_takeOverInProgress = false; c_outstanding_sub_startstop = 0; c_sub_startstop_lock.clear(); @@ -4619,6 +4620,7 @@ void Dbdict::handle_master_takeover(Sign } DictTakeoverReq* req = (DictTakeoverReq*)signal->getDataPtrSend(); req->senderRef = reference(); + c_takeOverInProgress = true; sendSignal(rg, GSN_DICT_TAKEOVER_REQ, signal, DictTakeoverReq::SignalLength, JBB); } @@ -7827,8 +7829,8 @@ Dbdict::dropTable_complete_done(Signal* sendSignal(SUMA_REF, GSN_DROP_TAB_CONF, signal, DropTabConf::SignalLength, JBB); } - - sendTransConf(signal, trans_ptr); + ndbassert(op_ptr.p->m_state == SchemaOp::OS_COMPLETING); + sendTransConf(signal, op_ptr); } // DropTable: ABORT @@ -19745,12 +19747,16 @@ Dbdict::execDICT_TAKEOVER_REQ(Signal* si case SchemaOp::OS_COMMITTING: case SchemaOp::OS_COMPLETING: { + /** + * Wait 100ms and check again. This delay is there to save CPU cycles + * and to avoid filling the jam trace buffer. + */ jam(); Uint32* data = &signal->theData[0]; - data[0] = ZDICT_TAKEOVER_REQ; memmove(&data[1], &data[0], DictTakeoverReq::SignalLength << 2); - sendSignal(reference(), GSN_CONTINUEB, signal, - 1 + DictTakeoverReq::SignalLength, JBB); + data[0] = ZDICT_TAKEOVER_REQ; + sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, + 100, 1 + DictTakeoverReq::SignalLength); return; } default: @@ -19873,6 +19879,7 @@ Dbdict::execDICT_TAKEOVER_REF(Signal* si return; } } + c_takeOverInProgress = false; check_takeover_replies(signal); } @@ -19918,6 +19925,7 @@ Dbdict::execDICT_TAKEOVER_CONF(Signal* s return; } } + c_takeOverInProgress = false; check_takeover_replies(signal); } @@ -24813,6 +24821,18 @@ Dbdict::execSCHEMA_TRANS_BEGIN_REQ(Signa break; } + if (c_takeOverInProgress) + { + /** + * There is a dict takeover in progress. There may thus another + * transaction that should be rolled backward or forward before we + * can allow another transaction to start. + */ + jam(); + setError(error, SchemaTransBeginRef::Busy, __LINE__); + break; + } + if (!check_ndb_versions() && !localTrans) { jam(); @@ -25010,6 +25030,18 @@ Dbdict::execSCHEMA_TRANS_END_REQ(Signal* setError(error, SchemaTransEndRef::NotMaster, __LINE__); break; } + + if (c_takeOverInProgress) + { + /** + * There is a dict takeover in progress, and the transaction may thus + * be in an inconsistent state. Therefore we cannot process this request + * now. + */ + jam(); + setError(error, SchemaTransEndRef::Busy, __LINE__); + break; + } #ifdef MARTIN ndbout_c("Dbdict::execSCHEMA_TRANS_END_REQ: trans %u, state %u", trans_ptr.i, trans_ptr.p->m_state); #endif @@ -25222,6 +25254,17 @@ Dbdict::execSCHEMA_TRANS_IMPL_CONF(Signa jamEntry(); ndbrequire(signal->getNoOfSections() == 0); + if (c_takeOverInProgress) + { + /** + * The new master will rebuild the transaction state from the + * DICT_TAKEOVER_CONF signals. Therefore we ignore this signal during + * takeover. + */ + jam(); + return; + } + const SchemaTransImplConf* conf = (const SchemaTransImplConf*)signal->getDataPtr(); @@ -25248,6 +25291,17 @@ Dbdict::execSCHEMA_TRANS_IMPL_REF(Signal jamEntry(); ndbrequire(signal->getNoOfSections() == 0); + if (c_takeOverInProgress) + { + /** + * The new master will rebuild the transaction state from the + * DICT_TAKEOVER_CONF signals. Therefore we ignore this signal during + * takeover. + */ + jam(); + return; + } + SchemaTransImplRef refCopy = *(SchemaTransImplRef*)signal->getDataPtr(); SchemaTransImplRef * ref = &refCopy; === modified file 'storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp' --- a/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp 2012-01-23 08:00:45 +0000 +++ b/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp 2012-08-22 10:49:42 +0000 @@ -1319,6 +1319,13 @@ private: bool c_nodeRestart; bool c_initialNodeRestart; Uint32 c_tabinfoReceived; + /** + * This flag indicates that a dict takeover is in progress, specifically + * that the new master has outstanding DICT_TAKEOVER_REQ messages. The flag + * is used to prevent client from starting (or ending) transactions during + * takeover. + */ + bool c_takeOverInProgress; /** * Temporary structure used when parsing table info === modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp' --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2012-08-13 15:04:51 +0000 +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2012-08-22 10:49:42 +0000 @@ -2513,7 +2513,31 @@ Dblqh::dropTab_wait_usage(Signal* signal void Dblqh::execDROP_TAB_REQ(Signal* signal){ jamEntry(); - + if (ERROR_INSERTED(5076)) + { + /** + * This error insert simulates a situation where it takes a long time + * to execute DROP_TAB_REQ, such that we can crash the (dict) master + * while there is an outstanding DROP_TAB_REQ. + */ + jam(); + sendSignalWithDelay(reference(), GSN_DROP_TAB_REQ, signal, 1000, + signal->getLength()); + return; + } + if (ERROR_INSERTED(5077)) + { + jam(); + CLEAR_ERROR_INSERT_VALUE; + /** + * Kill this node 2 seconds from now. We wait for two seconds to make sure + * that DROP_TAB_REQ messages have reached other nodes before this one + * dies. + */ + signal->theData[0] = 9999; + sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 2000, 1); + return; + } DropTabReq reqCopy = * (DropTabReq*)signal->getDataPtr(); DropTabReq* req = &reqCopy; === modified file 'storage/ndb/test/ndbapi/testDict.cpp' --- a/storage/ndb/test/ndbapi/testDict.cpp 2011-12-15 08:28:12 +0000 +++ b/storage/ndb/test/ndbapi/testDict.cpp 2012-08-22 10:49:42 +0000 @@ -776,6 +776,144 @@ int runUseTableUntilStopped3(NDBT_Contex return NDBT_OK; } +/** + * This is a regression test for bug 14190114 + * "CLUSTER CRASH DUE TO NDBREQUIRE IN ./LOCALPROXY.HPP DBLQH (LINE: 234)". + * This bug occurs if there is a takeover (i.e. the master node crashes) + * while an LQH block is executing a DROP_TAB_REQ signal. It only affects + * multi-threaded ndb. + */ +static int +runDropTakeoverTest(NDBT_Context* ctx, NDBT_Step* step) +{ + NdbRestarter restarter; + if (restarter.getNumDbNodes() == 1) + { + g_info << "Cannot do this test with just one datanode." << endl; + return NDBT_OK; + } + + Ndb* const ndb = GETNDB(step); + NdbDictionary::Dictionary* const dict = ndb->getDictionary(); + + // First we create a table that is a copy of ctx->getTab(). + NdbDictionary::Table copyTab(*ctx->getTab()); + const char* copyName = "copyTab"; + + copyTab.setName(copyName); + if (dict->createTable(copyTab) != 0) + { + g_err << "Failed to create table " << copyName << endl + << dict->getNdbError() << endl; + return NDBT_FAILED; + } + + /** + * Find the node id of the master node and another data node that is not + * the master. + */ + const int masterNodeId = restarter.getMasterNodeId(); + const int nonMasterNodeId = + masterNodeId == restarter.getDbNodeId(0) ? + restarter.getDbNodeId(1) : + restarter.getDbNodeId(0); + + /** + * This error insert makes LQH resend the DROP_TAB_REQ to itself (with a + * delay) rather than executing it, until the error insert is reset. + * This makes it appear as if though the LQH block spends a long time + * executing the DROP_TAB_REQ signal. + */ + g_info << "Insert error 5076 in node " << nonMasterNodeId << endl; + require(restarter.insertErrorInNode(nonMasterNodeId, 5076) == 0); + /** + * This error insert makes the master node crash when one of its LQH + * blocks tries to execute a DROP_TAB_REQ signal. This will then trigger + * a takeover. + */ + g_info << "Insert error 5077 in node " << masterNodeId << endl; + require(restarter.insertErrorInNode(masterNodeId, 5077) == 0); + + // This dropTable should fail, since the master node dies. + g_info << "Trying to drop table " << copyName << endl; + if (dict->dropTable(copyName) == 0) + { + g_err << "Unexpectedly managed to drop table " << copyName << endl; + return NDBT_FAILED; + } + + /** + * Check that only old master is dead. Bug 14190114 would cause other nodes + * to die as well. + */ + const int deadNodeId = restarter.checkClusterAlive(&masterNodeId, 1); + if (deadNodeId != 0) + { + g_err << "NodeId " << deadNodeId << " is down." << endl; + return NDBT_FAILED; + } + + // Reset error insert. + g_info << "insert error 0 in node " << nonMasterNodeId << endl; + require(restarter.insertErrorInNode(nonMasterNodeId, 0) == 0); + + // Verify that old master comes back up, and that no other node crashed. + g_info << "Waiting for all nodes to be up." << endl; + if (restarter.waitClusterStarted() != 0) + { + g_err << "One or more cluster nodes are not up." << endl; + return NDBT_FAILED; + } + + /** + * The 'drop table' operation should have been rolled forward, since the + * node crash happened in the complete phase. Verify that the table is + * gone. + */ + g_info << "Verifying that table " << copyName << " was deleted." << endl; + if (dict->getTable(copyName) == NULL) + { + if (dict->getNdbError().code != 723) // 723 = no such table existed. + { + g_err << "dict->getTable() for " << copyName + << " failed in unexpedted way:" << endl + << dict->getNdbError() << endl; + return NDBT_FAILED; + } + } + else + { + g_err << "Transaction dropping " << copyName << " was not rolled forward" + << endl; + return NDBT_FAILED; + } + + /** + * Do another dictionary transaction, to verify that the cluster allows that. + */ + NdbDictionary::Table extraTab(*ctx->getTab()); + const char* extraName = "extraTab"; + + extraTab.setName(extraName); + g_info << "Trying to create table " << extraName << endl; + if (dict->createTable(extraTab) != 0) + { + g_err << "Failed to create table " << extraName << endl + << dict->getNdbError() << endl; + return NDBT_FAILED; + } + + // Clean up by dropping extraTab. + g_info << "Trying to drop table " << extraName << endl; + if (dict->dropTable(extraName) != 0) + { + g_err << "Failed to drop table " << extraName << endl + << dict->getNdbError() << endl; + return NDBT_FAILED; + } + + return NDBT_OK; +} int runCreateMaxTables(NDBT_Context* ctx, NDBT_Step* step) @@ -9076,6 +9214,9 @@ TESTCASE("CreateAndDropDuring", STEP(runCreateAndDropDuring); STEP(runUseTableUntilStopped); } +TESTCASE("DropWithTakeover","bug 14190114"){ + INITIALIZER(runDropTakeoverTest); +} TESTCASE("CreateInvalidTables", "Try to create the invalid tables we have defined\n"){ INITIALIZER(runCreateInvalidTables); === modified file 'storage/ndb/test/run-test/daily-basic-tests.txt' --- a/storage/ndb/test/run-test/daily-basic-tests.txt 2012-08-13 13:03:29 +0000 +++ b/storage/ndb/test/run-test/daily-basic-tests.txt 2012-08-22 10:49:42 +0000 @@ -1868,6 +1868,10 @@ cmd: testDict args: -n IndexStatCreate T1 max-time: 300 +cmd: testDict +args: -n DropWithTakeover T1 + +max-time: 300 cmd: testBasic args: -n LeakApiConnectObjects T1 No bundle (reason: useless for push emails).