From: Date: June 1 2006 8:24am Subject: bk commit into 4.1 tree (jonas:1.2507) BUG#20185 List-Archive: http://lists.mysql.com/commits/7134 X-Bug: 20185 Message-Id: <20060601062408.0A3ED36A8A2@perch.ndb.mysql.com> Below is the list of changes that have just been committed into a local 4.1 repository of jonas. When jonas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2507 06/06/01 08:24:05 jonas@stripped +5 -0 ndb - bug#20185 Don't be too aggressive in Dbtc::nodeFailCheckTransactions; let it time out by 1, so that it does not assert that it has waited too long. The old impl. set the timeout value to 0, making timeout = (ctcTimer - 0), which could be quite big. ndb/test/run-test/daily-basic-tests.txt 1.32 06/06/01 08:24:03 jonas@stripped +4 -0 autotest ndb/test/ndbapi/testNodeRestart.cpp 1.22 06/06/01 08:24:03 jonas@stripped +56 -0 testcase ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 1.66 06/06/01 08:24:02 jonas@stripped +5 -3 Don't set api con timer to 0, as this might trigger an assertion in timeOutFoundLab if state == PREPARE_TO_COMMIT ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 1.37 06/06/01 08:24:02 jonas@stripped +24 -0 2 new error inserts 7030 - delay in GCP_PREPARE until checkLocalNodefailComplete is true 7031 - delay in GCP_PREPARE and die ndb/src/kernel/blocks/ERROR_codes.txt 1.17 06/06/01 08:24:02 jonas@stripped +3 -0 error codes # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. 
# User: jonas # Host: perch.ndb.mysql.com # Root: /home/jonas/src/41-work --- 1.31/ndb/test/run-test/daily-basic-tests.txt 2006-04-03 12:09:48 +02:00 +++ 1.32/ndb/test/run-test/daily-basic-tests.txt 2006-06-01 08:24:03 +02:00 @@ -470,6 +470,10 @@ max-time: 1000 cmd: testNodeRestart args: -n Bug18612SR T1 +max-time: 1000 +cmd: testNodeRestart +args: -n Bug20185 T1 + # OLD FLEX max-time: 500 cmd: flexBench --- 1.16/ndb/src/kernel/blocks/ERROR_codes.txt 2006-03-23 11:53:52 +01:00 +++ 1.17/ndb/src/kernel/blocks/ERROR_codes.txt 2006-06-01 08:24:02 +02:00 @@ -63,6 +63,9 @@ Delay GCP_SAVEREQ by 10 secs 7165: Delay INCL_NODE_REQ in starting node yeilding error in GCP_PREPARE +7030: Delay in GCP_PREPARE until node has completed a node failure +7031: Delay in GCP_PREPARE and die 3s later + ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- --- 1.36/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-03-31 16:36:41 +02:00 +++ 1.37/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-06-01 08:24:02 +02:00 @@ -5417,6 +5417,12 @@ Dbdih::checkLocalNodefailComplete(Signal return; } + if (ERROR_INSERTED(7030)) + { + ndbout_c("Reenable GCP_PREPARE"); + CLEAR_ERROR_INSERT_VALUE; + } + NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0]; nf->blockNo = DBDIH; nf->nodeId = cownNodeId; @@ -7459,6 +7465,16 @@ void Dbdih::execGCP_PREPARE(Signal* sign { jamEntry(); CRASH_INSERTION(7005); + + if (ERROR_INSERTED(7030)) + { + cgckptflag = true; + ndbout_c("Delayed GCP_PREPARE 5s"); + sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000, + signal->getLength()); + return; + } + Uint32 masterNodeId = signal->theData[0]; Uint32 gci = signal->theData[1]; BlockReference retRef = calcDihBlockRef(masterNodeId); @@ -7471,6 +7487,14 @@ void Dbdih::execGCP_PREPARE(Signal* sign cgcpParticipantState = GCP_PARTICIPANT_PREPARE_RECEIVED; cnewgcp = gci; + if (ERROR_INSERTED(7031)) + { + ndbout_c("Crashing delayed in 
GCP_PREPARE 3s"); + signal->theData[0] = 9999; + sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1); + return; + } + signal->theData[0] = cownNodeId; signal->theData[1] = gci; sendSignal(retRef, GSN_GCP_PREPARECONF, signal, 2, JBA); --- 1.65/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-04-25 16:01:06 +02:00 +++ 1.66/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-06-01 08:24:02 +02:00 @@ -7002,18 +7002,20 @@ Dbtc::nodeFailCheckTransactions(Signal* { jam(); Ptr transPtr; + Uint32 TtcTimer = ctcTimer; + Uint32 TapplTimeout = c_appl_timeout_value; for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++) { ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord); if (transPtr.p->m_transaction_nodes.get(failedNodeId)) { jam(); + // Force timeout regardless of state - Uint32 save = c_appl_timeout_value; c_appl_timeout_value = 1; - setApiConTimer(transPtr.i, 0, __LINE__); + setApiConTimer(transPtr.i, TtcTimer - 2, __LINE__); timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT); - c_appl_timeout_value = save; + c_appl_timeout_value = TapplTimeout; } // Send CONTINUEB to continue later --- 1.21/ndb/test/ndbapi/testNodeRestart.cpp 2006-04-03 20:43:12 +02:00 +++ 1.22/ndb/test/ndbapi/testNodeRestart.cpp 2006-06-01 08:24:03 +02:00 @@ -868,6 +868,56 @@ runBug18612SR(NDBT_Context* ctx, NDBT_St return NDBT_OK; } +int runBug20185(NDBT_Context* ctx, NDBT_Step* step){ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + NdbRestarter restarter; + HugoOperations hugoOps(*ctx->getTab()); + Ndb* pNdb = GETNDB(step); + + int dump[] = { 7090, 20 } ; + if (restarter.dumpStateAllNodes(dump, 2)) + return NDBT_FAILED; + + NdbSleep_MilliSleep(3000); + + if(hugoOps.startTransaction(pNdb) != 0) + return NDBT_FAILED; + + if(hugoOps.pkUpdateRecord(pNdb, 1, 1) != 0) + return NDBT_FAILED; + + if (hugoOps.execute_NoCommit(pNdb) != 0) + return NDBT_FAILED; + + int nodeId; + const int node = 
hugoOps.getTransaction()->getConnectedNodeId(); + do { + nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes()); + } while (nodeId == node); + + if (restarter.insertErrorInAllNodes(7030)) + return NDBT_FAILED; + + if (restarter.insertErrorInNode(nodeId, 7031)) + return NDBT_FAILED; + + NdbSleep_MilliSleep(500); + + if (hugoOps.execute_Commit(pNdb) == 0) + return NDBT_FAILED; + + NdbSleep_MilliSleep(3000); + + restarter.waitClusterStarted(); + + if (restarter.dumpStateAllNodes(dump, 1)) + return NDBT_FAILED; + + return NDBT_OK; +} + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", @@ -1173,6 +1223,12 @@ TESTCASE("Bug18612SR", "Test bug with partitioned clusters"){ INITIALIZER(runLoadTable); STEP(runBug18612SR); + FINALIZER(runClearTable); +} +TESTCASE("Bug20185", + ""){ + INITIALIZER(runLoadTable); + STEP(runBug20185); FINALIZER(runClearTable); } NDBT_TESTSUITE_END(testNodeRestart);