From: Date: June 1 2006 10:46am Subject: bk commit into 5.1 tree (tomas:1.2025) BUG#20185 List-Archive: http://lists.mysql.com/commits/7141 X-Bug: 20185 Message-Id: <20060601084628.738FA7FE81@poseidon.mysql.com> Below is the list of changes that have just been committed into a local 5.1 repository of tomas. When tomas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2025 06/06/01 10:46:09 tomas@stripped +5 -0 Bug #20185 Node failure might cause other node failure storage/ndb/test/run-test/daily-basic-tests.txt 1.36 06/06/01 10:46:00 tomas@stripped +4 -0 Bug #20185 Node failure might cause other node failure storage/ndb/test/ndbapi/testNodeRestart.cpp 1.24 06/06/01 10:46:00 tomas@stripped +56 -0 Bug #20185 Node failure might cause other node failure storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 1.99 06/06/01 10:46:00 tomas@stripped +5 -3 Bug #20185 Node failure might cause other node failure storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 1.46 06/06/01 10:46:00 tomas@stripped +24 -0 Bug #20185 Node failure might cause other node failure storage/ndb/src/kernel/blocks/ERROR_codes.txt 1.15 06/06/01 10:46:00 tomas@stripped +3 -0 Bug #20185 Node failure might cause other node failure # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. 
# User: tomas # Host: poseidon.ndb.mysql.com # Root: /home/tomas/wl2325-alcatel --- 1.35/storage/ndb/test/run-test/daily-basic-tests.txt 2006-04-05 16:05:36 +02:00 +++ 1.36/storage/ndb/test/run-test/daily-basic-tests.txt 2006-06-01 10:46:00 +02:00 @@ -437,6 +437,10 @@ cmd: testNodeRestart args: -n Bug18414 T1 +max-time: 1000 +cmd: testNodeRestart +args: -n Bug20185 T1 + # OLD FLEX max-time: 500 cmd: flexBench --- 1.14/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2006-03-24 17:11:35 +01:00 +++ 1.15/storage/ndb/src/kernel/blocks/ERROR_codes.txt 2006-06-01 10:46:00 +02:00 @@ -61,6 +61,9 @@ 5007: Delay GCP_SAVEREQ by 10 secs +7030: Delay in GCP_PREPARE until node has completed a node failure +7031: Delay in GCP_PREPARE and die 3s later + ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ----------------------------------------------------------------- --- 1.45/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-04-07 12:24:10 +02:00 +++ 1.46/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-06-01 10:46:00 +02:00 @@ -5398,6 +5398,12 @@ return; } + if (ERROR_INSERTED(7030)) + { + ndbout_c("Reenable GCP_PREPARE"); + CLEAR_ERROR_INSERT_VALUE; + } + NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0]; nf->blockNo = DBDIH; nf->nodeId = cownNodeId; @@ -7501,6 +7507,16 @@ { jamEntry(); CRASH_INSERTION(7005); + + if (ERROR_INSERTED(7030)) + { + cgckptflag = true; + ndbout_c("Delayed GCP_PREPARE 5s"); + sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000, + signal->getLength()); + return; + } + Uint32 masterNodeId = signal->theData[0]; Uint32 gci = signal->theData[1]; BlockReference retRef = calcDihBlockRef(masterNodeId); @@ -7513,6 +7529,14 @@ cgcpParticipantState = GCP_PARTICIPANT_PREPARE_RECEIVED; cnewgcp = gci; + if (ERROR_INSERTED(7031)) + { + ndbout_c("Crashing delayed in GCP_PREPARE 3s"); + signal->theData[0] = 9999; + sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1); + return; + } + signal->theData[0] = 
cownNodeId; signal->theData[1] = gci; sendSignal(retRef, GSN_GCP_PREPARECONF, signal, 2, JBA); --- 1.98/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-04-26 09:46:00 +02:00 +++ 1.99/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-06-01 10:46:00 +02:00 @@ -7084,18 +7084,20 @@ { jam(); Ptr<ApiConnectRecord> transPtr; + Uint32 TtcTimer = ctcTimer; + Uint32 TapplTimeout = c_appl_timeout_value; for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++) { ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord); if (transPtr.p->m_transaction_nodes.get(failedNodeId)) { jam(); + // Force timeout regardless of state - Uint32 save = c_appl_timeout_value; c_appl_timeout_value = 1; - setApiConTimer(transPtr.i, 0, __LINE__); + setApiConTimer(transPtr.i, TtcTimer - 2, __LINE__); timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT); - c_appl_timeout_value = save; + c_appl_timeout_value = TapplTimeout; } // Send CONTINUEB to continue later --- 1.23/storage/ndb/test/ndbapi/testNodeRestart.cpp 2006-04-05 16:05:36 +02:00 +++ 1.24/storage/ndb/test/ndbapi/testNodeRestart.cpp 2006-06-01 10:46:00 +02:00 @@ -868,6 +868,56 @@ return NDBT_OK; } +int runBug20185(NDBT_Context* ctx, NDBT_Step* step){ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + NdbRestarter restarter; + HugoOperations hugoOps(*ctx->getTab()); + Ndb* pNdb = GETNDB(step); + + int dump[] = { 7090, 20 } ; + if (restarter.dumpStateAllNodes(dump, 2)) + return NDBT_FAILED; + + NdbSleep_MilliSleep(3000); + + if(hugoOps.startTransaction(pNdb) != 0) + return NDBT_FAILED; + + if(hugoOps.pkUpdateRecord(pNdb, 1, 1) != 0) + return NDBT_FAILED; + + if (hugoOps.execute_NoCommit(pNdb) != 0) + return NDBT_FAILED; + + int nodeId; + const int node = hugoOps.getTransaction()->getConnectedNodeId(); + do { + nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes()); + } while (nodeId == node); + + if (restarter.insertErrorInAllNodes(7030)) + return NDBT_FAILED; + 
+ if (restarter.insertErrorInNode(nodeId, 7031)) + return NDBT_FAILED; + + NdbSleep_MilliSleep(500); + + if (hugoOps.execute_Commit(pNdb) == 0) + return NDBT_FAILED; + + NdbSleep_MilliSleep(3000); + + restarter.waitClusterStarted(); + + if (restarter.dumpStateAllNodes(dump, 1)) + return NDBT_FAILED; + + return NDBT_OK; +} + NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", @@ -1173,6 +1223,12 @@ "Test bug with partitioned clusters"){ INITIALIZER(runLoadTable); STEP(runBug18612SR); + FINALIZER(runClearTable); +} +TESTCASE("Bug20185", + ""){ + INITIALIZER(runLoadTable); + STEP(runBug20185); FINALIZER(runClearTable); } NDBT_TESTSUITE_END(testNodeRestart);