From: Date: March 22 2006 11:44am Subject: bk commit into 4.1 tree (jonas:1.2475) BUG#18414 List-Archive: http://lists.mysql.com/commits/4012 X-Bug: 18414 Message-Id: <20060322104434.E05E730A813@perch.ndb.mysql.com> Below is the list of changes that have just been committed into a local 4.1 repository of jonas. When jonas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2475 06/03/22 11:44:31 jonas@stripped +6 -0 ndb - bug#18414 Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding ndb/test/run-test/daily-basic-tests.txt 1.28 06/03/22 11:44:29 jonas@stripped +4 -0 Add testcase for bug18414 ndb/test/ndbapi/testTimeout.cpp 1.13 06/03/22 11:44:29 jonas@stripped +5 -2 Fix error code checking ndb/test/ndbapi/testNodeRestart.cpp 1.15 06/03/22 11:44:29 jonas@stripped +73 -0 Add testcase for bug18414 ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 1.64 06/03/22 11:44:29 jonas@stripped +44 -8 Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 1.35 06/03/22 11:44:29 jonas@stripped +0 -4 remove dumping of LCP info during NF ndb/src/kernel/blocks/ERROR_codes.txt 1.15 06/03/22 11:44:29 jonas@stripped +2 -0 New error code # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: jonas # Host: perch.ndb.mysql.com # Root: /home/jonas/src/41-work --- 1.27/ndb/test/run-test/daily-basic-tests.txt 2006-03-21 14:47:08 +01:00 +++ 1.28/ndb/test/run-test/daily-basic-tests.txt 2006-03-22 11:44:29 +01:00 @@ -458,6 +458,10 @@ cmd: testSystemRestart args: -n Bug18385 T1 +max-time: 500 +cmd: testNodeRestart +args: -n Bug18414 T1 + # OLD FLEX max-time: 500 cmd: flexBench --- 1.14/ndb/src/kernel/blocks/ERROR_codes.txt 2006-03-21 14:47:08 +01:00 +++ 1.15/ndb/src/kernel/blocks/ERROR_codes.txt 2006-03-22 11:44:29 +01:00 @@ -226,6 +226,8 @@ 8045: (ABORTCONF only as part of take-over) Delay execution of ABORTCONF signal 2 seconds to generate time-out. +8050: Send ZABORT_TIMEOUT_BREAK delayed + ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC ------------------------------------------------- --- 1.34/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-03-21 15:13:39 +01:00 +++ 1.35/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-03-22 11:44:29 +01:00 @@ -5982,10 +5982,6 @@ signal->theData[0] = 7012; execDUMP_STATE_ORD(signal); - signal->theData[0] = 7015; - signal->theData[1] = 0; - execDUMP_STATE_ORD(signal); - c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__); checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER); --- 1.63/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-03-20 14:53:27 +01:00 +++ 1.64/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-03-22 11:44:29 +01:00 @@ -6386,6 +6386,7 @@ return; } + bool found = false; OperationState tmp[16]; Uint32 TloopCount = 0; @@ -6393,7 +6394,31 @@ jam(); if (tcConnectptr.i == RNIL) { jam(); - if (Tcheck == 0) { + +#ifdef VM_TRACE + ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d", + found, Tcheck, apiConnectptr.p->counter); +#endif + if (found || apiConnectptr.p->counter) + { + jam(); + /** + * We sent atleast one ABORT/ABORTED + * or ZABORT_TIMEOUT_BREAK is in job buffer + * wait for reception... + */ + return; + } + + if (Tcheck == 1) + { + jam(); + releaseAbortResources(signal); + return; + } + + if (Tcheck == 0) + { jam(); /*------------------------------------------------------------------ * All nodes had already reported ABORTED for all tcConnect records. @@ -6402,9 +6427,11 @@ *------------------------------------------------------------------*/ char buf[96]; buf[0] = 0; char buf2[96]; - BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:", - __LINE__, apiConnectptr.i); - for(Uint32 i = 0; icounter); + for(Uint32 i = 0; itheData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK; signal->theData[1] = tcConnectptr.i; signal->theData[2] = apiConnectptr.i; - sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB); + if (ERROR_INSERTED(8050)) + { + ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)", + Tcheck, apiConnectptr.p->counter); + sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3); + } + else + { + sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB); + } return; }//if ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord); @@ -6450,7 +6488,7 @@ jam(); if (tcConnectptr.p->tcNodedata[Ti] != 0) { TloopCount += 31; - Tcheck = 1; + found = true; hostptr.i = tcConnectptr.p->tcNodedata[Ti]; ptrCheckGuard(hostptr, chostFilesize, hostRecord); if (hostptr.p->hostStatus == HS_ALIVE) { @@ -7007,8 +7045,6 @@ hostptr.i = tfailedNodeId; ptrCheckGuard(hostptr, chostFilesize, hostRecord); - ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)", - tfailedNodeId, signal->getSendersBlockRef(), reference()); if (signal->getSendersBlockRef() != reference()) { jam(); --- 1.14/ndb/test/ndbapi/testNodeRestart.cpp 2006-03-17 10:55:00 +01:00 +++ 1.15/ndb/test/ndbapi/testNodeRestart.cpp 2006-03-22 11:44:29 +01:00 @@ -581,6 +581,73 @@ return ret ? NDBT_OK : NDBT_FAILED; } +int +runBug18414(NDBT_Context* ctx, NDBT_Step* step){ + + NdbRestarter restarter; + if (restarter.getNumDbNodes() < 2) + { + ctx->stopTest(); + return NDBT_OK; + } + + Ndb* pNdb = GETNDB(step); + HugoOperations hugoOps(*ctx->getTab()); + HugoTransactions hugoTrans(*ctx->getTab()); + int loop = 0; + do + { + if(hugoOps.startTransaction(pNdb) != 0) + goto err; + + if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0) + goto err; + + if(hugoOps.execute_NoCommit(pNdb) != 0) + goto err; + + int node1 = hugoOps.getTransaction()->getConnectedNodeId(); + int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand()); + + if (node1 == -1 || node2 == -1) + break; + + if (loop & 1) + { + if (restarter.insertErrorInNode(node1, 8050)) + goto err; + } + + if (restarter.insertErrorInNode(node2, 5003)) + goto err; + + int res= hugoOps.execute_Rollback(pNdb); + + if (restarter.waitNodesNoStart(&node2, 1) != 0) + goto err; + + if (restarter.insertErrorInAllNodes(0)) + goto err; + + if (restarter.startNodes(&node2, 1) != 0) + goto err; + + if (restarter.waitClusterStarted() != 0) + goto err; + + if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0) + goto err; + + hugoOps.closeTransaction(pNdb); + + } while(++loop < 5); + + return NDBT_OK; + +err: + hugoOps.closeTransaction(pNdb); + return NDBT_FAILED; +} NDBT_TESTSUITE(testNodeRestart); TESTCASE("NoLoad", @@ -869,6 +936,12 @@ TESTCASE("Bug16772", "Test bug with restarting before NF handling is complete"){ STEP(runBug16772); +} +TESTCASE("Bug18414", + "Test bug with NF during NR"){ + INITIALIZER(runLoadTable); + STEP(runBug18414); + FINALIZER(runClearTable); } NDBT_TESTSUITE_END(testNodeRestart); --- 1.12/ndb/test/ndbapi/testTimeout.cpp 2006-03-20 14:49:44 +01:00 +++ 1.13/ndb/test/ndbapi/testTimeout.cpp 2006-03-22 11:44:29 +01:00 @@ -173,8 +173,11 @@ NdbSleep_MilliSleep(sleep); // Expect that transaction has timed-out - CHECK(hugoOps.execute_Commit(pNdb) == 237); - + int ret = hugoOps.execute_Commit(pNdb); + CHECK(ret != 0); + NdbError err = pNdb->getNdbError(ret); + CHECK(err.classification == NdbError::TimeoutExpired); + } while(false); hugoOps.closeTransaction(pNdb);