Below is the list of changes that have just been committed into a local
4.1 repository of jonas. When jonas does a push, these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository,
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2475 06/03/22 11:44:31 jonas@stripped +6 -0
ndb - bug#18414
Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding
ndb/test/run-test/daily-basic-tests.txt
1.28 06/03/22 11:44:29 jonas@stripped +4 -0
Add testcase for bug18414
ndb/test/ndbapi/testTimeout.cpp
1.13 06/03/22 11:44:29 jonas@stripped +5 -2
Fix error code checking
ndb/test/ndbapi/testNodeRestart.cpp
1.15 06/03/22 11:44:29 jonas@stripped +73 -0
Add testcase for bug18414
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
1.64 06/03/22 11:44:29 jonas@stripped +44 -8
Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
1.35 06/03/22 11:44:29 jonas@stripped +0 -4
remove dumping of LCP info during NF
ndb/src/kernel/blocks/ERROR_codes.txt
1.15 06/03/22 11:44:29 jonas@stripped +2 -0
New error code
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: jonas
# Host: perch.ndb.mysql.com
# Root: /home/jonas/src/41-work
--- 1.27/ndb/test/run-test/daily-basic-tests.txt 2006-03-21 14:47:08 +01:00
+++ 1.28/ndb/test/run-test/daily-basic-tests.txt 2006-03-22 11:44:29 +01:00
@@ -458,6 +458,10 @@
cmd: testSystemRestart
args: -n Bug18385 T1
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug18414 T1
+
# OLD FLEX
max-time: 500
cmd: flexBench
--- 1.14/ndb/src/kernel/blocks/ERROR_codes.txt 2006-03-21 14:47:08 +01:00
+++ 1.15/ndb/src/kernel/blocks/ERROR_codes.txt 2006-03-22 11:44:29 +01:00
@@ -226,6 +226,8 @@
8045: (ABORTCONF only as part of take-over)
Delay execution of ABORTCONF signal 2 seconds to generate time-out.
+8050: Send ZABORT_TIMEOUT_BREAK delayed
+
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------
--- 1.34/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-03-21 15:13:39 +01:00
+++ 1.35/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2006-03-22 11:44:29 +01:00
@@ -5982,10 +5982,6 @@
signal->theData[0] = 7012;
execDUMP_STATE_ORD(signal);
- signal->theData[0] = 7015;
- signal->theData[1] = 0;
- execDUMP_STATE_ORD(signal);
-
c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
--- 1.63/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-03-20 14:53:27 +01:00
+++ 1.64/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2006-03-22 11:44:29 +01:00
@@ -6386,6 +6386,7 @@
return;
}
+ bool found = false;
OperationState tmp[16];
Uint32 TloopCount = 0;
@@ -6393,7 +6394,31 @@
jam();
if (tcConnectptr.i == RNIL) {
jam();
- if (Tcheck == 0) {
+
+#ifdef VM_TRACE
+ ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d",
+ found, Tcheck, apiConnectptr.p->counter);
+#endif
+ if (found || apiConnectptr.p->counter)
+ {
+ jam();
+ /**
+ * We sent atleast one ABORT/ABORTED
+ * or ZABORT_TIMEOUT_BREAK is in job buffer
+ * wait for reception...
+ */
+ return;
+ }
+
+ if (Tcheck == 1)
+ {
+ jam();
+ releaseAbortResources(signal);
+ return;
+ }
+
+ if (Tcheck == 0)
+ {
jam();
/*------------------------------------------------------------------
* All nodes had already reported ABORTED for all tcConnect records.
@@ -6402,9 +6427,11 @@
*------------------------------------------------------------------*/
char buf[96]; buf[0] = 0;
char buf2[96];
- BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:",
- __LINE__, apiConnectptr.i);
- for(Uint32 i = 0; i<TloopCount; i++){
+ BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:",
+ __LINE__, apiConnectptr.i,
+ apiConnectptr.p->counter);
+ for(Uint32 i = 0; i<TloopCount; i++)
+ {
BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]);
BaseString::snprintf(buf, sizeof(buf), buf2);
}
@@ -6412,7 +6439,9 @@
ndbout_c(buf);
ndbrequire(false);
releaseAbortResources(signal);
+ return;
}
+
return;
}//if
TloopCount++;
@@ -6427,7 +6456,16 @@
signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK;
signal->theData[1] = tcConnectptr.i;
signal->theData[2] = apiConnectptr.i;
- sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ if (ERROR_INSERTED(8050))
+ {
+ ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)",
+ Tcheck, apiConnectptr.p->counter);
+ sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3);
+ }
+ else
+ {
+ sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+ }
return;
}//if
ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
@@ -6450,7 +6488,7 @@
jam();
if (tcConnectptr.p->tcNodedata[Ti] != 0) {
TloopCount += 31;
- Tcheck = 1;
+ found = true;
hostptr.i = tcConnectptr.p->tcNodedata[Ti];
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
if (hostptr.p->hostStatus == HS_ALIVE) {
@@ -7007,8 +7045,6 @@
hostptr.i = tfailedNodeId;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)",
- tfailedNodeId, signal->getSendersBlockRef(), reference());
if (signal->getSendersBlockRef() != reference())
{
jam();
--- 1.14/ndb/test/ndbapi/testNodeRestart.cpp 2006-03-17 10:55:00 +01:00
+++ 1.15/ndb/test/ndbapi/testNodeRestart.cpp 2006-03-22 11:44:29 +01:00
@@ -581,6 +581,73 @@
return ret ? NDBT_OK : NDBT_FAILED;
}
+int
+runBug18414(NDBT_Context* ctx, NDBT_Step* step){
+
+ NdbRestarter restarter;
+ if (restarter.getNumDbNodes() < 2)
+ {
+ ctx->stopTest();
+ return NDBT_OK;
+ }
+
+ Ndb* pNdb = GETNDB(step);
+ HugoOperations hugoOps(*ctx->getTab());
+ HugoTransactions hugoTrans(*ctx->getTab());
+ int loop = 0;
+ do
+ {
+ if(hugoOps.startTransaction(pNdb) != 0)
+ goto err;
+
+ if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0)
+ goto err;
+
+ if(hugoOps.execute_NoCommit(pNdb) != 0)
+ goto err;
+
+ int node1 = hugoOps.getTransaction()->getConnectedNodeId();
+ int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
+
+ if (node1 == -1 || node2 == -1)
+ break;
+
+ if (loop & 1)
+ {
+ if (restarter.insertErrorInNode(node1, 8050))
+ goto err;
+ }
+
+ if (restarter.insertErrorInNode(node2, 5003))
+ goto err;
+
+ int res= hugoOps.execute_Rollback(pNdb);
+
+ if (restarter.waitNodesNoStart(&node2, 1) != 0)
+ goto err;
+
+ if (restarter.insertErrorInAllNodes(0))
+ goto err;
+
+ if (restarter.startNodes(&node2, 1) != 0)
+ goto err;
+
+ if (restarter.waitClusterStarted() != 0)
+ goto err;
+
+ if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0)
+ goto err;
+
+ hugoOps.closeTransaction(pNdb);
+
+ } while(++loop < 5);
+
+ return NDBT_OK;
+
+err:
+ hugoOps.closeTransaction(pNdb);
+ return NDBT_FAILED;
+}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
@@ -869,6 +936,12 @@
TESTCASE("Bug16772",
"Test bug with restarting before NF handling is complete"){
STEP(runBug16772);
+}
+TESTCASE("Bug18414",
+ "Test bug with NF during NR"){
+ INITIALIZER(runLoadTable);
+ STEP(runBug18414);
+ FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);
--- 1.12/ndb/test/ndbapi/testTimeout.cpp 2006-03-20 14:49:44 +01:00
+++ 1.13/ndb/test/ndbapi/testTimeout.cpp 2006-03-22 11:44:29 +01:00
@@ -173,8 +173,11 @@
NdbSleep_MilliSleep(sleep);
// Expect that transaction has timed-out
- CHECK(hugoOps.execute_Commit(pNdb) == 237);
-
+ int ret = hugoOps.execute_Commit(pNdb);
+ CHECK(ret != 0);
+ NdbError err = pNdb->getNdbError(ret);
+ CHECK(err.classification == NdbError::TimeoutExpired);
+
} while(false);
hugoOps.closeTransaction(pNdb);
| Thread | Author | Date |
|---|---|---|
| • bk commit into 4.1 tree (jonas:1.2475) BUG#18414 | jonas | 22 Mar |