Below is the list of changes that have just been committed into a local
5.0 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2046 05/12/13 11:54:42 jonas@stripped +7 -0
Merge perch.ndb.mysql.com:/home/jonas/src/mysql-4.1
into perch.ndb.mysql.com:/home/jonas/src/mysql-5.0
ndb/test/run-test/daily-basic-tests.txt
1.34 05/12/13 11:54:40 jonas@stripped +11 -2
merge
ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
1.76 05/12/13 11:54:40 jonas@stripped +7 -6
merge
ndb/src/kernel/blocks/ERROR_codes.txt
1.14 05/12/13 11:54:39 jonas@stripped +2 -2
merge
ndb/test/ndbapi/testNodeRestart.cpp
1.17 05/12/13 11:51:01 jonas@stripped +0 -0
Auto merged
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
1.87 05/12/13 11:51:01 jonas@stripped +0 -0
Auto merged
ndb/src/kernel/blocks/dblqh/Dblqh.hpp
1.37 05/12/13 11:51:01 jonas@stripped +0 -0
Auto merged
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
1.38 05/12/13 11:51:01 jonas@stripped +0 -0
Auto merged
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: jonas
# Host: perch.ndb.mysql.com
# Root: /home/jonas/src/mysql-5.0/RESYNC
--- 1.33/ndb/test/run-test/daily-basic-tests.txt 2005-07-20 13:22:37 +02:00
+++ 1.34/ndb/test/run-test/daily-basic-tests.txt 2005-12-13 11:54:40 +01:00
@@ -413,6 +413,27 @@
cmd: testScan
args: -n ScanParallelism
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15587 T1
+
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15632 T1
+
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug15685 T1
+
+# OLD FLEX
+max-time: 500
+cmd: flexBench
+args: -c 25 -t 10
+
+max-time: 500
+cmd: flexHammer
+args: -r 5 -t 32
+
#
# DICT TESTS
max-time: 1500
--- 1.13/ndb/src/kernel/blocks/ERROR_codes.txt 2005-09-15 15:00:31 +02:00
+++ 1.14/ndb/src/kernel/blocks/ERROR_codes.txt 2005-12-13 11:54:39 +01:00
@@ -61,6 +61,8 @@
5007:
Delay GCP_SAVEREQ by 10 secs
+7165: Delay INCL_NODE_REQ in starting node yeilding error in GCP_PREPARE
+
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
@@ -155,11 +157,15 @@
5006:
Insert node failure handling when receiving ABORTREQ.
+5042:
+As 5002, but with specified table (see DumpStateOrd)
+
These error code can be combined with error codes for testing time-out
handling in DBTC to ensure that node failures are also well handled in
time-out handling. They can also be used to test multiple node failure
handling.
+
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBLQH
-------------------------------------------------
5011:
@@ -197,6 +203,9 @@
5041: Crash is receiving simple read from other TC on different node
8050: Send TCKEYREF is operation is non local
+
+5100,5101: Drop ABORT req in primary replica
+ Crash on "next" ABORT
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
-------------------------------------------------
--- 1.37/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2005-10-05 16:35:20 +02:00
+++ 1.38/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2005-12-13 11:51:01 +01:00
@@ -215,7 +215,7 @@
signal->theData[2] = c_nodeStartMaster.failNr;
signal->theData[3] = 0;
signal->theData[4] = currentgcp;
- sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB);
+ sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA);
}//Dbdih::sendINCL_NODEREQ()
void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId)
@@ -1863,6 +1863,14 @@
// global checkpoint id and the correct state. We do not wait for any reply
// since the starting node will not send any.
/*-------------------------------------------------------------------------*/
+ Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
+
+ if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
+ (getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5))
+ {
+ c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
+ }
+
sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode);
}//Dbdih::gcpBlockedLab()
@@ -2065,6 +2073,13 @@
jamEntry();
Uint32 retRef = signal->theData[0];
Uint32 nodeId = signal->theData[1];
+ if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
+ {
+ CLEAR_ERROR_INSERT_VALUE;
+ sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength());
+ return;
+ }
+
Uint32 tnodeStartFailNr = signal->theData[2];
currentgcp = signal->theData[4];
CRASH_INSERTION(7127);
@@ -2092,6 +2107,15 @@
// id's and the lcp status.
/*-----------------------------------------------------------------------*/
CRASH_INSERTION(7171);
+ Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
+
+ if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
+ (NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5))
+ {
+ signal->theData[0] = getOwnNodeId();
+ signal->theData[1] = getOwnNodeId();
+ sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
+ }
return;
}//if
if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
@@ -3741,8 +3765,16 @@
/*------------------------------------------------------------------------*/
// Verify that a starting node has also crashed. Reset the node start record.
/*-------------------------------------------------------------------------*/
- if (c_nodeStartMaster.startNode != RNIL) {
- ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE);
+ if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
+ {
+ BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
+ SystemError * const sysErr = (SystemError*)&signal->theData[0];
+ sysErr->errorCode = SystemError::StartInProgressError;
+ sysErr->errorRef = reference();
+ sysErr->data1= 0;
+ sysErr->data2= __LINE__;
+ sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
+ nodeResetStart();
}//if
/*--------------------------------------------------*/
@@ -5189,15 +5221,16 @@
/**
* For each of replica record
*/
- Uint32 replicaNo = 0;
+ bool found = false;
ReplicaRecordPtr replicaPtr;
for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
- replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) {
+ replicaPtr.i = replicaPtr.p->nextReplica) {
jam();
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
if(replicaPtr.p->procNode == nodeId){
jam();
+ found = true;
noOfRemovedReplicas++;
removeNodeFromStored(nodeId, fragPtr, replicaPtr);
if(replicaPtr.p->lcpOngoingFlag){
@@ -5212,6 +5245,15 @@
replicaPtr.p->lcpOngoingFlag = false;
}
}
+ }
+ if (!found)
+ {
+ jam();
+ /**
+ * Run updateNodeInfo to remove any dead nodes from list of activeNodes
+ * see bug#15587
+ */
+ updateNodeInfo(fragPtr);
}
noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
}
--- 1.36/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2005-10-05 16:35:20 +02:00
+++ 1.37/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2005-12-13 11:51:01 +01:00
@@ -2885,6 +2885,7 @@
UintR ctransidHash[1024];
Uint32 c_diskless;
+ Uint32 c_error_insert_table_id;
public:
/**
--- 1.75/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2005-10-05 16:35:20 +02:00
+++ 1.76/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2005-12-13 11:54:40 +01:00
@@ -3543,6 +3543,7 @@
jam();
regTcPtr->activeCreat = ZTRUE;
CRASH_INSERTION(5002);
+ CRASH_INSERTION2(5042, tabptr.i == c_error_insert_table_id);
} else {
regTcPtr->activeCreat = ZFALSE;
}//if
@@ -5880,12 +5881,21 @@
warningReport(signal, 8);
return;
}//if
+
+ TcConnectionrec * const regTcPtr = tcConnectptr.p;
+
+ if (ERROR_INSERTED(5100))
+ {
+ SET_ERROR_INSERT_VALUE(5101);
+ return;
+ }
+ CRASH_INSERTION2(5101, regTcPtr->nextReplica != ZNIL);
+
/* ------------------------------------------------------------------------- */
/*A GUIDING DESIGN PRINCIPLE IN HANDLING THESE ERROR SITUATIONS HAVE BEEN */
/*KEEP IT SIMPLE. THUS WE RATHER INSERT A WAIT AND SET THE ABORT_STATE TO */
/*ACTIVE RATHER THAN WRITE NEW CODE TO HANDLE EVERY SPECIAL SITUATION. */
/* ------------------------------------------------------------------------- */
- TcConnectionrec * const regTcPtr = tcConnectptr.p;
if (regTcPtr->nextReplica != ZNIL) {
/* ------------------------------------------------------------------------- */
// We will immediately send the ABORT message also to the next LQH node in line.
@@ -18521,6 +18531,12 @@
"Shutting down node due to failed handling of GCP_SAVEREQ");
}
+ }
+
+ if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2)
+ {
+ c_error_insert_table_id = dumpState->args[1];
+ SET_ERROR_INSERT_VALUE(5042);
}
}//Dblqh::execDUMP_STATE_ORD()
--- 1.86/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2005-10-21 06:44:38 +02:00
+++ 1.87/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2005-12-13 11:51:01 +01:00
@@ -6183,7 +6183,6 @@
<< " - place: " << c_apiConTimer_line[apiConnectptr.i]);
switch (apiConnectptr.p->apiConnectstate) {
case CS_STARTED:
- ndbrequire(c_apiConTimer_line[apiConnectptr.i] != 3615);
if(apiConnectptr.p->lqhkeyreqrec == apiConnectptr.p->lqhkeyconfrec){
jam();
/*
@@ -6443,8 +6442,8 @@
warningEvent(buf);
ndbout_c(buf);
ndbrequire(false);
+ releaseAbortResources(signal);
}
- releaseAbortResources(signal);
return;
}//if
TloopCount++;
--- 1.16/ndb/test/ndbapi/testNodeRestart.cpp 2005-05-24 16:58:29 +02:00
+++ 1.17/ndb/test/ndbapi/testNodeRestart.cpp 2005-12-13 11:51:01 +01:00
@@ -21,6 +21,7 @@
#include <NdbRestarter.hpp>
#include <NdbRestarts.hpp>
#include <Vector.hpp>
+#include <signaldata/DumpStateOrd.hpp>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
@@ -409,6 +410,132 @@
return NDBT_OK;
}
+int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
+ int result = NDBT_OK;
+ int loops = ctx->getNumLoops();
+ int records = ctx->getNumRecords();
+ NdbRestarter restarter;
+
+ Uint32 tableId = ctx->getTab()->getTableId();
+ int dump[2] = { DumpStateOrd::LqhErrorInsert5042, 0 };
+ dump[1] = tableId;
+
+ int nodeId = restarter.getDbNodeId(1);
+
+ ndbout << "Restart node " << nodeId << endl;
+
+ if (restarter.restartOneDbNode(nodeId,
+ /** initial */ false,
+ /** nostart */ true,
+ /** abort */ true))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesNoStart(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.dumpStateOneNode(nodeId, dump, 2))
+ return NDBT_FAILED;
+
+ if (restarter.startNodes(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesStarted(&nodeId, 1))
+ return NDBT_FAILED;
+
+ ctx->stopTest();
+ return NDBT_OK;
+}
+
+int runBug15632(NDBT_Context* ctx, NDBT_Step* step){
+ int result = NDBT_OK;
+ int loops = ctx->getNumLoops();
+ int records = ctx->getNumRecords();
+ NdbRestarter restarter;
+
+ int nodeId = restarter.getDbNodeId(1);
+
+ ndbout << "Restart node " << nodeId << endl;
+
+ if (restarter.restartOneDbNode(nodeId,
+ /** initial */ false,
+ /** nostart */ true,
+ /** abort */ true))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesNoStart(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.insertErrorInNode(nodeId, 7165))
+ return NDBT_FAILED;
+
+ if (restarter.startNodes(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesStarted(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.restartOneDbNode(nodeId,
+ /** initial */ false,
+ /** nostart */ true,
+ /** abort */ true))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesNoStart(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.insertErrorInNode(nodeId, 7171))
+ return NDBT_FAILED;
+
+ if (restarter.startNodes(&nodeId, 1))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesStarted(&nodeId, 1))
+ return NDBT_FAILED;
+
+ ctx->stopTest();
+ return NDBT_OK;
+}
+
+int runBug15685(NDBT_Context* ctx, NDBT_Step* step){
+
+ Ndb* pNdb = GETNDB(step);
+ HugoOperations hugoOps(*ctx->getTab());
+ NdbRestarter restarter;
+
+ HugoTransactions hugoTrans(*ctx->getTab());
+ if (hugoTrans.loadTable(GETNDB(step), 10) != 0){
+ return NDBT_FAILED;
+ }
+
+ if(hugoOps.startTransaction(pNdb) != 0)
+ goto err;
+
+ if(hugoOps.pkUpdateRecord(pNdb, 0, 1, rand()) != 0)
+ goto err;
+
+ if(hugoOps.execute_NoCommit(pNdb) != 0)
+ goto err;
+
+ if (restarter.insertErrorInAllNodes(5100))
+ return NDBT_FAILED;
+
+ hugoOps.execute_Rollback(pNdb);
+
+ if (restarter.waitClusterStarted() != 0)
+ goto err;
+
+ if (restarter.insertErrorInAllNodes(0))
+ return NDBT_FAILED;
+
+ ctx->stopTest();
+ return NDBT_OK;
+
+err:
+ ctx->stopTest();
+ return NDBT_FAILED;
+}
+
+
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
@@ -558,6 +685,8 @@
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runRestarts);
+ STEP(runPkUpdateUntilStopped);
+ STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify);
FINALIZER(runClearTable);
}
@@ -647,6 +776,8 @@
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runRestarts);
+ STEP(runPkUpdateUntilStopped);
+ STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify);
FINALIZER(runClearTable);
}
@@ -669,6 +800,24 @@
"Test commit after node failure"){
INITIALIZER(runLoadTable);
STEP(runLateCommit);
+ FINALIZER(runClearTable);
+}
+TESTCASE("Bug15587",
+ "Test bug with NF during NR"){
+ INITIALIZER(runLoadTable);
+ STEP(runScanUpdateUntilStopped);
+ STEP(runBug15587);
+ FINALIZER(runClearTable);
+}
+TESTCASE("Bug15632",
+ "Test bug with NF during NR"){
+ INITIALIZER(runLoadTable);
+ STEP(runBug15632);
+ FINALIZER(runClearTable);
+}
+TESTCASE("Bug15685",
+ "Test bug with NF during abort"){
+ STEP(runBug15685);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);
| Thread |
|---|
| • bk commit into 5.0 tree (jonas:1.2046) | jonas | 13 Dec |