Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-01-11 21:13:17+01:00, jonas@stripped +19 -0
Merge joreland@stripped:/home/bk/mysql-5.1-new-ndb
into perch.ndb.mysql.com:/home/jonas/src/mysql-5.1-new-ndb
MERGE: 1.2343.23.1
storage/ndb/include/kernel/GlobalSignalNumbers.h@stripped, 2007-01-11 21:13:11+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.30.1.1
storage/ndb/include/kernel/NodeInfo.hpp@stripped, 2007-01-11 21:13:11+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.5.1.1
storage/ndb/include/kernel/signaldata/ApiRegSignalData.hpp@stripped, 2007-01-11 21:13:11+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.4.1.1
storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp@stripped, 2007-01-11 21:13:11+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.12.1.1
storage/ndb/include/ndb_version.h.in@stripped, 2007-01-11 21:13:11+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.12.1.1
storage/ndb/src/common/debugger/signaldata/SignalNames.cpp@stripped, 2007-01-11 21:13:11+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.15.1.1
storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp@stripped, 2007-01-11 21:13:12+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.111.1.1
storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp@stripped, 2007-01-11 21:13:12+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.44.1.1
storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp@stripped, 2007-01-11 21:13:12+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.23.1.1
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.95.1.1
storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.14.1.2
storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.11.1.1
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.42.1.3
storage/ndb/src/kernel/blocks/suma/Suma.cpp@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.49.1.1
storage/ndb/src/kernel/blocks/suma/SumaInit.cpp@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.14.1.1
storage/ndb/src/kernel/vm/GlobalData.hpp@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.7.1.1
storage/ndb/src/kernel/vm/SimulatedBlock.hpp@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.27.1.1
storage/ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.35.1.4
storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-01-11 21:13:13+01:00, jonas@stripped +0 -0
Auto merged
MERGE: 1.60.1.1
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: jonas
# Host: perch.ndb.mysql.com
# Root: /home/jonas/src/mysql-5.1-new-ndb/RESYNC
--- 1.13/storage/ndb/include/ndb_version.h.in 2007-01-11 21:13:23 +01:00
+++ 1.14/storage/ndb/include/ndb_version.h.in 2007-01-11 21:13:23 +01:00
@@ -71,5 +71,7 @@
#define NDBD_QMGR_SINGLEUSER_VERSION_5 MAKE_VERSION(5,0,25)
+#define NDBD_NODE_VERSION_REP MAKE_VERSION(6,1,1)
+
#endif
--- 1.61/storage/ndb/test/run-test/daily-basic-tests.txt 2007-01-11 21:13:23 +01:00
+++ 1.62/storage/ndb/test/run-test/daily-basic-tests.txt 2007-01-11 21:13:23 +01:00
@@ -513,6 +513,14 @@
cmd: testNodeRestart
args: -n Bug24717 T1
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug25364 T1
+
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug25554 T1
+
#
# DICT TESTS
max-time: 1500
@@ -763,6 +771,10 @@
max-time: 1500
cmd: testSystemRestart
args: -n Bug24664
+
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug25468 T1
# OLD FLEX
max-time: 500
--- 1.31/storage/ndb/include/kernel/GlobalSignalNumbers.h 2007-01-11 21:13:23 +01:00
+++ 1.32/storage/ndb/include/kernel/GlobalSignalNumbers.h 2007-01-11 21:13:23 +01:00
@@ -182,7 +182,7 @@
#define GSN_CNTR_START_REP 119
/* 120 not unused */
#define GSN_ROUTE_ORD 121
-/* 122 unused */
+#define GSN_NODE_VERSION_REP 122
/* 123 unused */
/* 124 unused */
#define GSN_CHECK_LCP_STOP 125
--- 1.6/storage/ndb/include/kernel/NodeInfo.hpp 2007-01-11 21:13:23 +01:00
+++ 1.7/storage/ndb/include/kernel/NodeInfo.hpp 2007-01-11 21:13:23 +01:00
@@ -89,4 +89,14 @@
return ndbout;
}
+/**
+ * Summary of node software versions, kept per node type:
+ * one (min, max) version pair for each of the three NodeInfo::Type values.
+ */
+struct NodeVersionInfo
+{
+ STATIC_CONST( DataLength = 6 ); // 3 entries * 2 Uint32 words each
+ struct
+ {
+ Uint32 m_min_version; // lowest version recorded for this node type
+ Uint32 m_max_version; // highest version recorded for this node type
+ } m_type [3]; // Indexed as NodeInfo::Type
+};
+
#endif
--- 1.5/storage/ndb/include/kernel/signaldata/ApiRegSignalData.hpp 2007-01-11 21:13:23 +01:00
+++ 1.6/storage/ndb/include/kernel/signaldata/ApiRegSignalData.hpp 2007-01-11 21:13:23 +01:00
@@ -79,12 +79,13 @@
friend class ClusterMgr;
public:
- STATIC_CONST( SignalLength = 3 + NodeState::DataLength );
+ STATIC_CONST( SignalLength = 4 + NodeState::DataLength );
private:
Uint32 qmgrRef;
Uint32 version; // Version of NDB node
Uint32 apiHeartbeatFrequency;
+ Uint32 minDbVersion;
NodeState nodeState;
};
--- 1.13/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2007-01-11 21:13:23 +01:00
+++ 1.14/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2007-01-11 21:13:23 +01:00
@@ -67,6 +67,7 @@
// 100-105 TUP and ACC
// 200-240 UTIL
// 300-305 TRIX
+ QmgrErr935 = 935,
NdbfsDumpFileStat = 400,
NdbfsDumpAllFiles = 401,
NdbfsDumpOpenFiles = 402,
--- 1.16/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2007-01-11 21:13:23 +01:00
+++ 1.17/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2007-01-11 21:13:23 +01:00
@@ -636,5 +636,6 @@
,{ GSN_DICT_COMMIT_REQ, "DICT_COMMIT_REQ"}
,{ GSN_ROUTE_ORD, "ROUTE_ORD" }
+ ,{ GSN_NODE_VERSION_REP, "NODE_VERSION_REP" }
};
const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);
--- 1.112/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp 2007-01-11 21:13:23 +01:00
+++ 1.113/storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp 2007-01-11 21:13:23 +01:00
@@ -10049,9 +10049,20 @@
}
OpSubEventPtr subbPtr;
Uint32 errCode = 0;
+
+ DictLockPtr loopPtr;
+ if (c_dictLockQueue.first(loopPtr) &&
+ loopPtr.p->lt->lockType == DictLockReq::NodeRestartLock)
+ {
+ jam();
+ errCode = 1405;
+ goto busy;
+ }
+
if (!c_opSubEvent.seize(subbPtr)) {
errCode = SubStartRef::Busy;
busy:
+ jam();
SubStartRef * ref = (SubStartRef *)signal->getDataPtrSend();
{ // fix
@@ -10150,6 +10161,7 @@
SubStartRef* ref = (SubStartRef*) signal->getDataPtrSend();
ref->senderRef = reference();
ref->senderData = subbPtr.p->m_senderData;
+ ref->errorCode = err;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_START_REF,
signal, SubStartRef::SignalLength2, JBB);
c_opSubEvent.release(subbPtr);
@@ -10212,6 +10224,7 @@
#ifdef EVENT_PH3_DEBUG
ndbout_c("DBDICT(Coordinator) got GSN_SUB_START_CONF = (%d)", subbPtr.i);
#endif
+ subbPtr.p->m_sub_start_conf = *conf;
subbPtr.p->m_reqTracker.reportConf(c_counterMgr, refToNode(senderRef));
completeSubStartReq(signal,subbPtr.i,0);
}
@@ -10251,6 +10264,9 @@
#ifdef EVENT_DEBUG
ndbout_c("SUB_START_CONF");
#endif
+
+ SubStartConf* conf = (SubStartConf*)signal->getDataPtrSend();
+ * conf = subbPtr.p->m_sub_start_conf;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_START_CONF,
signal, SubStartConf::SignalLength, JBB);
c_opSubEvent.release(subbPtr);
@@ -10372,6 +10388,7 @@
SubStopRef* ref = (SubStopRef*) signal->getDataPtrSend();
ref->senderRef = reference();
ref->senderData = subbPtr.p->m_senderData;
+ ref->errorCode = err;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_STOP_REF,
signal, SubStopRef::SignalLength, JBB);
c_opSubEvent.release(subbPtr);
@@ -10424,6 +10441,7 @@
* Coordinator
*/
ndbrequire(refToBlock(senderRef) == DBDICT);
+ subbPtr.p->m_sub_stop_conf = *conf;
subbPtr.p->m_reqTracker.reportConf(c_counterMgr, refToNode(senderRef));
completeSubStopReq(signal,subbPtr.i,0);
}
@@ -10464,6 +10482,8 @@
#ifdef EVENT_DEBUG
ndbout_c("SUB_STOP_CONF");
#endif
+ SubStopConf* conf = (SubStopConf*)signal->getDataPtrSend();
+ * conf = subbPtr.p->m_sub_stop_conf;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_STOP_CONF,
signal, SubStopConf::SignalLength, JBB);
c_opSubEvent.release(subbPtr);
@@ -10712,6 +10732,7 @@
SubRemoveRef* ref = (SubRemoveRef*) signal->getDataPtrSend();
ref->senderRef = reference();
ref->senderData = subbPtr.p->m_senderData;
+ ref->errorCode = err;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_REMOVE_REF,
signal, SubRemoveRef::SignalLength, JBB);
}
--- 1.45/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp 2007-01-11 21:13:23 +01:00
+++ 1.46/storage/ndb/src/kernel/blocks/dbdict/Dbdict.hpp 2007-01-11 21:13:23 +01:00
@@ -51,6 +51,7 @@
#include <signaldata/DropTrig.hpp>
#include <signaldata/AlterTrig.hpp>
#include <signaldata/DictLock.hpp>
+#include <signaldata/SumaImpl.hpp>
#include "SchemaFile.hpp"
#include <blocks/mutexes.hpp>
#include <SafeCounter.hpp>
@@ -1631,6 +1632,10 @@
Uint32 m_senderRef;
Uint32 m_senderData;
Uint32 m_errorCode;
+ union {
+ SubStartConf m_sub_start_conf;
+ SubStopConf m_sub_stop_conf;
+ };
RequestTracker m_reqTracker;
};
typedef Ptr<OpSubEvent> OpSubEventPtr;
--- 1.24/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2007-01-11 21:13:23 +01:00
+++ 1.25/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2007-01-11 21:13:23 +01:00
@@ -636,6 +636,7 @@
void execTCGETOPSIZECONF(Signal *);
void execTC_CLOPSIZECONF(Signal *);
+ int handle_invalid_lcp_no(const class LcpFragRep*, ReplicaRecordPtr);
void execLCP_FRAG_REP(Signal *);
void execLCP_COMPLETE_REP(Signal *);
void execSTART_LCP_REQ(Signal *);
--- 1.96/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-01-11 21:13:23 +01:00
+++ 1.97/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2007-01-11 21:13:23 +01:00
@@ -3740,7 +3740,6 @@
takeOverPtr.i = takeOverPtrI;
ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
- releaseTakeOver(takeOverPtrI);
if ((takeOverPtr.p->toMasterStatus != TakeOverRecord::IDLE) &&
(takeOverPtr.p->toMasterStatus != TakeOverRecord::TO_WAIT_START_TAKE_OVER)) {
jam();
@@ -3754,6 +3753,7 @@
}//if
setAllowNodeStart(takeOverPtr.p->toStartingNode, true);
initTakeOver(takeOverPtr);
+ releaseTakeOver(takeOverPtrI);
}//Dbdih::endTakeOver()
void Dbdih::releaseTakeOver(Uint32 takeOverPtrI)
@@ -4045,6 +4045,11 @@
Uint32 newMasterId = nodeFail->masterNodeId;
const Uint32 noOfFailedNodes = nodeFail->noOfNodes;
+ if (ERROR_INSERTED(7179))
+ {
+ CLEAR_ERROR_INSERT_VALUE;
+ }
+
/*-------------------------------------------------------------------------*/
// The first step is to convert from a bit mask to an array of failed nodes.
/*-------------------------------------------------------------------------*/
@@ -4908,6 +4913,7 @@
break;
}
ndbrequire(ok);
+ endTakeOver(takeOverPtr.i);
}//if
}//Dbdih::handleTakeOverNewMaster()
@@ -10255,12 +10261,42 @@
Uint32 fragId = lcpReport->fragId;
jamEntry();
+
+ if (ERROR_INSERTED(7178) && nodeId != getOwnNodeId())
+ {
+ jam();
+ Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
+ Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
+ if (owng == nodeg)
+ {
+ jam();
+ ndbout_c("throwing away LCP_FRAG_REP from (and killing) %d", nodeId);
+ SET_ERROR_INSERT_VALUE(7179);
+ signal->theData[0] = 9999;
+ sendSignal(numberToRef(CMVMI, nodeId),
+ GSN_NDB_TAMPER, signal, 1, JBA);
+ return;
+ }
+ }
+ if (ERROR_INSERTED(7179) && nodeId != getOwnNodeId())
+ {
+ jam();
+ Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
+ Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
+ if (owng == nodeg)
+ {
+ jam();
+ ndbout_c("throwing away LCP_FRAG_REP from %d", nodeId);
+ return;
+ }
+ }
+
CRASH_INSERTION2(7025, isMaster());
CRASH_INSERTION2(7016, !isMaster());
-
+
bool fromTimeQueue = (signal->senderBlockRef() == reference());
-
+
TabRecordPtr tabPtr;
tabPtr.i = tableId;
ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
@@ -10462,6 +10498,37 @@
ndbrequire(false);
}//Dbdih::findReplica()
+
+/**
+ * Recover from an LCP fragment report whose lcpNo does not match the
+ * replica's expected next LCP slot (replicaPtr.p->nextLcp).
+ *
+ * Must only run on a non-master node (enforced by ndbrequire). The replica
+ * is re-synchronised with the report: nextLcp is moved to the reported
+ * slot and that slot is reset to lcpId 0 / ZINVALID. A warning event
+ * naming rep->nodeId is logged.
+ *
+ * @param rep the received report; lcpNo, lcpId and nodeId are read
+ * @param replicaPtr replica record that is adjusted in place
+ * @return always 0
+ */
+int
+Dbdih::handle_invalid_lcp_no(const LcpFragRep* rep,
+ ReplicaRecordPtr replicaPtr)
+{
+ ndbrequire(!isMaster());
+ Uint32 lcpNo = rep->lcpNo;
+ Uint32 lcpId = rep->lcpId;
+ // NOTE(review): the two locals below are computed but never used in this
+ // function - confirm whether they were meant to bound the loop further down.
+ Uint32 replicaLcpNo = replicaPtr.p->nextLcp;
+ Uint32 prevReplicaLcpNo = prevLcpNo(replicaLcpNo);
+
+ warningEvent("Detected previous node failure of %d during lcp",
+ rep->nodeId);
+ // Re-align the replica with the slot named in the report
+ replicaPtr.p->nextLcp = lcpNo;
+ replicaPtr.p->lcpId[lcpNo] = 0;
+ replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
+
+ // NOTE(review): the loop starts at lcpNo and terminates when i == lcpNo,
+ // so the body never executes and the consistency check is dead code.
+ // Presumably it was meant to visit the other LCP slots - confirm intent
+ // before changing, since enabling it activates an ndbrequire(false).
+ for (Uint32 i = lcpNo; i != lcpNo; i = nextLcpNo(i))
+ {
+ jam();
+ if (replicaPtr.p->lcpStatus[i] == ZVALID &&
+ replicaPtr.p->lcpId[i] >= lcpId)
+ {
+ ndbout_c("i: %d lcpId: %d", i, replicaPtr.p->lcpId[i]);
+ ndbrequire(false);
+ }
+ }
+
+ return 0;
+}
+
/**
* Return true if table is all fragment replicas have been checkpointed
* to disk (in all LQHs)
@@ -10490,9 +10557,12 @@
ndbrequire(replicaPtr.p->lcpOngoingFlag == true);
if(lcpNo != replicaPtr.p->nextLcp){
- ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d",
- lcpNo, replicaPtr.p->nextLcp);
- ndbrequire(false);
+ if (handle_invalid_lcp_no(lcpReport, replicaPtr))
+ {
+ ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d",
+ lcpNo, replicaPtr.p->nextLcp);
+ ndbrequire(false);
+ }
}
ndbrequire(lcpNo == replicaPtr.p->nextLcp);
ndbrequire(lcpNo < MAX_LCP_STORED);
--- 1.15/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-01-11 21:13:23 +01:00
+++ 1.16/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp 2007-01-11 21:13:23 +01:00
@@ -445,6 +445,15 @@
StopReq c_stopReq;
bool check_multi_node_shutdown(Signal* signal);
+
+#ifdef ERROR_INSERT
+ Uint32 c_error_insert_extra;
+#endif
+
+ void recompute_version_info(Uint32 type);
+ void recompute_version_info(Uint32 type, Uint32 version);
+ void execNODE_VERSION_REP(Signal* signal);
+ void sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr);
};
#endif
--- 1.12/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp 2007-01-11 21:13:23 +01:00
+++ 1.13/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp 2007-01-11 21:13:23 +01:00
@@ -36,6 +36,13 @@
setHbApiDelay(hbDBAPI);
c_connectedNodes.set(getOwnNodeId());
c_stopReq.senderRef = 0;
+
+ /**
+ * Check sanity for NodeVersion
+ */
+ ndbrequire((Uint32)NodeInfo::DB == 0);
+ ndbrequire((Uint32)NodeInfo::API == 1);
+ ndbrequire((Uint32)NodeInfo::MGM == 2);
}//Qmgr::initData()
void Qmgr::initRecords()
@@ -106,6 +113,7 @@
addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF);
addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF);
+ addRecSignal(GSN_NODE_VERSION_REP, &Qmgr::execNODE_VERSION_REP);
initData();
}//Qmgr::Qmgr()
--- 1.43/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-01-11 21:13:23 +01:00
+++ 1.44/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2007-01-11 21:13:23 +01:00
@@ -259,6 +259,9 @@
case 1:
initData(signal);
startphase1(signal);
+ recompute_version_info(NodeInfo::DB);
+ recompute_version_info(NodeInfo::API);
+ recompute_version_info(NodeInfo::MGM);
return;
case 7:
cactivateApiCheck = 1;
@@ -764,6 +767,7 @@
*/
UintR TdynId = ++c_maxDynamicId;
setNodeInfo(addNodePtr.i).m_version = startingVersion;
+ recompute_version_info(NodeInfo::DB, startingVersion);
addNodePtr.p->ndynamicId = TdynId;
/**
@@ -1502,7 +1506,8 @@
replyNodePtr.p->ndynamicId = dynamicId;
replyNodePtr.p->blockRef = signal->getSendersBlockRef();
setNodeInfo(replyNodePtr.i).m_version = version;
-
+ recompute_version_info(NodeInfo::DB, version);
+
if(!c_start.m_nodes.done()){
jam();
return;
@@ -1601,6 +1606,7 @@
}
sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
+ sendApiVersionRep(signal, nodePtr);
/* President have prepared us */
CmNodeInfoConf * conf = (CmNodeInfoConf*)signal->getDataPtrSend();
@@ -1613,6 +1619,29 @@
}
void
+Qmgr::sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr)
+{
+  /**
+   * Tell the QMGR of node nodePtr.i the version of every non-DB node
+   *   for which a non-zero version is recorded, one NODE_VERSION_REP
+   *   (theData[0] = node id, theData[1] = version) per node.
+   * Nothing is sent if the receiver's own version is older than
+   *   NDBD_NODE_VERSION_REP.
+   */
+  if (getNodeInfo(nodePtr.i).m_version < NDBD_NODE_VERSION_REP)
+    return;
+
+  jam();
+  const Uint32 receiverRef = calcQmgrBlockRef(nodePtr.i);
+  for (Uint32 nodeId = 1; nodeId < MAX_NODES; nodeId++)
+  {
+    jam();
+    const Uint32 nodeVersion = getNodeInfo(nodeId).m_version;
+    const Uint32 nodeType = getNodeInfo(nodeId).m_type;
+
+    /* Skip data nodes and nodes without a recorded version */
+    if (nodeType == NodeInfo::DB || nodeVersion == 0)
+      continue;
+
+    jam();
+    signal->theData[0] = nodeId;
+    signal->theData[1] = nodeVersion;
+    sendSignal(receiverRef, GSN_NODE_VERSION_REP, signal, 2, JBB);
+  }
+}
+
+void
Qmgr::sendCmAckAdd(Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
CmAckAdd * cmAckAdd = (CmAckAdd*)signal->getDataPtrSend();
@@ -2400,7 +2429,9 @@
* SECONDS.
*-------------------------------------------------------------------------*/
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
-
+ setNodeInfo(failedNodePtr.i).m_version = 0;
+ recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
+
CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
closeCom->xxxBlockRef = reference();
@@ -2706,7 +2737,6 @@
}
setNodeInfo(apiNodePtr.i).m_version = version;
-
setNodeInfo(apiNodePtr.i).m_heartbeat_cnt= 0;
ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
@@ -2727,8 +2757,9 @@
apiRegConf->nodeState.dynamicId = -dynamicId;
}
}
+ NodeVersionInfo info = getNodeVersionInfo();
+ apiRegConf->minDbVersion = info.m_type[NodeInfo::DB].m_min_version;
apiRegConf->nodeState.m_connected_nodes.assign(c_connectedNodes);
-
sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
if (apiNodePtr.p->phase == ZAPI_INACTIVE &&
@@ -2747,6 +2778,33 @@
signal->theData[0] = apiNodePtr.i;
sendSignal(CMVMI_REF, GSN_ENABLE_COMORD, signal, 1, JBA);
+ recompute_version_info(type, version);
+
+ if (info.m_type[NodeInfo::DB].m_min_version >= NDBD_NODE_VERSION_REP)
+ {
+ jam();
+ NodeReceiverGroup rg(QMGR, c_clusterNodes);
+ rg.m_nodes.clear(getOwnNodeId());
+ signal->theData[0] = apiNodePtr.i;
+ signal->theData[1] = version;
+ sendSignal(rg, GSN_NODE_VERSION_REP, signal, 2, JBB);
+ }
+ else
+ {
+ Uint32 i = 0;
+ while((i = c_clusterNodes.find(i + 1)) != NdbNodeBitmask::NotFound)
+ {
+ jam();
+ if (i == getOwnNodeId())
+ continue;
+ if (getNodeInfo(i).m_version >= NDBD_NODE_VERSION_REP)
+ {
+ jam();
+ sendSignal(calcQmgrBlockRef(i), GSN_NODE_VERSION_REP, signal, 2,JBB);
+ }
+ }
+ }
+
signal->theData[0] = apiNodePtr.i;
EXECUTE_DIRECT(NDBCNTR, GSN_API_START_REP, signal, 1);
}
@@ -2782,6 +2840,76 @@
ApiVersionConf::SignalLength, JBB);
}
+/**
+ * Handle NODE_VERSION_REP.
+ *
+ * theData[0] = node id, theData[1] = version of that node.
+ * Stores the version in the node info table and folds it into the
+ * per-type min/max version summary.  Reports whose node id is outside
+ * 1..MAX_NODES-1 are silently ignored.
+ */
+void
+Qmgr::execNODE_VERSION_REP(Signal* signal)
+{
+  jamEntry();
+  Uint32 nodeId = signal->theData[0];
+  Uint32 version = signal->theData[1];
+
+  /**
+   * getNodeInfo() does ndbrequire(nodeId > 0 && nodeId < MAX_NODES),
+   *   so node id 0 has to be filtered here as well as too large ids;
+   *   otherwise a malformed report would take this node down.
+   */
+  if (nodeId > 0 && nodeId < MAX_NODES)
+  {
+    jam();
+    Uint32 type = getNodeInfo(nodeId).m_type;
+    setNodeInfo(nodeId).m_version = version;
+    recompute_version_info(type, version);
+  }
+}
+
+/**
+ * Fold one newly learnt version into the min/max summary of a node type.
+ *
+ * Types other than DB, API and MGM are ignored.  A stored minimum of 0
+ * is treated as "not set yet" and is overwritten unconditionally.
+ */
+void
+Qmgr::recompute_version_info(Uint32 type, Uint32 version)
+{
+  NodeVersionInfo& info = setNodeVersionInfo();
+
+  const bool knownType = (type == NodeInfo::DB ||
+                          type == NodeInfo::API ||
+                          type == NodeInfo::MGM);
+  if (!knownType)
+    return;
+
+  Uint32 & minVersion = info.m_type[type].m_min_version;
+  Uint32 & maxVersion = info.m_type[type].m_max_version;
+
+  if (minVersion == 0 || version < minVersion)
+    minVersion = version;
+
+  if (version > maxVersion)
+    maxVersion = version;
+}
+
+/**
+ * Rebuild the min/max version summary of one node type from scratch by
+ * scanning the node info table.
+ *
+ * Types other than DB, API and MGM are ignored.  Nodes with version 0
+ * do not take part; if no node of the type has a version, both min and
+ * max end up as 0.
+ */
+void
+Qmgr::recompute_version_info(Uint32 type)
+{
+  if (type != NodeInfo::DB &&
+      type != NodeInfo::API &&
+      type != NodeInfo::MGM)
+    return;
+
+  const Uint32 noVersionSeen = ~(Uint32)0;
+  Uint32 lowest = noVersionSeen;
+  Uint32 highest = 0;
+
+  /* DB nodes are only looked for below MAX_NDB_NODES */
+  const Uint32 limit = (type == NodeInfo::DB) ? MAX_NDB_NODES : MAX_NODES;
+  for (Uint32 nodeId = 1; nodeId < limit; nodeId++)
+  {
+    if (getNodeInfo(nodeId).m_type != type)
+      continue;
+
+    const Uint32 nodeVersion = getNodeInfo(nodeId).m_version;
+    if (nodeVersion == 0)
+      continue;
+
+    if (nodeVersion < lowest)
+      lowest = nodeVersion;
+    if (nodeVersion > highest)
+      highest = nodeVersion;
+  }
+
+  if (lowest == noVersionSeen)
+    lowest = 0;
+
+  NodeVersionInfo& info = setNodeVersionInfo();
+  info.m_type[type].m_min_version = lowest;
+  info.m_type[type].m_max_version = highest;
+}
#if 0
bool
@@ -2921,6 +3049,17 @@
systemErrorLab(signal, __LINE__);
return;
}//if
+
+ if (getNodeState().startLevel < NodeState::SL_STARTED)
+ {
+ jam();
+ CRASH_INSERTION(932);
+ char buf[100];
+ BaseString::snprintf(buf, 100, "Node failure during restart");
+ progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
+ ndbrequire(false);
+ }
+
TnoFailedNodes = cnoFailedNodes;
failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause);
if (cpresident == getOwnNodeId()) {
@@ -3007,6 +3146,16 @@
return;
}//if
+ if (getNodeState().startLevel < NodeState::SL_STARTED)
+ {
+ jam();
+ CRASH_INSERTION(932);
+ char buf[100];
+ BaseString::snprintf(buf, 100, "Node failure during restart");
+ progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
+ ndbrequire(false);
+ }
+
guard0 = cnoPrepFailedNodes - 1;
arrGuard(guard0, MAX_NDB_NODES);
for (Tindex = 0; Tindex <= guard0; Tindex++) {
@@ -3184,6 +3333,18 @@
for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
jam();
ptrAss(nodePtr, nodeRec);
+
+#ifdef ERROR_INSERT
+ if (ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
+ {
+ ndbout_c("skipping node %d", c_error_insert_extra);
+ CLEAR_ERROR_INSERT_VALUE;
+ signal->theData[0] = 9999;
+ sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
+ continue;
+ }
+#endif
+
if (nodePtr.p->phase == ZRUNNING) {
jam();
nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
@@ -3254,6 +3415,33 @@
return;
}//Qmgr::execPREP_FAILREF()
+/**
+ * Remove from dst[0..dstcnt) every node id that also occurs in
+ * src[0..srccnt).  Surviving entries are compacted to the front of dst
+ * in their original order.
+ *
+ * @param dstcnt  number of valid entries in dst
+ * @param dst     array that is filtered in place
+ * @param srccnt  number of valid entries in src
+ * @param src     node ids to remove from dst
+ * @return        number of entries left in dst
+ */
+static
+Uint32
+clear_nodes(Uint32 dstcnt, Uint16 dst[], Uint32 srccnt, const Uint16 src[])
+{
+  if (srccnt == 0)
+    return dstcnt;
+
+  Uint32 pos = 0;
+  for (Uint32 i = 0; i<dstcnt; i++)
+  {
+    Uint32 node = dst[i];
+    for (Uint32 j = 0; j<srccnt; j++)
+    {
+      /**
+       * Compare against the removal list (src).  Comparing against
+       *   dst[j] would drop entries based on dst's own contents.
+       */
+      if (node == src[j])
+      {
+        node = RNIL; // marks the entry as removed
+        break;
+      }
+    }
+    if (node != RNIL)
+    {
+      dst[pos++] = node;
+    }
+  }
+  return pos;
+}
+
/*---------------------------------------------------------------------------*/
/* THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE. */
/*---------------------------------------------------------------------------*/
@@ -3341,19 +3529,18 @@
NodeFailRep::SignalLength, JBB);
}//if
}//for
- if (cpresident != getOwnNodeId()) {
- jam();
- cnoFailedNodes = cnoCommitFailedNodes - cnoFailedNodes;
- if (cnoFailedNodes > 0) {
- jam();
- guard0 = cnoFailedNodes - 1;
- arrGuard(guard0 + cnoCommitFailedNodes, MAX_NDB_NODES);
- for (Tj = 0; Tj <= guard0; Tj++) {
- jam();
- cfailedNodes[Tj] = cfailedNodes[Tj + cnoCommitFailedNodes];
- }//for
- }//if
- }//if
+
+ /**
+ * Remove committed nodes from failed/prepared
+ */
+ cnoFailedNodes = clear_nodes(cnoFailedNodes,
+ cfailedNodes,
+ cnoCommitFailedNodes,
+ ccommitFailedNodes);
+ cnoPrepFailedNodes = clear_nodes(cnoPrepFailedNodes,
+ cprepFailedNodes,
+ cnoCommitFailedNodes,
+ ccommitFailedNodes);
cnoCommitFailedNodes = 0;
}//if
/**-----------------------------------------------------------------------
@@ -4732,6 +4919,14 @@
default:
;
}//switch
+
+#ifdef ERROR_INSERT
+ if (signal->theData[0] == 935 && signal->getLength() == 2)
+ {
+ SET_ERROR_INSERT_VALUE(935);
+ c_error_insert_extra = signal->theData[1];
+ }
+#endif
}//Qmgr::execDUMP_STATE_ORD()
void Qmgr::execSET_VAR_REQ(Signal* signal)
--- 1.50/storage/ndb/src/kernel/blocks/suma/Suma.cpp 2007-01-11 21:13:23 +01:00
+++ 1.51/storage/ndb/src/kernel/blocks/suma/Suma.cpp 2007-01-11 21:13:23 +01:00
@@ -229,7 +229,6 @@
c_startup.m_wait_handover= false;
c_failedApiNodes.clear();
- c_startup.m_restart_server_node_id = 0; // Server for my NR
ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
conf->senderRef = reference();
@@ -260,6 +259,14 @@
if(startphase == 5)
{
+ if (ERROR_INSERTED(13029)) /* Hold startphase 5 */
+ {
+ sendSignalWithDelay(SUMA_REF, GSN_STTOR, signal,
+ 30, signal->getLength());
+ DBUG_VOID_RETURN;
+ }
+
+ c_startup.m_restart_server_node_id = 0;
getNodeGroupMembers(signal);
if (typeOfStart == NodeState::ST_NODE_RESTART ||
typeOfStart == NodeState::ST_INITIAL_NODE_RESTART)
@@ -372,6 +379,8 @@
infoEvent("Suma: node %d refused %d",
c_startup.m_restart_server_node_id, ref->errorCode);
+
+ c_startup.m_restart_server_node_id++;
send_start_me_req(signal);
}
@@ -886,6 +895,22 @@
ptr->m_buffer_head.m_page_id);
}
}
+
+ if (tCase == 8006)
+ {
+ SET_ERROR_INSERT_VALUE(13029);
+ }
+
+ if (tCase == 8007)
+ {
+ c_startup.m_restart_server_node_id = MAX_NDB_NODES + 1;
+ SET_ERROR_INSERT_VALUE(13029);
+ }
+
+ if (tCase == 8008)
+ {
+ CLEAR_ERROR_INSERT_VALUE;
+ }
}
/*************************************************************
@@ -1091,14 +1116,14 @@
}
} else {
if (c_startup.m_restart_server_node_id &&
- refToNode(subRef) != c_startup.m_restart_server_node_id)
+ subRef != calcSumaBlockRef(c_startup.m_restart_server_node_id))
{
/**
* only allow "restart_server" Suma's to come through
* for restart purposes
*/
jam();
- sendSubStartRef(signal, 1405);
+ sendSubCreateRef(signal, 1415);
DBUG_VOID_RETURN;
}
// Check that id/key is unique
@@ -2231,14 +2256,17 @@
key.m_subscriptionKey = req->subscriptionKey;
if (c_startup.m_restart_server_node_id &&
- refToNode(senderRef) != c_startup.m_restart_server_node_id)
+ senderRef != calcSumaBlockRef(c_startup.m_restart_server_node_id))
{
/**
* only allow "restart_server" Suma's to come through
* for restart purposes
*/
jam();
- sendSubStartRef(signal, 1405);
+ Uint32 err = c_startup.m_restart_server_node_id != RNIL ? 1405 :
+ SubStartRef::NF_FakeErrorREF;
+
+ sendSubStartRef(signal, err);
DBUG_VOID_RETURN;
}
@@ -2453,25 +2481,28 @@
DBUG_VOID_RETURN;
}
- if(!c_subscriptions.find(subPtr, key)){
- jam();
- DBUG_PRINT("error", ("not found"));
- sendSubStopRef(signal, 1407);
- DBUG_VOID_RETURN;
- }
-
if (c_startup.m_restart_server_node_id &&
- refToNode(senderRef) != c_startup.m_restart_server_node_id)
+ senderRef != calcSumaBlockRef(c_startup.m_restart_server_node_id))
{
/**
* only allow "restart_server" Suma's to come through
* for restart purposes
*/
jam();
- sendSubStopRef(signal, 1405);
+ Uint32 err = c_startup.m_restart_server_node_id != RNIL ? 1405 :
+ SubStopRef::NF_FakeErrorREF;
+
+ sendSubStopRef(signal, err);
DBUG_VOID_RETURN;
}
+ if(!c_subscriptions.find(subPtr, key)){
+ jam();
+ DBUG_PRINT("error", ("not found"));
+ sendSubStopRef(signal, 1407);
+ DBUG_VOID_RETURN;
+ }
+
if (subPtr.p->m_state == Subscription::LOCKED) {
jam();
DBUG_PRINT("error", ("locked"));
--- 1.15/storage/ndb/src/kernel/blocks/suma/SumaInit.cpp 2007-01-11 21:13:23 +01:00
+++ 1.16/storage/ndb/src/kernel/blocks/suma/SumaInit.cpp 2007-01-11 21:13:23 +01:00
@@ -121,6 +121,8 @@
addRecSignal(GSN_SUB_GCP_COMPLETE_REP,
&Suma::execSUB_GCP_COMPLETE_REP);
+
+ c_startup.m_restart_server_node_id = RNIL; // Server for my NR
}
Suma::~Suma()
--- 1.8/storage/ndb/src/kernel/vm/GlobalData.hpp 2007-01-11 21:13:23 +01:00
+++ 1.9/storage/ndb/src/kernel/vm/GlobalData.hpp 2007-01-11 21:13:23 +01:00
@@ -35,6 +35,7 @@
struct GlobalData {
Uint32 m_restart_seq; //
+ NodeVersionInfo m_versionInfo;
NodeInfo m_nodeInfo[MAX_NODES];
Signal VMSignals[1]; // Owned by FastScheduler::
--- 1.28/storage/ndb/src/kernel/vm/SimulatedBlock.hpp 2007-01-11 21:13:23 +01:00
+++ 1.29/storage/ndb/src/kernel/vm/SimulatedBlock.hpp 2007-01-11 21:13:23 +01:00
@@ -402,6 +402,9 @@
const NodeInfo & getNodeInfo(NodeId nodeId) const;
NodeInfo & setNodeInfo(NodeId);
+ const NodeVersionInfo& getNodeVersionInfo() const;
+ NodeVersionInfo& setNodeVersionInfo();
+
/**********************
* Xfrm stuff
*/
@@ -706,6 +709,18 @@
SimulatedBlock::getNodeInfo(NodeId nodeId) const {
ndbrequire(nodeId > 0 && nodeId < MAX_NODES);
return globalData.m_nodeInfo[nodeId];
+}
+
+/**
+ * Read-only access to the NodeVersionInfo stored in globalData.
+ */
+inline
+const NodeVersionInfo &
+SimulatedBlock::getNodeVersionInfo() const {
+ return globalData.m_versionInfo;
+}
+
+/**
+ * Writable access to the NodeVersionInfo stored in globalData
+ * (the same object getNodeVersionInfo() returns).
+ */
+inline
+NodeVersionInfo &
+SimulatedBlock::setNodeVersionInfo() {
+ return globalData.m_versionInfo;
}
inline
--- 1.36/storage/ndb/test/ndbapi/testNodeRestart.cpp 2007-01-11 21:13:23 +01:00
+++ 1.37/storage/ndb/test/ndbapi/testNodeRestart.cpp 2007-01-11 21:13:23 +01:00
@@ -931,6 +931,81 @@
return NDBT_OK;
}
+/**
+ * Bug#24717: in each loop restart a random non-master data node, wait for
+ * it with waitNodesNoStart, send it dump code 9002 together with the node
+ * id of this API connection, start it again and run 100 batches of
+ * committed reads before waiting for the cluster to be started.
+ *
+ * Only a failing dump makes the test return NDBT_FAILED.
+ */
+int runBug24717(NDBT_Context* ctx, NDBT_Step* step){
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  NdbRestarter restarter;
+  Ndb* pNdb = GETNDB(step);
+
+  HugoTransactions hugoTrans(*ctx->getTab());
+
+  /* Second dump argument is the node id of this API connection */
+  const Uint32 apiNodeId = refToNode(pNdb->getReference());
+  int dumpArgs[2];
+  dumpArgs[0] = 9002;
+  dumpArgs[1] = apiNodeId;
+
+  while (loops)
+  {
+    int restartNode = restarter.getRandomNotMasterNodeId(rand());
+    restarter.restartOneDbNode(restartNode, false, true, true);
+    restarter.waitNodesNoStart(&restartNode, 1);
+
+    if (restarter.dumpStateOneNode(restartNode, dumpArgs, 2) != 0)
+    {
+      return NDBT_FAILED;
+    }
+
+    restarter.startNodes(&restartNode, 1);
+
+    /* Keep reading while the node is starting */
+    for (Uint32 batch = 0; batch < 100; batch++)
+      hugoTrans.pkReadRecords(pNdb, 100, 1, NdbOperation::LM_CommittedRead);
+
+    restarter.waitClusterStarted();
+    loops--;
+  }
+
+  return NDBT_OK;
+}
+
+/**
+ * Bug#25364: per loop, pick a victim in another node group than the
+ * master and a second node in the victim's node group.  The master gets
+ * dump 935 with the victim's node id plus CmvmiSetRestartOnErrorInsert,
+ * then the second node is restarted.  The test waits for master and
+ * second node with waitNodesNoStart, starts both and waits until they
+ * are started.
+ *
+ * Returns NDBT_OK without doing anything on clusters with fewer than
+ * 4 data nodes.
+ */
+int runBug25364(NDBT_Context* ctx, NDBT_Step* step){
+  int result = NDBT_OK;
+  NdbRestarter restarter;
+  Ndb* pNdb = GETNDB(step);
+  int loops = ctx->getNumLoops();
+
+  if (restarter.getNumDbNodes() < 4)
+  {
+    return NDBT_OK;
+  }
+
+  int restartOnErrorInsert[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+
+  while (loops)
+  {
+    const int master = restarter.getMasterNodeId();
+    const int victim = restarter.getRandomNodeOtherNodeGroup(master, rand());
+    const int second = restarter.getRandomNodeSameNodeGroup(victim, rand());
+
+    int err935Args[] = { 935, victim } ;
+    int nodes[2] = { master, second };
+
+    /* Each step must succeed; evaluation stops at the first failure */
+    if (restarter.dumpStateOneNode(master, err935Args, 2) ||
+        restarter.dumpStateOneNode(master, restartOnErrorInsert, 2) ||
+        restarter.restartOneDbNode(second, false, true, true) ||
+        restarter.waitNodesNoStart(nodes, 2))
+    {
+      return NDBT_FAILED;
+    }
+
+    restarter.startNodes(nodes, 2);
+
+    if (restarter.waitNodesStarted(nodes, 2))
+    {
+      return NDBT_FAILED;
+    }
+
+    loops--;
+  }
+
+  return NDBT_OK;
+}
+
int
runBug21271(NDBT_Context* ctx, NDBT_Step* step){
int result = NDBT_OK;
@@ -995,40 +1070,111 @@
}
return NDBT_OK;
}
-int runBug24717(NDBT_Context* ctx, NDBT_Step* step){
+
+int runBug25468(NDBT_Context* ctx, NDBT_Step* step){
+
int result = NDBT_OK;
int loops = ctx->getNumLoops();
int records = ctx->getNumRecords();
NdbRestarter restarter;
- Ndb* pNdb = GETNDB(step);
- HugoTransactions hugoTrans(*ctx->getTab());
+ for (int i = 0; i<loops; i++)
+ {
+ int master = restarter.getMasterNodeId();
+ int node1, node2;
+ switch(i % 5){
+ case 0:
+ node1 = master;
+ node2 = restarter.getRandomNodeSameNodeGroup(master, rand());
+ break;
+ case 1:
+ node1 = restarter.getRandomNodeSameNodeGroup(master, rand());
+ node2 = master;
+ break;
+ case 2:
+ case 3:
+ case 4:
+ node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
+ if (node1 == -1)
+ node1 = master;
+ node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
+ break;
+ }
- int dump[] = { 9000, 0 } ;
- Uint32 ownNode = refToNode(pNdb->getReference());
- dump[1] = ownNode;
+ ndbout_c("node1: %d node2: %d master: %d", node1, node2, master);
- for (; loops; loops --)
- {
- int nodeId = restarter.getRandomNotMasterNodeId(rand());
- restarter.restartOneDbNode(nodeId, false, true, true);
- restarter.waitNodesNoStart(&nodeId, 1);
-
- if (restarter.dumpStateOneNode(nodeId, dump, 2))
+ int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+
+ if (restarter.dumpStateOneNode(node2, val2, 2))
+ return NDBT_FAILED;
+
+ if (restarter.insertErrorInNode(node1, 7178))
+ return NDBT_FAILED;
+
+ int val1 = 7099;
+ if (restarter.dumpStateOneNode(master, &val1, 1))
+ return NDBT_FAILED;
+
+ if (restarter.waitNodesNoStart(&node2, 1))
+ return NDBT_FAILED;
+
+ if (restarter.startAll())
+ return NDBT_FAILED;
+
+ if (restarter.waitClusterStarted())
return NDBT_FAILED;
-
- restarter.startNodes(&nodeId, 1);
-
- for (Uint32 i = 0; i < 100; i++)
- {
- hugoTrans.pkReadRecords(pNdb, 100, 1, NdbOperation::LM_CommittedRead);
- }
-
- int reset[2] = { 9001, 0 };
- restarter.dumpStateOneNode(nodeId, reset, 2);
- restarter.waitClusterStarted();
}
+
+ return NDBT_OK;
+}
+
+/**
+ * Bug#25554: per loop, restart a node from another node group than the
+ * master, insert error 7141 in the master, and once the restarted node
+ * is reported by waitNodesNoStart insert error 932 in it and start it.
+ * The test then waits for both master and that node with
+ * waitNodesNoStart, starts them and waits for the whole cluster.
+ *
+ * Returns NDBT_OK without doing anything on clusters with fewer than
+ * 4 data nodes.
+ */
+int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
+
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  NdbRestarter restarter;
+
+  if (restarter.getNumDbNodes() < 4)
+  {
+    return NDBT_OK;
+  }
+
+  int round = 0;
+  while (round < loops)
+  {
+    int master = restarter.getMasterNodeId();
+    int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
+    restarter.restartOneDbNode(node1, false, true, true);
+
+    int restartOnErrorInsert[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+
+    /* Arm the master, then wait for node1 to come down */
+    if (restarter.dumpStateOneNode(master, restartOnErrorInsert, 2) ||
+        restarter.insertErrorInNode(master, 7141) ||
+        restarter.waitNodesNoStart(&node1, 1))
+    {
+      return NDBT_FAILED;
+    }
+
+    /* Arm node1 and let it start */
+    if (restarter.dumpStateOneNode(node1, restartOnErrorInsert, 2) ||
+        restarter.insertErrorInNode(node1, 932) ||
+        restarter.startNodes(&node1, 1))
+    {
+      return NDBT_FAILED;
+    }
+
+    /* Both nodes are expected to go down; bring them back */
+    int nodes[] = { master, node1 };
+    if (restarter.waitNodesNoStart(nodes, 2) ||
+        restarter.startNodes(nodes, 2) ||
+        restarter.waitClusterStarted())
+    {
+      return NDBT_FAILED;
+    }
+
+    round++;
+  }
+
return NDBT_OK;
}
@@ -1358,6 +1504,15 @@
}
TESTCASE("Bug24717", ""){
INITIALIZER(runBug24717);
+}
+TESTCASE("Bug25364", ""){
+ INITIALIZER(runBug25364);
+}
+TESTCASE("Bug25468", ""){
+ INITIALIZER(runBug25468);
+}
+TESTCASE("Bug25554", ""){
+ INITIALIZER(runBug25554);
}
NDBT_TESTSUITE_END(testNodeRestart);
| Thread |
|---|
| • bk commit into 5.1 tree (jonas:1.2386) | jonas | 11 Jan |