Below is the list of changes that have just been committed into a local
4.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2180 05/04/22 09:07:25 joreland@stripped +16 -0
bug#9924 - ndb backup abort handling
Redo abort handling according to descr. in Backup.txt
bug#9960 - ndb backup
increase wait completed timeout to 48 hours
ndb/test/src/NdbBackup.cpp
1.20 05/04/22 09:07:23 joreland@stripped +26 -20
fix error codes
introduce checking of backup resources after each test
ndb/test/run-test/daily-basic-tests.txt
1.17 05/04/22 09:07:23 joreland@stripped +24 -0
Add failure test cases to autotest
ndb/test/ndbapi/testBackup.cpp
1.10 05/04/22 09:07:23 joreland@stripped +7 -7
fix return codes
ndb/src/ndbapi/ndberror.c
1.22 05/04/22 09:07:23 joreland@stripped +3 -2
new error codes
ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp
1.2 05/04/22 09:07:23 joreland@stripped +4 -2
Handle node failures actively
(mainly for backup...)
ndb/src/mgmsrv/MgmtSrvr.hpp
1.25 05/04/22 09:07:22 joreland@stripped +3 -1
Handle node failures actively
(mainly for backup...)
ndb/src/mgmsrv/MgmtSrvr.cpp
1.59 05/04/22 09:07:22 joreland@stripped +11 -18
Handle node failures actively
(mainly for backup...)
ndb/src/mgmapi/mgmapi.cpp
1.26 05/04/22 09:07:22 joreland@stripped +2 -2
bug#9960 - ndb backup
increase wait completed timeout to 48 hours
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
1.17 05/04/22 09:07:22 joreland@stripped +1 -0
Init own version
ndb/src/kernel/blocks/backup/BackupInit.cpp
1.10 05/04/22 09:07:22 joreland@stripped +1 -1
bug#9924 - ndb backup abort handling
Redo abort handling according to descr. in Backup.txt
ndb/src/kernel/blocks/backup/Backup.txt
1.2 05/04/22 09:07:22 joreland@stripped +25 -0
bug#9924 - ndb backup abort handling
Redo abort handling according to descr. in Backup.txt
ndb/src/kernel/blocks/backup/Backup.hpp
1.7 05/04/22 09:07:22 joreland@stripped +15 -21
bug#9924 - ndb backup abort handling
Redo abort handling according to descr. in Backup.txt
ndb/src/kernel/blocks/backup/Backup.cpp
1.14 05/04/22 09:07:22 joreland@stripped +616 -876
bug#9924 - ndb backup abort handling
Redo abort handling according to descr. in Backup.txt
ndb/src/common/debugger/signaldata/BackupImpl.cpp
1.3 05/04/22 09:07:22 joreland@stripped +2 -4
fix printout
ndb/include/kernel/signaldata/BackupSignalData.hpp
1.3 05/04/22 09:07:22 joreland@stripped +3 -0
new error codes
ndb/include/kernel/signaldata/BackupImpl.hpp
1.3 05/04/22 09:07:22 joreland@stripped +7 -5
Add nodeid to reply to be able to fake reply during NF
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: joreland
# Host: eel.hemma.oreland.se.ndb.mysql.com
# Root: /home/jonas/src/mysql-4.1
--- 1.16/ndb/test/run-test/daily-basic-tests.txt Thu Feb 10 18:15:12 2005
+++ 1.17/ndb/test/run-test/daily-basic-tests.txt Fri Apr 22 09:07:23 2005
@@ -4,6 +4,30 @@
max-time: 600
cmd: atrt-testBackup
+args: -n NFMaster T1
+
+max-time: 600
+cmd: atrt-testBackup
+args: -n NFMasterAsSlave T1
+
+max-time: 600
+cmd: atrt-testBackup
+args: -n NFSlave T1
+
+max-time: 600
+cmd: atrt-testBackup
+args: -n FailMaster T1
+
+max-time: 600
+cmd: atrt-testBackup
+args: -n FailMasterAsSlave T1
+
+max-time: 600
+cmd: atrt-testBackup
+args: -n FailSlave T1
+
+max-time: 600
+cmd: atrt-testBackup
args: -n BackupOne T1 T6 T3 I3
# BASIC FUNCTIONALITY
--- 1.2/ndb/include/kernel/signaldata/BackupImpl.hpp Mon Nov 29 16:14:40 2004
+++ 1.3/ndb/include/kernel/signaldata/BackupImpl.hpp Fri Apr 22 09:07:22 2005
@@ -75,7 +75,7 @@
friend bool printDEFINE_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public:
- STATIC_CONST( SignalLength = 3 );
+ STATIC_CONST( SignalLength = 4 );
enum ErrorCode {
Undefined = 1340,
@@ -92,6 +92,7 @@
Uint32 backupId;
Uint32 backupPtr;
Uint32 errorCode;
+ Uint32 nodeId;
};
class DefineBackupConf {
@@ -158,7 +159,7 @@
friend bool printSTART_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public:
- STATIC_CONST( SignalLength = 4 );
+ STATIC_CONST( SignalLength = 5 );
enum ErrorCode {
FailedToAllocateTriggerRecord = 1
@@ -168,6 +169,7 @@
Uint32 backupPtr;
Uint32 signalNo;
Uint32 errorCode;
+ Uint32 nodeId;
};
class StartBackupConf {
@@ -232,9 +234,8 @@
private:
Uint32 backupId;
Uint32 backupPtr;
- Uint32 tableId;
- Uint32 fragmentNo;
Uint32 errorCode;
+ Uint32 nodeId;
};
class BackupFragmentConf {
@@ -296,12 +297,13 @@
friend bool printSTOP_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public:
- STATIC_CONST( SignalLength = 3 );
+ STATIC_CONST( SignalLength = 4 );
private:
Uint32 backupId;
Uint32 backupPtr;
Uint32 errorCode;
+ Uint32 nodeId;
};
class StopBackupConf {
--- 1.2/ndb/include/kernel/signaldata/BackupSignalData.hpp Mon Nov 29 16:14:40 2004
+++ 1.3/ndb/include/kernel/signaldata/BackupSignalData.hpp Fri Apr 22 09:07:22 2005
@@ -240,6 +240,9 @@
FileOrScanError = 1325, // slave -> coordinator
BackupFailureDueToNodeFail = 1326, // slave -> slave
OkToClean = 1327 // master -> slave
+
+ ,AbortScan = 1328
+ ,IncompatibleVersions = 1329
};
private:
Uint32 requestType;
--- 1.2/ndb/src/common/debugger/signaldata/BackupImpl.cpp Thu Jun 17 02:33:55 2004
+++ 1.3/ndb/src/common/debugger/signaldata/BackupImpl.cpp Fri Apr 22 09:07:22 2005
@@ -90,10 +90,8 @@
bool
printBACKUP_FRAGMENT_REF(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){
BackupFragmentRef* sig = (BackupFragmentRef*)data;
- fprintf(out, " backupPtr: %d backupId: %d\n",
- sig->backupPtr, sig->backupId);
- fprintf(out, " tableId: %d fragmentNo: %d errorCode: %d\n",
- sig->tableId, sig->fragmentNo, sig->errorCode);
+ fprintf(out, " backupPtr: %d backupId: %d nodeId: %d errorCode: %d\n",
+ sig->backupPtr, sig->backupId, sig->nodeId, sig->errorCode);
return true;
}
--- 1.13/ndb/src/kernel/blocks/backup/Backup.cpp Wed Feb 2 03:55:36 2005
+++ 1.14/ndb/src/kernel/blocks/backup/Backup.cpp Fri Apr 22 09:07:22 2005
@@ -67,31 +67,6 @@
//#define DEBUG_ABORT
-//---------------------------------------------------------
-// Ignore this since a completed abort could have preceded
-// this message.
-//---------------------------------------------------------
-#define slaveAbortCheck() \
-if ((ptr.p->backupId != backupId) || \
- (ptr.p->slaveState.getState() == ABORTING)) { \
- jam(); \
- return; \
-}
-
-#define masterAbortCheck() \
-if ((ptr.p->backupId != backupId) || \
- (ptr.p->masterData.state.getState() == ABORTING)) { \
- jam(); \
- return; \
-}
-
-#define defineSlaveAbortCheck() \
- if (ptr.p->slaveState.getState() == ABORTING) { \
- jam(); \
- closeFiles(signal, ptr); \
- return; \
- }
-
static Uint32 g_TypeOfStart = NodeState::ST_ILLEGAL_TYPE;
void
@@ -221,12 +196,7 @@
jam();
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, Tdata1);
-
- if (ptr.p->slaveState.getState() == ABORTING) {
- jam();
- closeFiles(signal, ptr);
- return;
- }//if
+
BackupFilePtr filePtr;
ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
FsBuffer & buf = filePtr.p->operation.dataBuffer;
@@ -324,13 +294,7 @@
for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)){
infoEvent("BackupRecord %d: BackupId: %d MasterRef: %x ClientRef: %x",
ptr.i, ptr.p->backupId, ptr.p->masterRef, ptr.p->clientRef);
- if(ptr.p->masterRef == reference()){
- infoEvent(" MasterState: %d State: %d",
- ptr.p->masterData.state.getState(),
- ptr.p->slaveState.getState());
- } else {
- infoEvent(" State: %d", ptr.p->slaveState.getState());
- }
+ infoEvent(" State: %d", ptr.p->slaveState.getState());
BackupFilePtr filePtr;
for(ptr.p->files.first(filePtr); filePtr.i != RNIL;
ptr.p->files.next(filePtr)){
@@ -338,7 +302,7 @@
infoEvent(" file %d: type: %d open: %d running: %d done: %d scan: %d",
filePtr.i, filePtr.p->fileType, filePtr.p->fileOpened,
filePtr.p->fileRunning,
- filePtr.p->fileDone, filePtr.p->scanRunning);
+ filePtr.p->fileClosing, filePtr.p->scanRunning);
}
}
}
@@ -356,6 +320,17 @@
infoEvent("PagePool: %d",
c_pagePool.getSize());
+
+ if(signal->getLength() == 2 && signal->theData[1] == 2424)
+ {
+ ndbrequire(c_tablePool.getSize() == c_tablePool.getNoOfFree());
+ ndbrequire(c_attributePool.getSize() == c_attributePool.getNoOfFree());
+ ndbrequire(c_backupPool.getSize() == c_backupPool.getNoOfFree());
+ ndbrequire(c_backupFilePool.getSize() == c_backupFilePool.getNoOfFree());
+ ndbrequire(c_pagePool.getSize() == c_pagePool.getNoOfFree());
+ ndbrequire(c_fragmentPool.getSize() == c_fragmentPool.getNoOfFree());
+ ndbrequire(c_triggerPool.getSize() == c_triggerPool.getNoOfFree());
+ }
}
}
@@ -512,27 +487,6 @@
};
const Backup::State
-Backup::validMasterTransitions[] = {
- INITIAL, DEFINING,
- DEFINING, DEFINED,
- DEFINED, STARTED,
- STARTED, SCANNING,
- SCANNING, STOPPING,
- STOPPING, INITIAL,
-
- DEFINING, ABORTING,
- DEFINED, ABORTING,
- STARTED, ABORTING,
- SCANNING, ABORTING,
- STOPPING, ABORTING,
- ABORTING, ABORTING,
-
- DEFINING, INITIAL,
- ABORTING, INITIAL,
- INITIAL, INITIAL
-};
-
-const Backup::State
Backup::validSlaveTransitions[] = {
INITIAL, DEFINING,
DEFINING, DEFINED,
@@ -561,10 +515,6 @@
Backup::validSlaveTransitionsCount =
sizeof(Backup::validSlaveTransitions) / sizeof(Backup::State);
-const Uint32
-Backup::validMasterTransitionsCount =
-sizeof(Backup::validMasterTransitions) / sizeof(Backup::State);
-
void
Backup::CompoundState::setState(State newState){
bool found = false;
@@ -578,7 +528,8 @@
break;
}
}
- ndbrequire(found);
+
+ //ndbrequire(found);
if (newState == INITIAL)
abortState = INITIAL;
@@ -647,8 +598,7 @@
Uint32 theFailedNodes[NodeBitmask::Size];
for (Uint32 i = 0; i < NodeBitmask::Size; i++)
theFailedNodes[i] = rep->theNodes[i];
-
-// NodeId old_master_node_id = getMasterNodeId();
+
c_masterNodeId = new_master_node_id;
NodePtr nodePtr;
@@ -686,15 +636,24 @@
}
bool
-Backup::verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask)
+Backup::verifyNodesAlive(BackupRecordPtr ptr,
+ const NdbNodeBitmask& aNodeBitMask)
{
+ Uint32 version = getNodeInfo(getOwnNodeId()).m_version;
for (Uint32 i = 0; i < MAX_NDB_NODES; i++) {
jam();
if(aNodeBitMask.get(i)) {
if(!c_aliveNodes.get(i)){
jam();
+ ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail);
return false;
}//if
+ if(getNodeInfo(i).m_version != version)
+ {
+ jam();
+ ptr.p->setErrorCode(AbortBackupOrd::IncompatibleVersions);
+ return false;
+ }
}//if
}//for
return true;
@@ -709,6 +668,10 @@
ndbrequire( ptr.p->nodes.get(newCoord)); /* just to make sure newCoord
* is part of the backup
*/
+
+ NdbNodeBitmask mask;
+ mask.assign(2, theFailedNodes);
+
/* Update ptr.p->nodes to be up to date with current alive nodes
*/
NodePtr nodePtr;
@@ -730,26 +693,42 @@
return; // failed node is not part of backup process, safe to continue
}
- bool doMasterTakeover = false;
- if(NodeBitmask::get(theFailedNodes, refToNode(ptr.p->masterRef))){
- jam();
- doMasterTakeover = true;
- };
-
- if (newCoord == getOwnNodeId()){
- jam();
- if (doMasterTakeover) {
- /**
- * I'm new master
- */
- CRASH_INSERTION((10002));
-#ifdef DEBUG_ABORT
- ndbout_c("**** Master Takeover: Node failed: Master id = %u",
- refToNode(ptr.p->masterRef));
-#endif
- masterTakeOver(signal, ptr);
+ if(mask.get(refToNode(ptr.p->masterRef)))
+ {
+ /**
+ * Master died...abort
+ */
+ ptr.p->masterRef = reference();
+ ptr.p->nodes.clear();
+ ptr.p->nodes.set(getOwnNodeId());
+ ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail);
+ switch(ptr.p->m_gsn){
+ case GSN_DEFINE_BACKUP_REQ:
+ case GSN_START_BACKUP_REQ:
+ case GSN_BACKUP_FRAGMENT_REQ:
+ case GSN_STOP_BACKUP_REQ:
+ // I'm currently processing...reply to self and abort...
+ ptr.p->masterData.gsn = ptr.p->m_gsn;
+ ptr.p->masterData.sendCounter = ptr.p->nodes;
return;
- }//if
+ case GSN_DEFINE_BACKUP_REF:
+ case GSN_DEFINE_BACKUP_CONF:
+ case GSN_START_BACKUP_REF:
+ case GSN_START_BACKUP_CONF:
+ case GSN_BACKUP_FRAGMENT_REF:
+ case GSN_BACKUP_FRAGMENT_CONF:
+ case GSN_STOP_BACKUP_REF:
+ case GSN_STOP_BACKUP_CONF:
+ ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ;
+ masterAbort(signal, ptr);
+ return;
+ case GSN_ABORT_BACKUP_ORD:
+ // Already aborting
+ return;
+ }
+ }
+ else if (newCoord == getOwnNodeId())
+ {
/**
* I'm master for this backup
*/
@@ -759,62 +738,82 @@
ndbout_c("**** Master: Node failed: Master id = %u",
refToNode(ptr.p->masterRef));
#endif
- masterAbort(signal, ptr, false);
- return;
- }//if
- /**
- * If there's a new master, (it's not me)
- * but remember who it is
- */
- ptr.p->masterRef = calcBackupBlockRef(newCoord);
+ Uint32 gsn, len, pos;
+ ptr.p->nodes.bitANDC(mask);
+ switch(ptr.p->masterData.gsn){
+ case GSN_DEFINE_BACKUP_REQ:
+ {
+ DefineBackupRef * ref = (DefineBackupRef*)signal->getDataPtr();
+ ref->backupPtr = ptr.i;
+ ref->backupId = ptr.p->backupId;
+ ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
+ gsn= GSN_DEFINE_BACKUP_REF;
+ len= DefineBackupRef::SignalLength;
+ pos= &ref->nodeId - signal->getDataPtr();
+ break;
+ }
+ case GSN_START_BACKUP_REQ:
+ {
+ StartBackupRef * ref = (StartBackupRef*)signal->getDataPtr();
+ ref->backupPtr = ptr.i;
+ ref->backupId = ptr.p->backupId;
+ ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
+ ref->signalNo = ptr.p->masterData.startBackup.signalNo;
+ gsn= GSN_START_BACKUP_REF;
+ len= StartBackupRef::SignalLength;
+ pos= &ref->nodeId - signal->getDataPtr();
+ break;
+ }
+ case GSN_BACKUP_FRAGMENT_REQ:
+ {
+ BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr();
+ ref->backupPtr = ptr.i;
+ ref->backupId = ptr.p->backupId;
+ ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
+ gsn= GSN_BACKUP_FRAGMENT_REF;
+ len= BackupFragmentRef::SignalLength;
+ pos= &ref->nodeId - signal->getDataPtr();
+ break;
+ }
+ case GSN_STOP_BACKUP_REQ:
+ {
+ StopBackupRef * ref = (StopBackupRef*)signal->getDataPtr();
+ ref->backupPtr = ptr.i;
+ ref->backupId = ptr.p->backupId;
+ ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
+ gsn= GSN_STOP_BACKUP_REF;
+ len= StopBackupRef::SignalLength;
+ pos= &ref->nodeId - signal->getDataPtr();
+ break;
+ }
+ case GSN_CREATE_TRIG_REQ:
+ case GSN_ALTER_TRIG_REQ:
+ case GSN_WAIT_GCP_REQ:
+ case GSN_UTIL_SEQUENCE_REQ:
+ case GSN_UTIL_LOCK_REQ:
+ case GSN_DROP_TRIG_REQ:
+ return;
+ }
+
+ for(Uint32 i = 0; (i = mask.find(i+1)) != NdbNodeBitmask::NotFound; )
+ {
+ signal->theData[pos] = i;
+ sendSignal(reference(), gsn, signal, len, JBB);
#ifdef DEBUG_ABORT
- ndbout_c("**** Slave: Node failed: Master id = %u",
- refToNode(ptr.p->masterRef));
+ ndbout_c("sending %d to self from %d", gsn, i);
#endif
+ }
+ return;
+ }//if
+
/**
* I abort myself as slave if not master
*/
CRASH_INSERTION((10021));
- // slaveAbort(signal, ptr);
}
void
-Backup::masterTakeOver(Signal* signal, BackupRecordPtr ptr)
-{
- ptr.p->masterRef = reference();
- ptr.p->masterData.gsn = MAX_GSN + 1;
-
- switch(ptr.p->slaveState.getState()){
- case INITIAL:
- jam();
- ptr.p->masterData.state.forceState(INITIAL);
- break;
- case ABORTING:
- jam();
- case DEFINING:
- jam();
- case DEFINED:
- jam();
- case STARTED:
- jam();
- case SCANNING:
- jam();
- ptr.p->masterData.state.forceState(STARTED);
- break;
- case STOPPING:
- jam();
- case CLEANING:
- jam();
- ptr.p->masterData.state.forceState(STOPPING);
- break;
- default:
- ndbrequire(false);
- }
- masterAbort(signal, ptr, false);
-}
-
-void
Backup::execINCL_NODEREQ(Signal* signal)
{
jamEntry();
@@ -895,8 +894,8 @@
ndbrequire(ptr.p->pages.empty());
ndbrequire(ptr.p->tables.isEmpty());
- ptr.p->masterData.state.forceState(INITIAL);
- ptr.p->masterData.state.setState(DEFINING);
+ ptr.p->m_gsn = 0;
+ ptr.p->errorCode = 0;
ptr.p->clientRef = senderRef;
ptr.p->clientData = senderData;
ptr.p->masterRef = reference();
@@ -905,6 +904,7 @@
ptr.p->backupKey[0] = 0;
ptr.p->backupKey[1] = 0;
ptr.p->backupDataLen = 0;
+ ptr.p->masterData.errorCode = 0;
ptr.p->masterData.dropTrig.tableId = RNIL;
ptr.p->masterData.alterTrig.tableId = RNIL;
@@ -928,7 +928,6 @@
ndbrequire(ptr.i == RNIL);
c_backupPool.getPtr(ptr);
ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ);
- ptr.p->masterData.gsn = 0;
sendBackupRef(signal, ptr, BackupRef::SequenceFailure);
}//execUTIL_SEQUENCE_REF()
@@ -938,8 +937,7 @@
{
jam();
sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData, errorCode);
- // ptr.p->masterData.state.setState(INITIAL);
- cleanupSlaveResources(ptr);
+ cleanup(signal, ptr);
}
void
@@ -968,7 +966,8 @@
UtilSequenceConf * conf = (UtilSequenceConf*)signal->getDataPtr();
- if(conf->requestType == UtilSequenceReq::Create) {
+ if(conf->requestType == UtilSequenceReq::Create)
+ {
jam();
sendSTTORRY(signal); // At startup in NDB
return;
@@ -979,18 +978,20 @@
c_backupPool.getPtr(ptr);
ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ);
- ptr.p->masterData.gsn = 0;
- if (ptr.p->masterData.state.getState() == ABORTING) {
+
+ if (ptr.p->checkError())
+ {
jam();
sendBackupRef(signal, ptr, ptr.p->errorCode);
return;
}//if
- if (ERROR_INSERTED(10023)) {
- ptr.p->masterData.state.setState(ABORTING);
+
+ if (ERROR_INSERTED(10023))
+ {
sendBackupRef(signal, ptr, 323);
return;
}//if
- ndbrequire(ptr.p->masterData.state.getState() == DEFINING);
+
{
Uint64 backupId;
@@ -1018,7 +1019,6 @@
c_backupPool.getPtr(ptr);
ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ);
- ptr.p->masterData.gsn = 0;
ptr.p->masterData.gsn = GSN_UTIL_LOCK_REQ;
Mutex mutex(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex);
@@ -1040,14 +1040,13 @@
c_backupPool.getPtr(ptr);
ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ);
- ptr.p->masterData.gsn = 0;
if (ERROR_INSERTED(10031)) {
- ptr.p->masterData.state.setState(ABORTING);
ptr.p->setErrorCode(331);
}//if
- if (ptr.p->masterData.state.getState() == ABORTING) {
+ if (ptr.p->checkError())
+ {
jam();
/**
@@ -1062,13 +1061,11 @@
Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex);
jam();
mutex2.unlock(); // ignore response
-
+
sendBackupRef(signal, ptr, ptr.p->errorCode);
return;
}//if
- ndbrequire(ptr.p->masterData.state.getState() == DEFINING);
-
sendDefineBackupReq(signal, ptr);
}
@@ -1078,33 +1075,6 @@
*
*****************************************************************************/
-void
-Backup::sendSignalAllWait(BackupRecordPtr ptr, Uint32 gsn, Signal *signal,
- Uint32 signalLength, bool executeDirect)
-{
- jam();
- ptr.p->masterData.gsn = gsn;
- ptr.p->masterData.sendCounter.clearWaitingFor();
- NodePtr node;
- for(c_nodes.first(node); node.i != RNIL; c_nodes.next(node)){
- jam();
- const Uint32 nodeId = node.p->nodeId;
- if(node.p->alive && ptr.p->nodes.get(nodeId)){
- jam();
-
- ptr.p->masterData.sendCounter.setWaitingFor(nodeId);
-
- const BlockReference ref = numberToRef(BACKUP, nodeId);
- if (!executeDirect || ref != reference()) {
- sendSignal(ref, gsn, signal, signalLength, JBB);
- }//if
- }//if
- }//for
- if (executeDirect) {
- EXECUTE_DIRECT(BACKUP, gsn, signal, signalLength);
- }
-}
-
bool
Backup::haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId)
{
@@ -1114,10 +1084,6 @@
ndbrequire(ptr.p->masterData.sendCounter.isWaitingFor(nodeId));
ptr.p->masterData.sendCounter.clearWaitingFor(nodeId);
-
- if (ptr.p->masterData.sendCounter.done())
- ptr.p->masterData.gsn = 0;
-
return ptr.p->masterData.sendCounter.done();
}
@@ -1138,11 +1104,12 @@
req->nodes = ptr.p->nodes;
req->backupDataLen = ptr.p->backupDataLen;
- ptr.p->masterData.errorCode = 0;
- ptr.p->okToCleanMaster = false; // master must wait with cleaning to last
- sendSignalAllWait(ptr, GSN_DEFINE_BACKUP_REQ, signal,
- DefineBackupReq::SignalLength,
- true /* do execute direct on oneself */);
+ ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ;
+ ptr.p->masterData.sendCounter = ptr.p->nodes;
+ NodeReceiverGroup rg(BACKUP, ptr.p->nodes);
+ sendSignal(rg, GSN_DEFINE_BACKUP_REQ, signal,
+ DefineBackupReq::SignalLength, JBB);
+
/**
* Now send backup data
*/
@@ -1167,17 +1134,15 @@
jamEntry();
DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtr();
-
+
const Uint32 ptrI = ref->backupPtr;
- const Uint32 backupId = ref->backupId;
- const Uint32 nodeId = refToNode(signal->senderBlockRef());
-
+ //const Uint32 backupId = ref->backupId;
+ const Uint32 nodeId = ref->nodeId;
+
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
-
- masterAbortCheck(); // macro will do return if ABORTING
- ptr.p->masterData.errorCode = ref->errorCode;
+ ptr.p->setErrorCode(ref->errorCode);
defineBackupReply(signal, ptr, nodeId);
}
@@ -1188,17 +1153,16 @@
DefineBackupConf* conf = (DefineBackupConf*)signal->getDataPtr();
const Uint32 ptrI = conf->backupPtr;
- const Uint32 backupId = conf->backupId;
+ //const Uint32 backupId = conf->backupId;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
- masterAbortCheck(); // macro will do return if ABORTING
-
- if (ERROR_INSERTED(10024)) {
- ptr.p->masterData.errorCode = 324;
- }//if
+ if (ERROR_INSERTED(10024))
+ {
+ ptr.p->setErrorCode(324);
+ }
defineBackupReply(signal, ptr, nodeId);
}
@@ -1210,6 +1174,7 @@
jam();
return;
}
+
/**
* Unlock mutexes
*/
@@ -1223,16 +1188,10 @@
jam();
mutex2.unlock(); // ignore response
- if(ptr.p->errorCode) {
- jam();
- ptr.p->masterData.errorCode = ptr.p->errorCode;
- }
-
- if(ptr.p->masterData.errorCode){
+ if(ptr.p->checkError())
+ {
jam();
- ptr.p->setErrorCode(ptr.p->masterData.errorCode);
- sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean);
- masterSendAbortBackup(signal, ptr);
+ masterAbort(signal, ptr);
return;
}
@@ -1252,7 +1211,6 @@
ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+3);
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3+NdbNodeBitmask::Size, JBB);
- ptr.p->masterData.state.setState(DEFINED);
/**
* Prepare Trig
*/
@@ -1286,7 +1244,6 @@
{
CreateTrigReq * req =(CreateTrigReq *)signal->getDataPtrSend();
- ptr.p->errorCode = 0;
ptr.p->masterData.gsn = GSN_CREATE_TRIG_REQ;
ptr.p->masterData.sendCounter = 3;
ptr.p->masterData.createTrig.tableId = tabPtr.p->tableId;
@@ -1395,17 +1352,14 @@
return;
}//if
- ptr.p->masterData.gsn = 0;
+ if (ERROR_INSERTED(10025))
+ {
+ ptr.p->errorCode = 325;
+ }
if(ptr.p->checkError()) {
jam();
- masterAbort(signal, ptr, true);
- return;
- }//if
-
- if (ERROR_INSERTED(10025)) {
- ptr.p->errorCode = 325;
- masterAbort(signal, ptr, true);
+ masterAbort(signal, ptr);
return;
}//if
@@ -1425,10 +1379,7 @@
/**
* Finished with all tables, send StartBackupReq
*/
- ptr.p->masterData.state.setState(STARTED);
-
ptr.p->tables.first(tabPtr);
- ptr.p->errorCode = 0;
ptr.p->masterData.startBackup.signalNo = 0;
ptr.p->masterData.startBackup.noOfSignals =
(ptr.p->tables.noOfElements() + StartBackupReq::MaxTableTriggers - 1) /
@@ -1467,9 +1418,12 @@
}//for
req->noOfTableTriggers = i;
- sendSignalAllWait(ptr, GSN_START_BACKUP_REQ, signal,
- StartBackupReq::HeaderLength +
- (i * StartBackupReq::TableTriggerLength));
+ ptr.p->masterData.gsn = GSN_START_BACKUP_REQ;
+ ptr.p->masterData.sendCounter = ptr.p->nodes;
+ NodeReceiverGroup rg(BACKUP, ptr.p->nodes);
+ sendSignal(rg, GSN_START_BACKUP_REQ, signal,
+ StartBackupReq::HeaderLength +
+ (i * StartBackupReq::TableTriggerLength), JBB);
}
void
@@ -1479,15 +1433,13 @@
StartBackupRef* ref = (StartBackupRef*)signal->getDataPtr();
const Uint32 ptrI = ref->backupPtr;
- const Uint32 backupId = ref->backupId;
+ //const Uint32 backupId = ref->backupId;
const Uint32 signalNo = ref->signalNo;
- const Uint32 nodeId = refToNode(signal->senderBlockRef());
+ const Uint32 nodeId = ref->nodeId;
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
- masterAbortCheck(); // macro will do return if ABORTING
-
ptr.p->setErrorCode(ref->errorCode);
startBackupReply(signal, ptr, nodeId, signalNo);
}
@@ -1499,15 +1451,13 @@
StartBackupConf* conf = (StartBackupConf*)signal->getDataPtr();
const Uint32 ptrI = conf->backupPtr;
- const Uint32 backupId = conf->backupId;
+ //const Uint32 backupId = conf->backupId;
const Uint32 signalNo = conf->signalNo;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
- masterAbortCheck(); // macro will do return if ABORTING
-
startBackupReply(signal, ptr, nodeId, signalNo);
}
@@ -1524,17 +1474,16 @@
return;
}
+ if (ERROR_INSERTED(10026))
+ {
+ ptr.p->errorCode = 326;
+ }
+
if(ptr.p->checkError()){
jam();
- masterAbort(signal, ptr, true);
+ masterAbort(signal, ptr);
return;
}
-
- if (ERROR_INSERTED(10026)) {
- ptr.p->errorCode = 326;
- masterAbort(signal, ptr, true);
- return;
- }//if
TablePtr tabPtr;
c_tablePool.getPtr(tabPtr, ptr.p->masterData.startBackup.tablePtr);
@@ -1566,7 +1515,6 @@
{
AlterTrigReq * req =(AlterTrigReq *)signal->getDataPtrSend();
- ptr.p->errorCode = 0;
ptr.p->masterData.gsn = GSN_ALTER_TRIG_REQ;
ptr.p->masterData.sendCounter = 0;
@@ -1608,6 +1556,7 @@
return;
}//if
ptr.p->masterData.alterTrig.tableId = RNIL;
+
/**
* Finished with all tables
*/
@@ -1669,11 +1618,9 @@
return;
}//if
- ptr.p->masterData.gsn = 0;
-
if(ptr.p->checkError()){
jam();
- masterAbort(signal, ptr, true);
+ masterAbort(signal, ptr);
return;
}//if
@@ -1719,11 +1666,10 @@
ndbrequire(ptr.p->masterRef == reference());
ndbrequire(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ);
- ptr.p->masterData.gsn = 0;
if(ptr.p->checkError()) {
jam();
- masterAbort(signal, ptr, true);
+ masterAbort(signal, ptr);
return;
}//if
@@ -1731,13 +1677,13 @@
jam();
CRASH_INSERTION((10008));
ptr.p->startGCP = gcp;
- ptr.p->masterData.state.setState(SCANNING);
+ ptr.p->masterData.sendCounter= 0;
+ ptr.p->masterData.gsn = GSN_BACKUP_FRAGMENT_REQ;
nextFragment(signal, ptr);
} else {
jam();
CRASH_INSERTION((10009));
ptr.p->stopGCP = gcp;
- ptr.p->masterData.state.setState(STOPPING);
sendDropTrig(signal, ptr); // regular dropping of triggers
}//if
}
@@ -1787,6 +1733,7 @@
req->fragmentNo = i;
req->count = 0;
+ ptr.p->masterData.sendCounter++;
const BlockReference ref = numberToRef(BACKUP, nodeId);
sendSignal(ref, GSN_BACKUP_FRAGMENT_REQ, signal,
BackupFragmentReq::SignalLength, JBB);
@@ -1824,7 +1771,7 @@
BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtr();
const Uint32 ptrI = conf->backupPtr;
- const Uint32 backupId = conf->backupId;
+ //const Uint32 backupId = conf->backupId;
const Uint32 tableId = conf->tableId;
const Uint32 fragmentNo = conf->fragmentNo;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
@@ -1834,10 +1781,9 @@
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
- masterAbortCheck(); // macro will do return if ABORTING
-
ptr.p->noOfBytes += noOfBytes;
ptr.p->noOfRecords += noOfRecords;
+ ptr.p->masterData.sendCounter--;
TablePtr tabPtr;
ndbrequire(findTable(ptr, tabPtr, tableId));
@@ -1852,17 +1798,24 @@
fragPtr.p->scanned = 1;
fragPtr.p->scanning = 0;
- if(ptr.p->checkError()) {
- jam();
- masterAbort(signal, ptr, true);
- return;
- }//if
- if (ERROR_INSERTED(10028)) {
+ if (ERROR_INSERTED(10028))
+ {
ptr.p->errorCode = 328;
- masterAbort(signal, ptr, true);
- return;
- }//if
- nextFragment(signal, ptr);
+ }
+
+ if(ptr.p->checkError())
+ {
+ if(ptr.p->masterData.sendCounter.done())
+ {
+ jam();
+ masterAbort(signal, ptr);
+ return;
+ }//if
+ }
+ else
+ {
+ nextFragment(signal, ptr);
+ }
}
void
@@ -1874,15 +1827,52 @@
BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr();
const Uint32 ptrI = ref->backupPtr;
- const Uint32 backupId = ref->backupId;
+ //const Uint32 backupId = ref->backupId;
+ const Uint32 nodeId = ref->nodeId;
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
- masterAbortCheck(); // macro will do return if ABORTING
+ TablePtr tabPtr;
+ ptr.p->tables.first(tabPtr);
+ for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) {
+ jam();
+ FragmentPtr fragPtr;
+ Array<Fragment> & frags = tabPtr.p->fragments;
+ const Uint32 fragCount = frags.getSize();
+
+ for(Uint32 i = 0; i<fragCount; i++) {
+ jam();
+ tabPtr.p->fragments.getPtr(fragPtr, i);
+ if(fragPtr.p->scanning != 0 && nodeId == fragPtr.p->node)
+ {
+ jam();
+ ndbrequire(fragPtr.p->scanned == 0);
+ fragPtr.p->scanned = 1;
+ fragPtr.p->scanning = 0;
+ goto done;
+ }
+ }
+ }
+ ndbrequire(false);
+done:
+ ptr.p->masterData.sendCounter--;
ptr.p->setErrorCode(ref->errorCode);
- masterAbort(signal, ptr, true);
+
+ if(ptr.p->masterData.sendCounter.done())
+ {
+ jam();
+ masterAbort(signal, ptr);
+ return;
+ }//if
+
+ AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
+ ord->backupId = ptr.p->backupId;
+ ord->backupPtr = ptr.i;
+ ord->requestType = AbortBackupOrd::LogBufferFull;
+ ord->senderData= ptr.i;
+ execABORT_BACKUP_ORD(signal);
}
/*****************************************************************************
@@ -1910,15 +1900,7 @@
jam();
ptr.p->masterData.dropTrig.tableId = RNIL;
- sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean);
-
- if(ptr.p->masterData.state.getState() == STOPPING) {
- jam();
- sendStopBackup(signal, ptr);
- return;
- }//if
- ndbrequire(ptr.p->masterData.state.getState() == ABORTING);
- masterSendAbortBackup(signal, ptr);
+ sendStopBackup(signal, ptr);
}//if
}
@@ -2010,7 +1992,6 @@
return;
}//if
- ptr.p->masterData.gsn = 0;
sendDropTrig(signal, ptr); // recursive next
}
@@ -2023,14 +2004,23 @@
Backup::execSTOP_BACKUP_REF(Signal* signal)
{
jamEntry();
- ndbrequire(0);
+
+ StopBackupRef* ref = (StopBackupRef*)signal->getDataPtr();
+ const Uint32 ptrI = ref->backupPtr;
+ //const Uint32 backupId = ref->backupId;
+ const Uint32 nodeId = ref->nodeId;
+
+ BackupRecordPtr ptr;
+ c_backupPool.getPtr(ptr, ptrI);
+
+ ptr.p->setErrorCode(ref->errorCode);
+ stopBackupReply(signal, ptr, nodeId);
}
void
Backup::sendStopBackup(Signal* signal, BackupRecordPtr ptr)
{
jam();
- ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ;
StopBackupReq* stop = (StopBackupReq*)signal->getDataPtrSend();
stop->backupPtr = ptr.i;
@@ -2038,8 +2028,11 @@
stop->startGCP = ptr.p->startGCP;
stop->stopGCP = ptr.p->stopGCP;
- sendSignalAllWait(ptr, GSN_STOP_BACKUP_REQ, signal,
- StopBackupReq::SignalLength);
+ ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ;
+ ptr.p->masterData.sendCounter = ptr.p->nodes;
+ NodeReceiverGroup rg(BACKUP, ptr.p->nodes);
+ sendSignal(rg, GSN_STOP_BACKUP_REQ, signal,
+ StopBackupReq::SignalLength, JBB);
}
void
@@ -2049,14 +2042,12 @@
StopBackupConf* conf = (StopBackupConf*)signal->getDataPtr();
const Uint32 ptrI = conf->backupPtr;
- const Uint32 backupId = conf->backupId;
+ //const Uint32 backupId = conf->backupId;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
- masterAbortCheck(); // macro will do return if ABORTING
-
ptr.p->noOfLogBytes += conf->noOfLogBytes;
ptr.p->noOfLogRecords += conf->noOfLogRecords;
@@ -2073,35 +2064,39 @@
return;
}
- // ptr.p->masterData.state.setState(INITIAL);
-
- // send backup complete first to slaves so that they know
sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupComplete);
-
- BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend();
- rep->backupId = ptr.p->backupId;
- rep->senderData = ptr.p->clientData;
- rep->startGCP = ptr.p->startGCP;
- rep->stopGCP = ptr.p->stopGCP;
- rep->noOfBytes = ptr.p->noOfBytes;
- rep->noOfRecords = ptr.p->noOfRecords;
- rep->noOfLogBytes = ptr.p->noOfLogBytes;
- rep->noOfLogRecords = ptr.p->noOfLogRecords;
- rep->nodes = ptr.p->nodes;
- sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal,
- BackupCompleteRep::SignalLength, JBB);
-
- signal->theData[0] = EventReport::BackupCompleted;
- signal->theData[1] = ptr.p->clientRef;
- signal->theData[2] = ptr.p->backupId;
- signal->theData[3] = ptr.p->startGCP;
- signal->theData[4] = ptr.p->stopGCP;
- signal->theData[5] = ptr.p->noOfBytes;
- signal->theData[6] = ptr.p->noOfRecords;
- signal->theData[7] = ptr.p->noOfLogBytes;
- signal->theData[8] = ptr.p->noOfLogRecords;
- ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9);
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB);
+
+ if(!ptr.p->checkError())
+ {
+ BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend();
+ rep->backupId = ptr.p->backupId;
+ rep->senderData = ptr.p->clientData;
+ rep->startGCP = ptr.p->startGCP;
+ rep->stopGCP = ptr.p->stopGCP;
+ rep->noOfBytes = ptr.p->noOfBytes;
+ rep->noOfRecords = ptr.p->noOfRecords;
+ rep->noOfLogBytes = ptr.p->noOfLogBytes;
+ rep->noOfLogRecords = ptr.p->noOfLogRecords;
+ rep->nodes = ptr.p->nodes;
+ sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal,
+ BackupCompleteRep::SignalLength, JBB);
+
+ signal->theData[0] = EventReport::BackupCompleted;
+ signal->theData[1] = ptr.p->clientRef;
+ signal->theData[2] = ptr.p->backupId;
+ signal->theData[3] = ptr.p->startGCP;
+ signal->theData[4] = ptr.p->stopGCP;
+ signal->theData[5] = ptr.p->noOfBytes;
+ signal->theData[6] = ptr.p->noOfRecords;
+ signal->theData[7] = ptr.p->noOfLogBytes;
+ signal->theData[8] = ptr.p->noOfLogRecords;
+ ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9);
+ sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB);
+ }
+ else
+ {
+ masterAbort(signal, ptr);
+ }
}
/*****************************************************************************
@@ -2110,199 +2105,96 @@
*
*****************************************************************************/
void
-Backup::masterAbort(Signal* signal, BackupRecordPtr ptr, bool controlledAbort)
+Backup::masterAbort(Signal* signal, BackupRecordPtr ptr)
{
- if(ptr.p->masterData.state.getState() == ABORTING) {
-#ifdef DEBUG_ABORT
- ndbout_c("---- Master already aborting");
-#endif
- jam();
- return;
- }
jam();
#ifdef DEBUG_ABORT
ndbout_c("************ masterAbort");
#endif
-
- sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure);
- if (!ptr.p->checkError())
- ptr.p->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
-
- const State s = ptr.p->masterData.state.getState();
-
- ptr.p->masterData.state.setState(ABORTING);
-
- ndbrequire(s == INITIAL ||
- s == STARTED ||
- s == DEFINING ||
- s == DEFINED ||
- s == SCANNING ||
- s == STOPPING ||
- s == ABORTING);
- if(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ) {
- jam();
- DEBUG_OUT("masterAbort: gsn = GSN_UTIL_SEQUENCE_REQ");
- //-------------------------------------------------------
- // We are waiting for UTIL_SEQUENCE response. We rely on
- // this to arrive and check for ABORTING in response.
- // No slaves are involved at this point and ABORT simply
- // results in BACKUP_REF to client
- //-------------------------------------------------------
- /**
- * Waiting for Sequence Id
- * @see execUTIL_SEQUENCE_CONF
- */
- return;
- }//if
-
- if(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ) {
+ if(ptr.p->masterData.errorCode != 0)
+ {
jam();
- DEBUG_OUT("masterAbort: gsn = GSN_UTIL_LOCK_REQ");
- //-------------------------------------------------------
- // We are waiting for UTIL_LOCK response (mutex). We rely on
- // this to arrive and check for ABORTING in response.
- // No slaves are involved at this point and ABORT simply
- // results in BACKUP_REF to client
- //-------------------------------------------------------
- /**
- * Waiting for lock
- * @see execUTIL_LOCK_CONF
- */
return;
- }//if
-
- /**
- * Unlock mutexes only at master
- */
- jam();
- Mutex mutex1(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex);
- jam();
- mutex1.unlock(); // ignore response
-
- jam();
- Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex);
- jam();
- mutex2.unlock(); // ignore response
-
- if (!controlledAbort) {
- jam();
- if (s == DEFINING) {
- jam();
-//-------------------------------------------------------
-// If we are in the defining phase all work is done by
-// slaves. No triggers have been allocated thus slaves
-// may free all "Master" resources, let them know...
-//-------------------------------------------------------
- sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean);
- return;
- }//if
- if (s == DEFINED) {
- jam();
-//-------------------------------------------------------
-// DEFINED is the state when triggers are created. We rely
-// on that DICT will report create trigger failure in case
-// of node failure. Thus no special action is needed here.
-// We will check for errorCode != 0 when receiving
-// replies on create trigger.
-//-------------------------------------------------------
- return;
- }//if
- if(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ) {
- jam();
- DEBUG_OUT("masterAbort: gsn = GSN_WAIT_GCP_REQ");
-//-------------------------------------------------------
-// We are waiting for WAIT_GCP response. We rely on
-// this to arrive and check for ABORTING in response.
-//-------------------------------------------------------
-
- /**
- * Waiting for GCP
- * @see execWAIT_GCP_CONF
- */
- return;
- }//if
-
- if(ptr.p->masterData.gsn == GSN_ALTER_TRIG_REQ) {
- jam();
- DEBUG_OUT("masterAbort: gsn = GSN_ALTER_TRIG_REQ");
-//-------------------------------------------------------
-// We are waiting for ALTER_TRIG response. We rely on
-// this to arrive and check for ABORTING in response.
-//-------------------------------------------------------
+ }
- /**
- * All triggers haven't been created yet
- */
- return;
- }//if
+ BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend();
+ rep->backupId = ptr.p->backupId;
+ rep->senderData = ptr.p->clientData;
+ rep->reason = ptr.p->errorCode;
+ sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal,
+ BackupAbortRep::SignalLength, JBB);
- if(ptr.p->masterData.gsn == GSN_DROP_TRIG_REQ) {
- jam();
- DEBUG_OUT("masterAbort: gsn = GSN_DROP_TRIG_REQ");
-//-------------------------------------------------------
-// We are waiting for DROP_TRIG response. We rely on
-// this to arrive and will continue dropping triggers
-// until completed.
-//-------------------------------------------------------
+ signal->theData[0] = EventReport::BackupAborted;
+ signal->theData[1] = ptr.p->clientRef;
+ signal->theData[2] = ptr.p->backupId;
+ signal->theData[3] = ptr.p->errorCode;
+ sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
- /**
- * I'm currently dropping the trigger
- */
- return;
- }//if
- }//if
+ ndbrequire(ptr.p->errorCode);
+ ptr.p->masterData.errorCode = ptr.p->errorCode;
-//-------------------------------------------------------
-// If we are waiting for START_BACKUP responses we can
-// safely start dropping triggers (state == STARTED).
-// We will ignore any START_BACKUP responses after this.
-//-------------------------------------------------------
- DEBUG_OUT("masterAbort: sendDropTrig");
- sendDropTrig(signal, ptr); // dropping due to error
+ AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
+ ord->backupId = ptr.p->backupId;
+ ord->backupPtr = ptr.i;
+ ord->senderData= ptr.i;
+ NodeReceiverGroup rg(BACKUP, ptr.p->nodes);
+
+ switch(ptr.p->masterData.gsn){
+ case GSN_DEFINE_BACKUP_REQ:
+ ord->requestType = AbortBackupOrd::BackupFailure;
+ sendSignal(rg, GSN_ABORT_BACKUP_ORD, signal,
+ AbortBackupOrd::SignalLength, JBB);
+ return;
+ case GSN_CREATE_TRIG_REQ:
+ case GSN_START_BACKUP_REQ:
+ case GSN_ALTER_TRIG_REQ:
+ case GSN_WAIT_GCP_REQ:
+ case GSN_BACKUP_FRAGMENT_REQ:
+ jam();
+ ptr.p->stopGCP= ptr.p->startGCP + 1;
+ sendDropTrig(signal, ptr); // dropping due to error
+ return;
+ case GSN_UTIL_SEQUENCE_REQ:
+ case GSN_UTIL_LOCK_REQ:
+ case GSN_DROP_TRIG_REQ:
+ ndbrequire(false);
+ return;
+ case GSN_STOP_BACKUP_REQ:
+ return;
+ }
}
void
-Backup::masterSendAbortBackup(Signal* signal, BackupRecordPtr ptr)
+Backup::abort_scan(Signal * signal, BackupRecordPtr ptr)
{
- if (ptr.p->masterData.state.getState() != ABORTING) {
- sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure);
- ptr.p->masterData.state.setState(ABORTING);
- }
- const State s = ptr.p->masterData.state.getAbortState();
-
- /**
- * First inform to client
- */
- if(s == DEFINING) {
- jam();
-#ifdef DEBUG_ABORT
- ndbout_c("** Abort: sending BACKUP_REF to mgmtsrvr");
-#endif
- sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData,
- ptr.p->errorCode);
+ AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
+ ord->backupId = ptr.p->backupId;
+ ord->backupPtr = ptr.i;
+ ord->senderData= ptr.i;
+ ord->requestType = AbortBackupOrd::AbortScan;
- } else {
+ TablePtr tabPtr;
+ ptr.p->tables.first(tabPtr);
+ for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) {
jam();
-#ifdef DEBUG_ABORT
- ndbout_c("** Abort: sending BACKUP_ABORT_REP to mgmtsrvr");
-#endif
- BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend();
- rep->backupId = ptr.p->backupId;
- rep->senderData = ptr.p->clientData;
- rep->reason = ptr.p->errorCode;
- sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal,
- BackupAbortRep::SignalLength, JBB);
-
- signal->theData[0] = EventReport::BackupAborted;
- signal->theData[1] = ptr.p->clientRef;
- signal->theData[2] = ptr.p->backupId;
- signal->theData[3] = ptr.p->errorCode;
- sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
- }//if
-
- // ptr.p->masterData.state.setState(INITIAL);
-
- sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure);
+ FragmentPtr fragPtr;
+ Array<Fragment> & frags = tabPtr.p->fragments;
+ const Uint32 fragCount = frags.getSize();
+
+ for(Uint32 i = 0; i<fragCount; i++) {
+ jam();
+ tabPtr.p->fragments.getPtr(fragPtr, i);
+ const Uint32 nodeId = fragPtr.p->node;
+ if(fragPtr.p->scanning != 0 && ptr.p->nodes.get(nodeId)) {
+ jam();
+
+ const BlockReference ref = numberToRef(BACKUP, nodeId);
+ sendSignal(ref, GSN_ABORT_BACKUP_ORD, signal,
+ AbortBackupOrd::SignalLength, JBB);
+
+ }
+ }
+ }
}
/*****************************************************************************
@@ -2313,26 +2205,17 @@
void
Backup::defineBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errCode)
{
- if (ptr.p->slaveState.getState() == ABORTING) {
- jam();
- return;
- }
- ptr.p->slaveState.setState(ABORTING);
-
- if (errCode != 0) {
- jam();
- ptr.p->setErrorCode(errCode);
- }//if
+ ptr.p->m_gsn = GSN_DEFINE_BACKUP_REF;
+ ptr.p->setErrorCode(errCode);
ndbrequire(ptr.p->errorCode != 0);
-
+
DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtrSend();
ref->backupId = ptr.p->backupId;
ref->backupPtr = ptr.i;
ref->errorCode = ptr.p->errorCode;
+ ref->nodeId = getOwnNodeId();
sendSignal(ptr.p->masterRef, GSN_DEFINE_BACKUP_REF, signal,
DefineBackupRef::SignalLength, JBB);
-
- closeFiles(signal, ptr);
}
void
@@ -2366,6 +2249,7 @@
CRASH_INSERTION((10014));
+ ptr.p->m_gsn = GSN_DEFINE_BACKUP_REQ;
ptr.p->slaveState.forceState(INITIAL);
ptr.p->slaveState.setState(DEFINING);
ptr.p->errorCode = 0;
@@ -2432,7 +2316,7 @@
files[i].p->tableId = RNIL;
files[i].p->backupPtr = ptr.i;
files[i].p->filePointer = RNIL;
- files[i].p->fileDone = 0;
+ files[i].p->fileClosing = 0;
files[i].p->fileOpened = 0;
files[i].p->fileRunning = 0;
files[i].p->scanRunning = 0;
@@ -2468,17 +2352,14 @@
ptr.p->logFilePtr = files[1].i;
ptr.p->dataFilePtr = files[2].i;
- if (!verifyNodesAlive(ptr.p->nodes)) {
+ if (!verifyNodesAlive(ptr, ptr.p->nodes)) {
jam();
defineBackupRef(signal, ptr, DefineBackupRef::Undefined);
- // sendBackupRef(signal, ptr,
- // ptr.p->errorCode?ptr.p->errorCode:BackupRef::Undefined);
return;
}//if
if (ERROR_INSERTED(10027)) {
jam();
defineBackupRef(signal, ptr, 327);
- // sendBackupRef(signal, ptr, 327);
return;
}//if
@@ -2546,8 +2427,6 @@
return;
}//if
- defineSlaveAbortCheck();
-
/**
* All tables fetched
*/
@@ -2679,8 +2558,6 @@
}//if
}//for
- defineSlaveAbortCheck();
-
/**
* Did open succeed for all files
*/
@@ -2810,8 +2687,6 @@
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, senderData);
- defineSlaveAbortCheck();
-
defineBackupRef(signal, ptr, ref->errorCode);
}
@@ -2833,8 +2708,6 @@
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, senderData);
- defineSlaveAbortCheck();
-
SegmentedSectionPtr dictTabInfoPtr;
signal->getSection(dictTabInfoPtr, GetTabInfoConf::DICT_TAB_INFO);
ndbrequire(dictTabInfoPtr.sz == len);
@@ -3047,8 +2920,6 @@
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, senderData);
- defineSlaveAbortCheck();
-
TablePtr tabPtr;
ndbrequire(findTable(ptr, tabPtr, tableId));
@@ -3127,8 +2998,6 @@
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, senderData);
- defineSlaveAbortCheck();
-
TablePtr tabPtr;
ndbrequire(findTable(ptr, tabPtr, tableId));
@@ -3143,9 +3012,7 @@
void
Backup::getFragmentInfoDone(Signal* signal, BackupRecordPtr ptr)
{
- // Slave must now hold on to master data until
- // AbortBackupOrd::OkToClean signal
- ptr.p->okToCleanMaster = false;
+ ptr.p->m_gsn = GSN_DEFINE_BACKUP_CONF;
ptr.p->slaveState.setState(DEFINED);
DefineBackupConf * conf = (DefineBackupConf*)signal->getDataPtr();
conf->backupPtr = ptr.i;
@@ -3169,16 +3036,15 @@
StartBackupReq* req = (StartBackupReq*)signal->getDataPtr();
const Uint32 ptrI = req->backupPtr;
- const Uint32 backupId = req->backupId;
+ //const Uint32 backupId = req->backupId;
const Uint32 signalNo = req->signalNo;
-
+
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
-
- slaveAbortCheck(); // macro will do return if ABORTING
ptr.p->slaveState.setState(STARTED);
-
+ ptr.p->m_gsn = GSN_START_BACKUP_REQ;
+
for(Uint32 i = 0; i<req->noOfTableTriggers; i++) {
jam();
TablePtr tabPtr;
@@ -3191,11 +3057,13 @@
TriggerPtr trigPtr;
if(!ptr.p->triggers.seizeId(trigPtr, triggerId)) {
jam();
+ ptr.p->m_gsn = GSN_START_BACKUP_REF;
StartBackupRef* ref = (StartBackupRef*)signal->getDataPtrSend();
ref->backupPtr = ptr.i;
ref->backupId = ptr.p->backupId;
ref->signalNo = signalNo;
ref->errorCode = StartBackupRef::FailedToAllocateTriggerRecord;
+ ref->nodeId = getOwnNodeId();
sendSignal(ptr.p->masterRef, GSN_START_BACKUP_REF, signal,
StartBackupRef::SignalLength, JBB);
return;
@@ -3233,6 +3101,7 @@
}//if
}//for
+ ptr.p->m_gsn = GSN_START_BACKUP_CONF;
StartBackupConf* conf = (StartBackupConf*)signal->getDataPtrSend();
conf->backupPtr = ptr.i;
conf->backupId = ptr.p->backupId;
@@ -3255,7 +3124,7 @@
CRASH_INSERTION((10016));
const Uint32 ptrI = req->backupPtr;
- const Uint32 backupId = req->backupId;
+ //const Uint32 backupId = req->backupId;
const Uint32 tableId = req->tableId;
const Uint32 fragNo = req->fragmentNo;
const Uint32 count = req->count;
@@ -3266,10 +3135,9 @@
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
- slaveAbortCheck(); // macro will do return if ABORTING
-
ptr.p->slaveState.setState(SCANNING);
-
+ ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REQ;
+
/**
* Get file
*/
@@ -3280,7 +3148,7 @@
ndbrequire(filePtr.p->fileOpened == 1);
ndbrequire(filePtr.p->fileRunning == 1);
ndbrequire(filePtr.p->scanRunning == 0);
- ndbrequire(filePtr.p->fileDone == 0);
+ ndbrequire(filePtr.p->fileClosing == 0);
/**
* Get table
@@ -3350,7 +3218,7 @@
req->transId1 = 0;
req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
req->clientOpPtr= filePtr.i;
- req->batch_size_rows= 16;
+ req->batch_size_rows= parallelism;
req->batch_size_bytes= 0;
sendSignal(DBLQH_REF, GSN_SCAN_FRAGREQ, signal,
ScanFragReq::SignalLength, JBB);
@@ -3572,6 +3440,13 @@
return false;
}
+bool
+Backup::OperationRecord::closeScan()
+{
+ opNoDone = opNoConf = opLen = 0;
+ return true;
+}
+
bool
Backup::OperationRecord::scanConf(Uint32 noOfOps, Uint32 total_len)
{
@@ -3600,11 +3475,9 @@
c_backupFilePool.getPtr(filePtr, filePtrI);
filePtr.p->errorCode = ref->errorCode;
+ filePtr.p->scanRunning = 0;
- BackupRecordPtr ptr;
- c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
-
- abortFile(signal, ptr, filePtr);
+ backupFragmentRef(signal, filePtr);
}
void
@@ -3639,9 +3512,11 @@
{
jam();
- if(filePtr.p->errorCode != 0){
+ if(filePtr.p->errorCode != 0)
+ {
jam();
- abortFileHook(signal, filePtr, true); // Scan completed
+ filePtr.p->scanRunning = 0;
+ backupFragmentRef(signal, filePtr); // Scan completed
return;
}//if
@@ -3669,20 +3544,51 @@
sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal,
BackupFragmentConf::SignalLength, JBB);
+ ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF;
ptr.p->slaveState.setState(STARTED);
return;
}
+
+void
+Backup::backupFragmentRef(Signal * signal, BackupFilePtr filePtr)
+{
+ BackupRecordPtr ptr;
+ c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
+
+ ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REF;
+
+ BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtrSend();
+ ref->backupId = ptr.p->backupId;
+ ref->backupPtr = ptr.i;
+ ref->nodeId = getOwnNodeId();
+ ref->errorCode = ptr.p->errorCode;
+ sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_REF, signal,
+ BackupFragmentRef::SignalLength, JBB);
+}
void
Backup::checkScan(Signal* signal, BackupFilePtr filePtr)
{
- if(filePtr.p->errorCode != 0){
+ OperationRecord & op = filePtr.p->operation;
+
+ if(filePtr.p->errorCode != 0)
+ {
jam();
- abortFileHook(signal, filePtr, false); // Scan not completed
+
+ /**
+ * Close scan
+ */
+ op.closeScan();
+ ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend();
+ req->senderData = filePtr.i;
+ req->closeFlag = 1;
+ req->transId1 = 0;
+ req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
+ sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
+ ScanFragNextReq::SignalLength, JBB);
return;
}//if
-
- OperationRecord & op = filePtr.p->operation;
+
if(op.newScan()) {
jam();
@@ -3693,8 +3599,28 @@
req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
req->batch_size_rows= 16;
req->batch_size_bytes= 0;
- sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
- ScanFragNextReq::SignalLength, JBB);
+ if(ERROR_INSERTED(10032))
+ sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
+ 100, ScanFragNextReq::SignalLength);
+ else if(ERROR_INSERTED(10033))
+ {
+ SET_ERROR_INSERT_VALUE(10032);
+ sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
+ 10000, ScanFragNextReq::SignalLength);
+
+ BackupRecordPtr ptr;
+ c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
+ AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
+ ord->backupId = ptr.p->backupId;
+ ord->backupPtr = ptr.i;
+ ord->requestType = AbortBackupOrd::FileOrScanError;
+ ord->senderData= ptr.i;
+ sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal,
+ AbortBackupOrd::SignalLength, JBB);
+ }
+ else
+ sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
+ ScanFragNextReq::SignalLength, JBB);
return;
}//if
@@ -3718,11 +3644,8 @@
filePtr.p->fileRunning = 0;
filePtr.p->errorCode = errCode;
-
- BackupRecordPtr ptr;
- c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
-
- abortFile(signal, ptr, filePtr);
+
+ checkFile(signal, filePtr);
}
void
@@ -3738,12 +3661,6 @@
BackupFilePtr filePtr;
c_backupFilePool.getPtr(filePtr, filePtrI);
-
- if (ERROR_INSERTED(10029)) {
- BackupRecordPtr ptr;
- c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
- abortFile(signal, ptr, filePtr);
- }//if
OperationRecord & op = filePtr.p->operation;
@@ -3761,30 +3678,25 @@
#endif
OperationRecord & op = filePtr.p->operation;
-
+
Uint32 * tmp, sz; bool eof;
- if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof)) {
+ if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof))
+ {
jam();
- if(filePtr.p->errorCode == 0) {
- jam();
- FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
- req->filePointer = filePtr.p->filePointer;
- req->userPointer = filePtr.i;
- req->userReference = reference();
- req->varIndex = 0;
- req->offset = tmp - c_startOfPages;
- req->size = sz;
-
- sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal,
- FsAppendReq::SignalLength, JBA);
- return;
- } else {
- jam();
- if (filePtr.p->scanRunning == 1)
- eof = false;
- }//if
- }//if
+ jam();
+ FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
+ req->filePointer = filePtr.p->filePointer;
+ req->userPointer = filePtr.i;
+ req->userReference = reference();
+ req->varIndex = 0;
+ req->offset = tmp - c_startOfPages;
+ req->size = sz;
+
+ sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal,
+ FsAppendReq::SignalLength, JBA);
+ return;
+ }
if(!eof) {
jam();
@@ -3794,9 +3706,7 @@
return;
}//if
- ndbrequire(filePtr.p->fileDone == 1);
-
- if(sz > 0 && filePtr.p->errorCode == 0) {
+ if(sz > 0) {
jam();
FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
@@ -3812,6 +3722,7 @@
}//if
filePtr.p->fileRunning = 0;
+ filePtr.p->fileClosing = 1;
FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
@@ -3819,64 +3730,11 @@
req->userReference = reference();
req->fileFlag = 0;
#ifdef DEBUG_ABORT
- ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i);
+ ndbout_c("***** a FSCLOSEREQ filePtr.i = %u", filePtr.i);
#endif
sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA);
}
-void
-Backup::abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr)
-{
- jam();
-
- if(ptr.p->slaveState.getState() != ABORTING) {
- /**
- * Inform master of failure
- */
- jam();
- ptr.p->slaveState.setState(ABORTING);
- ptr.p->setErrorCode(AbortBackupOrd::FileOrScanError);
- sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::FileOrScanError);
- return;
- }//if
-
-
- for(ptr.p->files.first(filePtr);
- filePtr.i!=RNIL;
- ptr.p->files.next(filePtr)){
- jam();
- filePtr.p->errorCode = 1;
- }//for
-
- closeFiles(signal, ptr);
-}
-
-void
-Backup::abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanComplete)
-{
- jam();
-
- if(!scanComplete) {
- jam();
-
- ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend();
- req->senderData = filePtr.i;
- req->closeFlag = 1;
- req->transId1 = 0;
- req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
- sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
- ScanFragNextReq::SignalLength, JBB);
- return;
- }//if
-
- filePtr.p->scanRunning = 0;
-
- BackupRecordPtr ptr;
- c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
-
- filePtr.i = RNIL;
- abortFile(signal, ptr, filePtr);
-}
/****************************************************************************
*
@@ -3953,27 +3811,30 @@
}//if
BackupFormat::LogFile::LogEntry * logEntry = trigPtr.p->logEntry;
- if(logEntry == 0) {
+ if(logEntry == 0)
+ {
jam();
Uint32 * dst;
FsBuffer & buf = trigPtr.p->operation->dataBuffer;
ndbrequire(trigPtr.p->maxRecordSize <= buf.getMaxWrite());
- BackupRecordPtr ptr;
- c_backupPool.getPtr(ptr, trigPtr.p->backupPtr);
- if(!buf.getWritePtr(&dst, trigPtr.p->maxRecordSize)) {
+ if(ERROR_INSERTED(10030) ||
+ !buf.getWritePtr(&dst, trigPtr.p->maxRecordSize))
+ {
jam();
+ BackupRecordPtr ptr;
+ c_backupPool.getPtr(ptr, trigPtr.p->backupPtr);
trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull;
- sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull);
- return;
- }//if
- if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) {
- jam();
- trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull;
- sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull);
+ AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
+ ord->backupId = ptr.p->backupId;
+ ord->backupPtr = ptr.i;
+ ord->requestType = AbortBackupOrd::LogBufferFull;
+ ord->senderData= ptr.i;
+ sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal,
+ AbortBackupOrd::SignalLength, JBB);
return;
}//if
-
+
logEntry = (BackupFormat::LogFile::LogEntry *)dst;
trigPtr.p->logEntry = logEntry;
logEntry->Length = 0;
@@ -4015,9 +3876,10 @@
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, trigPtr.p->backupPtr);
- if(gci != ptr.p->currGCP) {
+ if(gci != ptr.p->currGCP)
+ {
jam();
-
+
trigPtr.p->logEntry->TriggerEvent = htonl(trigPtr.p->event | 0x10000);
trigPtr.p->logEntry->Data[len] = htonl(gci);
len ++;
@@ -4036,20 +3898,6 @@
}
void
-Backup::sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr,
- Uint32 requestType)
-{
- jam();
- AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
- ord->backupId = ptr.p->backupId;
- ord->backupPtr = ptr.i;
- ord->requestType = requestType;
- ord->senderData= ptr.i;
- sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal,
- AbortBackupOrd::SignalLength, JBB);
-}
-
-void
Backup::sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr,
Uint32 requestType)
{
@@ -4085,7 +3933,7 @@
CRASH_INSERTION((10020));
const Uint32 ptrI = req->backupPtr;
- const Uint32 backupId = req->backupId;
+ //const Uint32 backupId = req->backupId;
const Uint32 startGCP = req->startGCP;
const Uint32 stopGCP = req->stopGCP;
@@ -4101,7 +3949,7 @@
c_backupPool.getPtr(ptr, ptrI);
ptr.p->slaveState.setState(STOPPING);
- slaveAbortCheck(); // macro will do return if ABORTING
+ ptr.p->m_gsn = GSN_STOP_BACKUP_REQ;
/**
* Insert footers
@@ -4140,12 +3988,6 @@
void
Backup::closeFiles(Signal* sig, BackupRecordPtr ptr)
{
- if (ptr.p->closingFiles) {
- jam();
- return;
- }
- ptr.p->closingFiles = true;
-
/**
* Close all files
*/
@@ -4161,12 +4003,12 @@
jam();
openCount++;
- if(filePtr.p->fileDone == 1){
+ if(filePtr.p->fileClosing == 1){
jam();
continue;
}//if
- filePtr.p->fileDone = 1;
+ filePtr.p->fileClosing = 1;
if(filePtr.p->fileRunning == 1){
jam();
@@ -4183,7 +4025,7 @@
req->userReference = reference();
req->fileFlag = 0;
#ifdef DEBUG_ABORT
- ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i);
+ ndbout_c("***** b FSCLOSEREQ filePtr.i = %u", filePtr.i);
#endif
sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, sig,
FsCloseReq::SignalLength, JBA);
@@ -4210,11 +4052,6 @@
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
- /**
- * This should only happen during abort of backup
- */
- ndbrequire(ptr.p->slaveState.getState() == ABORTING);
-
filePtr.p->fileOpened = 1;
FsConf * conf = (FsConf*)signal->getDataPtr();
conf->userPointer = filePtrI;
@@ -4237,7 +4074,7 @@
ndbout_c("***** FSCLOSECONF filePtrI = %u", filePtrI);
#endif
- ndbrequire(filePtr.p->fileDone == 1);
+ ndbrequire(filePtr.p->fileClosing == 1);
ndbrequire(filePtr.p->fileOpened == 1);
ndbrequire(filePtr.p->fileRunning == 0);
ndbrequire(filePtr.p->scanRunning == 0);
@@ -4265,25 +4102,20 @@
{
jam();
- if(ptr.p->slaveState.getState() == STOPPING) {
- jam();
- BackupFilePtr filePtr;
- ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
-
- StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend();
- conf->backupId = ptr.p->backupId;
- conf->backupPtr = ptr.i;
- conf->noOfLogBytes = filePtr.p->operation.noOfBytes;
- conf->noOfLogRecords = filePtr.p->operation.noOfRecords;
- sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal,
- StopBackupConf::SignalLength, JBB);
-
- ptr.p->slaveState.setState(CLEANING);
- return;
- }//if
+ jam();
+ BackupFilePtr filePtr;
+ ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
- ndbrequire(ptr.p->slaveState.getState() == ABORTING);
- removeBackup(signal, ptr);
+ StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend();
+ conf->backupId = ptr.p->backupId;
+ conf->backupPtr = ptr.i;
+ conf->noOfLogBytes = filePtr.p->operation.noOfBytes;
+ conf->noOfLogRecords = filePtr.p->operation.noOfRecords;
+ sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal,
+ StopBackupConf::SignalLength, JBB);
+
+ ptr.p->m_gsn = GSN_STOP_BACKUP_CONF;
+ ptr.p->slaveState.setState(CLEANING);
}
/*****************************************************************************
@@ -4291,57 +4123,6 @@
* Slave functionallity: Abort backup
*
*****************************************************************************/
-void
-Backup::removeBackup(Signal* signal, BackupRecordPtr ptr)
-{
- jam();
-
- FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend();
- req->userReference = reference();
- req->userPointer = ptr.i;
- req->directory = 1;
- req->ownDirectory = 1;
- FsOpenReq::setVersion(req->fileNumber, 2);
- FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL);
- FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId);
- FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId());
- sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal,
- FsRemoveReq::SignalLength, JBA);
-}
-
-void
-Backup::execFSREMOVEREF(Signal* signal)
-{
- jamEntry();
- ndbrequire(0);
-}
-
-void
-Backup::execFSREMOVECONF(Signal* signal){
- jamEntry();
-
- FsConf * conf = (FsConf*)signal->getDataPtr();
- const Uint32 ptrI = conf->userPointer;
-
- /**
- * Get backup record
- */
- BackupRecordPtr ptr;
- c_backupPool.getPtr(ptr, ptrI);
-
- ndbrequire(ptr.p->slaveState.getState() == ABORTING);
- if (ptr.p->masterRef == reference()) {
- if (ptr.p->masterData.state.getAbortState() == DEFINING) {
- jam();
- sendBackupRef(signal, ptr, ptr.p->errorCode);
- return;
- } else {
- jam();
- }//if
- }//if
- cleanupSlaveResources(ptr);
-}
-
/*****************************************************************************
*
* Slave functionallity: Abort backup
@@ -4394,8 +4175,7 @@
if (c_backupPool.findId(senderData)) {
jam();
c_backupPool.getPtr(ptr, senderData);
- } else { // TODO might be abort sent to not master,
- // or master aborting too early
+ } else {
jam();
#ifdef DEBUG_ABORT
ndbout_c("Backup: abort request type=%u on id=%u,%u not found",
@@ -4405,15 +4185,15 @@
}
}//if
+ ptr.p->m_gsn = GSN_ABORT_BACKUP_ORD;
const bool isCoordinator = (ptr.p->masterRef == reference());
-
+
bool ok = false;
switch(requestType){
/**
* Requests sent to master
*/
-
case AbortBackupOrd::ClientAbort:
jam();
// fall through
@@ -4422,113 +4202,61 @@
// fall through
case AbortBackupOrd::FileOrScanError:
jam();
- if(ptr.p->masterData.state.getState() == ABORTING) {
-#ifdef DEBUG_ABORT
- ndbout_c("---- Already aborting");
-#endif
- jam();
- return;
- }
+ ndbrequire(isCoordinator);
ptr.p->setErrorCode(requestType);
- ndbrequire(isCoordinator); // Sent from slave to coordinator
- masterAbort(signal, ptr, false);
+ if(ptr.p->masterData.gsn == GSN_BACKUP_FRAGMENT_REQ)
+ {
+ /**
+ * Only scans are actively aborted
+ */
+ abort_scan(signal, ptr);
+ }
return;
-
+
/**
- * Info sent to slave
+ * Requests sent to slave
*/
-
- case AbortBackupOrd::OkToClean:
+ case AbortBackupOrd::AbortScan:
jam();
- cleanupMasterResources(ptr);
+ ptr.p->setErrorCode(requestType);
return;
-
- /**
- * Requests sent to slave
- */
-
+
case AbortBackupOrd::BackupComplete:
jam();
- if (ptr.p->slaveState.getState() == CLEANING) { // TODO what if state is
- // not CLEANING?
- jam();
- cleanupSlaveResources(ptr);
- }//if
+ cleanup(signal, ptr);
return;
- break;
- case AbortBackupOrd::BackupFailureDueToNodeFail:
- jam();
- ok = true;
- if (ptr.p->errorCode != 0)
- ptr.p->setErrorCode(requestType);
- break;
case AbortBackupOrd::BackupFailure:
- jam();
- ok = true;
- break;
+ case AbortBackupOrd::BackupFailureDueToNodeFail:
+ case AbortBackupOrd::OkToClean:
+ case AbortBackupOrd::IncompatibleVersions:
+#ifndef VM_TRACE
+ default:
+#endif
+ ptr.p->setErrorCode(requestType);
+ ok= true;
}
ndbrequire(ok);
- /**
- * Slave abort
- */
- slaveAbort(signal, ptr);
-}
-
-void
-Backup::slaveAbort(Signal* signal, BackupRecordPtr ptr)
-{
- if(ptr.p->slaveState.getState() == ABORTING) {
-#ifdef DEBUG_ABORT
- ndbout_c("---- Slave already aborting");
-#endif
- jam();
- return;
+ Uint32 ref= ptr.p->masterRef;
+ ptr.p->masterRef = reference();
+ ptr.p->nodes.clear();
+ ptr.p->nodes.set(getOwnNodeId());
+
+ if(ref == reference())
+ {
+ ptr.p->stopGCP= ptr.p->startGCP + 1;
+ sendDropTrig(signal, ptr);
}
-#ifdef DEBUG_ABORT
- ndbout_c("************* slaveAbort");
-#endif
-
- State slaveState = ptr.p->slaveState.getState();
- ptr.p->slaveState.setState(ABORTING);
- switch(slaveState) {
- case DEFINING:
- jam();
- return;
-//------------------------------------------
-// Will watch for the abort at various places
-// in the defining phase.
-//------------------------------------------
- case ABORTING:
- jam();
- //Fall through
- case DEFINED:
- jam();
- //Fall through
- case STOPPING:
- jam();
+ else
+ {
+ ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ;
+ ptr.p->masterData.sendCounter.clearWaitingFor();
+ ptr.p->masterData.sendCounter.setWaitingFor(getOwnNodeId());
closeFiles(signal, ptr);
- return;
- case STARTED:
- jam();
- //Fall through
- case SCANNING:
- jam();
- BackupFilePtr filePtr;
- filePtr.i = RNIL;
- abortFile(signal, ptr, filePtr);
- return;
- case CLEANING:
- jam();
- cleanupSlaveResources(ptr);
- return;
- case INITIAL:
- jam();
- ndbrequire(false);
- return;
}
}
+
void
Backup::dumpUsedResources()
{
@@ -4576,12 +4304,8 @@
}
void
-Backup::cleanupMasterResources(BackupRecordPtr ptr)
+Backup::cleanup(Signal* signal, BackupRecordPtr ptr)
{
-#ifdef DEBUG_ABORT
- ndbout_c("******** Cleanup Master Resources *********");
- ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode);
-#endif
TablePtr tabPtr;
for(ptr.p->tables.first(tabPtr); tabPtr.i != RNIL;ptr.p->tables.next(tabPtr))
@@ -4601,20 +4325,6 @@
tabPtr.p->triggerIds[j] = ILLEGAL_TRIGGER_ID;
}//for
}//for
- ptr.p->tables.release();
- ptr.p->triggers.release();
- ptr.p->okToCleanMaster = true;
-
- cleanupFinalResources(ptr);
-}
-
-void
-Backup::cleanupSlaveResources(BackupRecordPtr ptr)
-{
-#ifdef DEBUG_ABORT
- ndbout_c("******** Clean Up Slave Resources*********");
- ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode);
-#endif
BackupFilePtr filePtr;
for(ptr.p->files.first(filePtr);
@@ -4626,35 +4336,65 @@
ndbrequire(filePtr.p->scanRunning == 0);
filePtr.p->pages.release();
}//for
+
ptr.p->files.release();
+ ptr.p->tables.release();
+ ptr.p->triggers.release();
+
+ ptr.p->tables.release();
+ ptr.p->triggers.release();
+ ptr.p->pages.release();
+ ptr.p->backupId = ~0;
+
+ if(ptr.p->checkError())
+ removeBackup(signal, ptr);
+ else
+ c_backups.release(ptr);
+}
+
- cleanupFinalResources(ptr);
+void
+Backup::removeBackup(Signal* signal, BackupRecordPtr ptr)
+{
+ jam();
+
+ FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend();
+ req->userReference = reference();
+ req->userPointer = ptr.i;
+ req->directory = 1;
+ req->ownDirectory = 1;
+ FsOpenReq::setVersion(req->fileNumber, 2);
+ FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL);
+ FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId);
+ FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId());
+ sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal,
+ FsRemoveReq::SignalLength, JBA);
}
void
-Backup::cleanupFinalResources(BackupRecordPtr ptr)
+Backup::execFSREMOVEREF(Signal* signal)
{
-#ifdef DEBUG_ABORT
- ndbout_c("******** Clean Up Final Resources*********");
- ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode);
-#endif
+ jamEntry();
+ FsRef * ref = (FsRef*)signal->getDataPtr();
+ const Uint32 ptrI = ref->userPointer;
- // if (!ptr.p->tables.empty() || !ptr.p->files.empty()) {
- if (!ptr.p->okToCleanMaster || !ptr.p->files.empty()) {
- jam();
-#ifdef DEBUG_ABORT
- ndbout_c("******** Waiting to do final cleanup");
-#endif
- return;
- }
- ptr.p->pages.release();
- ptr.p->masterData.state.setState(INITIAL);
- ptr.p->slaveState.setState(INITIAL);
- ptr.p->backupId = 0;
+ FsConf * conf = (FsConf*)signal->getDataPtr();
+ conf->userPointer = ptrI;
+ execFSREMOVECONF(signal);
+}
- ptr.p->closingFiles = false;
- ptr.p->okToCleanMaster = true;
+void
+Backup::execFSREMOVECONF(Signal* signal){
+ jamEntry();
+ FsConf * conf = (FsConf*)signal->getDataPtr();
+ const Uint32 ptrI = conf->userPointer;
+
+ /**
+ * Get backup record
+ */
+ BackupRecordPtr ptr;
+ c_backupPool.getPtr(ptr, ptrI);
c_backups.release(ptr);
- // ndbrequire(false);
}
+
--- 1.6/ndb/src/kernel/blocks/backup/Backup.hpp Thu Dec 9 14:04:26 2004
+++ 1.7/ndb/src/kernel/blocks/backup/Backup.hpp Fri Apr 22 09:07:22 2005
@@ -232,6 +232,7 @@
*/
bool newScan();
bool scanConf(Uint32 noOfOps, Uint32 opLen);
+ bool closeScan();
/**
* Per record
@@ -330,7 +331,7 @@
Uint8 fileOpened;
Uint8 fileRunning;
- Uint8 fileDone;
+ Uint8 fileClosing;
Uint8 scanRunning;
};
typedef Ptr<BackupFile> BackupFilePtr;
@@ -403,13 +404,11 @@
ArrayPool<TriggerRecord> & trp)
: slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1)
, tables(tp), triggers(trp), files(bp), pages(pp)
- , masterData(b, validMasterTransitions, validMasterTransitionsCount)
- , backup(b)
- {
- closingFiles = false;
- okToCleanMaster = true;
- }
+ , masterData(b), backup(b)
+ {
+ }
+ Uint32 m_gsn;
CompoundState slaveState;
Uint32 clientRef;
@@ -420,9 +419,6 @@
Uint32 errorCode;
NdbNodeBitmask nodes;
- bool okToCleanMaster;
- bool closingFiles;
-
Uint64 noOfBytes;
Uint64 noOfRecords;
Uint64 noOfLogBytes;
@@ -444,15 +440,13 @@
SimpleProperties props;// Used for (un)packing backup request
struct MasterData {
- MasterData(Backup & b, const State valid[], Uint32 count)
- : state(b, valid, count, 0)
- {
- }
+ MasterData(Backup & b)
+ {
+ }
MutexHandle2<BACKUP_DEFINE_MUTEX> m_defineBackupMutex;
MutexHandle2<DICT_COMMIT_TABLE_MUTEX> m_dictCommitTableMutex;
Uint32 gsn;
- CompoundState state;
SignalCounter sendCounter;
Uint32 errorCode;
struct {
@@ -557,7 +551,8 @@
void stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId);
void defineBackupRef(Signal*, BackupRecordPtr, Uint32 errCode = 0);
-
+ void backupFragmentRef(Signal * signal, BackupFilePtr filePtr);
+
void nextFragment(Signal*, BackupRecordPtr);
void sendCreateTrig(Signal*, BackupRecordPtr ptr, TablePtr tabPtr);
@@ -578,14 +573,14 @@
void sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 errCode);
void sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr,
Uint32 errCode);
- void masterAbort(Signal*, BackupRecordPtr ptr, bool controlledAbort);
+ void masterAbort(Signal*, BackupRecordPtr ptr);
void masterSendAbortBackup(Signal*, BackupRecordPtr ptr);
void slaveAbort(Signal*, BackupRecordPtr ptr);
void abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr);
void abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanDone);
- bool verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask);
+ bool verifyNodesAlive(BackupRecordPtr, const NdbNodeBitmask& aNodeBitMask);
bool checkAbort(BackupRecordPtr ptr);
void checkNodeFail(Signal* signal,
BackupRecordPtr ptr,
@@ -603,9 +598,8 @@
void sendBackupRef(BlockReference ref, Signal *signal,
Uint32 senderData, Uint32 errorCode);
void dumpUsedResources();
- void cleanupMasterResources(BackupRecordPtr ptr);
- void cleanupSlaveResources(BackupRecordPtr ptr);
- void cleanupFinalResources(BackupRecordPtr ptr);
+ void cleanup(Signal*, BackupRecordPtr ptr);
+ void abort_scan(Signal*, BackupRecordPtr ptr);
void removeBackup(Signal*, BackupRecordPtr ptr);
void sendSTTORRY(Signal*);
--- 1.1/ndb/src/kernel/blocks/backup/Backup.txt Wed Apr 14 10:24:18 2004
+++ 1.2/ndb/src/kernel/blocks/backup/Backup.txt Fri Apr 22 09:07:22 2005
@@ -341,3 +341,28 @@
(ERROR_INSERTED(10022))) {
if (ERROR_INSERTED(10029)) {
if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) {
+
+----- XXX ---
+
+DEFINE_BACKUP_REF ->
+ ABORT_BACKUP_ORD(no reply) when all DEFINE_BACKUP replies have arrived
+
+START_BACKUP_REF
+ ABORT_BACKUP_ORD(no reply) when all START_BACKUP_ replies have arrived
+
+BACKUP_FRAGMENT_REF
+ ABORT_BACKUP_ORD(reply) directly to all nodes running BACKUP_FRAGMENT
+
+ When all nodes have replied BACKUP_FRAGMENT
+ ABORT_BACKUP_ORD(no reply)
+
+STOP_BACKUP_REF
+ ABORT_BACKUP_ORD(no reply) when all STOP_BACKUP_ replies have arrived
+
+NF_COMPLETE_REP
+ slave dies
+ master sends OUTSTANDING_REF to self
+ slave does nothing
+
+ master dies
+ slave elects self as master and sets only itself as participant
--- 1.9/ndb/src/kernel/blocks/backup/BackupInit.cpp Wed Jan 19 09:15:31 2005
+++ 1.10/ndb/src/kernel/blocks/backup/BackupInit.cpp Fri Apr 22 09:07:22 2005
@@ -175,7 +175,7 @@
addRecSignal(GSN_START_BACKUP_CONF, &Backup::execSTART_BACKUP_CONF);
addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Backup::execBACKUP_FRAGMENT_REQ);
- //addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF);
+ addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF);
addRecSignal(GSN_BACKUP_FRAGMENT_CONF, &Backup::execBACKUP_FRAGMENT_CONF);
addRecSignal(GSN_STOP_BACKUP_REQ, &Backup::execSTOP_BACKUP_REQ);
--- 1.16/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp Wed Jan 19 09:15:31 2005
+++ 1.17/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp Fri Apr 22 09:07:22 2005
@@ -126,6 +126,7 @@
}
setNodeInfo(getOwnNodeId()).m_connected = true;
+ setNodeInfo(getOwnNodeId()).m_version = ndbGetOwnVersion();
}
Cmvmi::~Cmvmi()
--- 1.25/ndb/src/mgmapi/mgmapi.cpp Wed Feb 16 20:46:28 2005
+++ 1.26/ndb/src/mgmapi/mgmapi.cpp Fri Apr 22 09:07:22 2005
@@ -1565,9 +1565,9 @@
{ // start backup can take some time, set timeout high
Uint64 old_timeout= handle->read_timeout;
if (wait_completed == 2)
- handle->read_timeout= 30*60*1000; // 30 minutes
+ handle->read_timeout= 48*60*60*1000; // 48 hours
else if (wait_completed == 1)
- handle->read_timeout= 5*60*1000; // 5 minutes
+ handle->read_timeout= 10*60*1000; // 10 minutes
reply = ndb_mgm_call(handle, start_backup_reply, "start backup", &args);
handle->read_timeout= old_timeout;
}
--- 1.58/ndb/src/mgmsrv/MgmtSrvr.cpp Tue Apr 12 13:56:22 2005
+++ 1.59/ndb/src/mgmsrv/MgmtSrvr.cpp Fri Apr 22 09:07:22 2005
@@ -791,7 +791,7 @@
result = sendSignal(processId, NO_WAIT, signal, true);
}
- if (result == -1) {
+ if (result == -1 && theWaitState != WAIT_NODEFAILURE) {
m_stopRec.inUse = false;
return SEND_OR_RECEIVE_FAILED;
}
@@ -1920,6 +1920,7 @@
#ifdef VM_TRACE
ndbout_c("I'm not master resending to %d", aNodeId);
#endif
+ theWaitNode= aNodeId;
NdbApiSignal aSignal(_ownReference);
BackupReq* req = CAST_PTR(BackupReq, aSignal.getDataPtrSend());
aSignal.set(TestOrd::TraceAPI, BACKUP, GSN_BACKUP_REQ,
@@ -1947,6 +1948,7 @@
event.Event = BackupEvent::BackupAborted;
event.Aborted.Reason = rep->reason;
event.Aborted.BackupId = rep->backupId;
+ event.Aborted.ErrorCode = rep->reason;
backupCallback(event);
}
break;
@@ -2076,6 +2078,13 @@
handleStopReply(nodeId, 0);
DBUG_VOID_RETURN;
}
+
+ if(theWaitNode == nodeId &&
+ theWaitState != NO_WAIT && theWaitState != WAIT_STOP)
+ {
+ theWaitState = WAIT_NODEFAILURE;
+ NdbCondition_Signal(theMgmtWaitForResponseCondPtr);
+ }
}
eventReport(_ownNodeId, theData);
@@ -2427,7 +2436,7 @@
int result;
if (waitCompleted == 2) {
result = sendRecSignal(nodeId, WAIT_BACKUP_COMPLETED,
- signal, true, 30*60*1000 /*30 secs*/);
+ signal, true, 48*60*60*1000 /* 48 hours */);
}
else if (waitCompleted == 1) {
result = sendRecSignal(nodeId, WAIT_BACKUP_STARTED,
@@ -2441,22 +2450,6 @@
}
if (waitCompleted){
- switch(m_lastBackupEvent.Event){
- case BackupEvent::BackupCompleted:
- backupId = m_lastBackupEvent.Completed.BackupId;
- break;
- case BackupEvent::BackupStarted:
- backupId = m_lastBackupEvent.Started.BackupId;
- break;
- case BackupEvent::BackupFailedToStart:
- return m_lastBackupEvent.FailedToStart.ErrorCode;
- case BackupEvent::BackupAborted:
- return m_lastBackupEvent.Aborted.ErrorCode;
- default:
- return -1;
- break;
- }
- } else {
switch(m_lastBackupEvent.Event){
case BackupEvent::BackupCompleted:
backupId = m_lastBackupEvent.Completed.BackupId;
--- 1.24/ndb/src/mgmsrv/MgmtSrvr.hpp Tue Apr 12 13:36:40 2005
+++ 1.25/ndb/src/mgmsrv/MgmtSrvr.hpp Fri Apr 22 09:07:22 2005
@@ -611,7 +611,8 @@
WAIT_STOP,
WAIT_BACKUP_STARTED,
WAIT_BACKUP_COMPLETED,
- WAIT_VERSION
+ WAIT_VERSION,
+ WAIT_NODEFAILURE
};
/**
@@ -695,6 +696,7 @@
NdbApiSignal* theSignalIdleList;
// List of unused signals
+ Uint32 theWaitNode;
WaitSignalType theWaitState;
// State denoting a set of signals we accept to recieve.
--- 1.1/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp Wed Apr 14 10:24:24 2004
+++ 1.2/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp Fri Apr 22 09:07:23 2005
@@ -108,6 +108,7 @@
return -1;
}
theWaitState = aWaitState;
+ theWaitNode = aNodeId;
return receiveOptimisedResponse(waitTime);
}
@@ -119,11 +120,12 @@
theFacade->checkForceSend(_blockNumber);
NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
- while (theWaitState != NO_WAIT && waitTime > 0) {
+ while (theWaitState != NO_WAIT && theWaitState != WAIT_NODEFAILURE
+ && waitTime > 0) {
NdbCondition_WaitTimeout(theMgmtWaitForResponseCondPtr,
theFacade->theMutexPtr,
waitTime);
- if(theWaitState == NO_WAIT)
+ if(theWaitState == NO_WAIT || theWaitState == WAIT_NODEFAILURE)
break;
waitTime = (maxTime - NdbTick_CurrentMillisecond());
}//while
--- 1.9/ndb/test/ndbapi/testBackup.cpp Wed Sep 22 09:27:30 2004
+++ 1.10/ndb/test/ndbapi/testBackup.cpp Fri Apr 22 09:07:23 2005
@@ -74,20 +74,20 @@
if (testMaster) {
if (testSlave) {
- if (backup.NFMasterAsSlave(restarter) == -1){
+ if (backup.NFMasterAsSlave(restarter) != NDBT_OK){
return NDBT_FAILED;
}
} else {
- if (backup.NFMaster(restarter) == -1){
+ if (backup.NFMaster(restarter) != NDBT_OK){
return NDBT_FAILED;
}
}
} else {
- if (backup.NFSlave(restarter) == -1){
+ if (backup.NFSlave(restarter) != NDBT_OK){
return NDBT_FAILED;
}
}
-
+
return NDBT_OK;
}
@@ -108,16 +108,16 @@
if (testMaster) {
if (testSlave) {
- if (backup.FailMasterAsSlave(restarter) == -1){
+ if (backup.FailMasterAsSlave(restarter) != NDBT_OK){
return NDBT_FAILED;
}
} else {
- if (backup.FailMaster(restarter) == -1){
+ if (backup.FailMaster(restarter) != NDBT_OK){
return NDBT_FAILED;
}
}
} else {
- if (backup.FailSlave(restarter) == -1){
+ if (backup.FailSlave(restarter) != NDBT_OK){
return NDBT_FAILED;
}
}
--- 1.19/ndb/test/src/NdbBackup.cpp Mon Dec 13 00:48:01 2004
+++ 1.20/ndb/test/src/NdbBackup.cpp Fri Apr 22 09:07:23 2005
@@ -245,6 +245,10 @@
int
NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, bool
onMaster){
{
+ int nNodes = _restarter.getNumDbNodes();
+ if(nNodes == 1)
+ return NDBT_OK;
+
int nodeId = _restarter.getMasterNodeId();
CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0,
@@ -255,15 +259,11 @@
CHECK(_restarter.startNodes(&nodeId, 1) == 0,
"failed to start node");
-
- NdbSleep_SecSleep(10);
}
-
+
CHECK(_restarter.waitClusterStarted() == 0,
"waitClusterStarted failed");
-
- int nNodes = _restarter.getNumDbNodes();
-
+
myRandom48Init(NdbTick_CurrentMillisecond());
for(int i = 0; i<sz; i++){
@@ -296,6 +296,7 @@
"failed to set error insert");
g_info << "error inserted" << endl;
+ NdbSleep_SecSleep(1);
g_info << "starting backup" << endl;
int r = start(backupId);
@@ -304,6 +305,7 @@
if (r == 0) {
g_err << "Backup should have failed on error_insertion " << error
<< endl
<< "Master = " << masterNodeId << "Node = " << nodeId
<< endl;
+ return NDBT_FAILED;
}
CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
@@ -316,8 +318,6 @@
return NDBT_FAILED;
}
- NdbSleep_SecSleep(1);
-
g_info << "starting new backup" << endl;
CHECK(start(backupId) == 0,
"failed to start backup");
@@ -331,8 +331,14 @@
"waitClusterStarted failed");
g_info << "node started" << endl;
+ int val2[] = { 24, 2424 };
+ CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
+ "failed to check backup resources RestartOnErrorInsert");
+
CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
"failed to set error insert");
+
+ NdbSleep_SecSleep(1);
}
return NDBT_OK;
@@ -340,15 +346,8 @@
int
FailS_codes[] = {
- 10023,
- 10024,
- 10025,
- 10026,
10027,
- 10028,
- 10029,
- 10030,
- 10031
+ 10033
};
int
@@ -359,9 +358,8 @@
10026,
10027,
10028,
- 10029,
- 10030,
- 10031
+ 10031,
+ 10033
};
int
@@ -426,13 +424,21 @@
if (r == 0) {
g_err << "Backup should have failed on error_insertion " << error
<< endl
<< "Master = " << masterNodeId << "Node = " << nodeId
<< endl;
+ return NDBT_FAILED;
}
-
+
CHECK(_restarter.waitClusterStarted() == 0,
"waitClusterStarted failed");
CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
"failed to set error insert");
+
+ NdbSleep_SecSleep(5);
+
+ int val2[] = { 24, 2424 };
+ CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
+ "failed to check backup resources RestartOnErrorInsert");
+
}
return NDBT_OK;
--- 1.21/ndb/src/ndbapi/ndberror.c Fri Dec 17 10:32:23 2004
+++ 1.22/ndb/src/ndbapi/ndberror.c Fri Apr 22 09:07:23 2005
@@ -345,7 +345,7 @@
{ 1325, IE, "File or scan error" },
{ 1326, IE, "Backup abortet due to node failure" },
{ 1327, IE, "1327" },
-
+
{ 1340, IE, "Backup undefined error" },
{ 1342, AE, "Backup failed to allocate buffers (check configuration)" },
{ 1343, AE, "Backup failed to setup fs buffers (check configuration)" },
@@ -355,7 +355,8 @@
{ 1347, AE, "Backup failed to allocate table memory (check configuration)" },
{ 1348, AE, "Backup failed to allocate file record (check configuration)" },
{ 1349, AE, "Backup failed to allocate attribute record (check configuration)" },
-
+ { 1329, AE, "Backup during software upgrade not supported" },
+
/**
* Still uncategorized
*/
| Thread |
|---|
| • bk commit into 4.1 tree (joreland:1.2180) BUG#9960 | jonas.oreland | 22 Apr |