From: Date: April 22 2005 9:07am Subject: bk commit into 4.1 tree (joreland:1.2180) BUG#9960 List-Archive: http://lists.mysql.com/internals/24215 X-Bug: 9960 Message-Id: <20050422070731.2A909DE370@eel.hemma.oreland.se.ndb.mysql.com> Below is the list of changes that have just been committed into a local 4.1 repository of jonas. When jonas does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2180 05/04/22 09:07:25 joreland@stripped +16 -0 bug#9924 - ndb backup abort handling Redo abort handling according to descr. in Backup.txt bug#9960 - ndb backup increase wait completed timeout to 48 hours ndb/test/src/NdbBackup.cpp 1.20 05/04/22 09:07:23 joreland@stripped +26 -20 fix error codes introduce checking of backup resources after each test ndb/test/run-test/daily-basic-tests.txt 1.17 05/04/22 09:07:23 joreland@stripped +24 -0 Add failure test cases to autotest ndb/test/ndbapi/testBackup.cpp 1.10 05/04/22 09:07:23 joreland@stripped +7 -7 fix return codes ndb/src/ndbapi/ndberror.c 1.22 05/04/22 09:07:23 joreland@stripped +3 -2 new error codes ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp 1.2 05/04/22 09:07:23 joreland@stripped +4 -2 Handle node failures actively (mainly for backup...) ndb/src/mgmsrv/MgmtSrvr.hpp 1.25 05/04/22 09:07:22 joreland@stripped +3 -1 Handle node failures actively (mainly for backup...) ndb/src/mgmsrv/MgmtSrvr.cpp 1.59 05/04/22 09:07:22 joreland@stripped +11 -18 Handle node failures actively (mainly for backup...) 
ndb/src/mgmapi/mgmapi.cpp 1.26 05/04/22 09:07:22 joreland@stripped +2 -2 bug#9960 - ndb backup increase wait completed timeout to 48 hours ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp 1.17 05/04/22 09:07:22 joreland@stripped +1 -0 Init own version ndb/src/kernel/blocks/backup/BackupInit.cpp 1.10 05/04/22 09:07:22 joreland@stripped +1 -1 bug#9924 - ndb backup abort handling Redo abort handling according to descr. in Backup.txt ndb/src/kernel/blocks/backup/Backup.txt 1.2 05/04/22 09:07:22 joreland@stripped +25 -0 bug#9924 - ndb backup abort handling Redo abort handling according to descr. in Backup.txt ndb/src/kernel/blocks/backup/Backup.hpp 1.7 05/04/22 09:07:22 joreland@stripped +15 -21 bug#9924 - ndb backup abort handling Redo abort handling according to descr. in Backup.txt ndb/src/kernel/blocks/backup/Backup.cpp 1.14 05/04/22 09:07:22 joreland@stripped +616 -876 bug#9924 - ndb backup abort handling Redo abort handling according to descr. in Backup.txt ndb/src/common/debugger/signaldata/BackupImpl.cpp 1.3 05/04/22 09:07:22 joreland@stripped +2 -4 fix printout ndb/include/kernel/signaldata/BackupSignalData.hpp 1.3 05/04/22 09:07:22 joreland@stripped +3 -0 new error codes ndb/include/kernel/signaldata/BackupImpl.hpp 1.3 05/04/22 09:07:22 joreland@stripped +7 -5 Add nodeid to reply to be able to fake reply during NF # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. 
# User: joreland # Host: eel.hemma.oreland.se.ndb.mysql.com # Root: /home/jonas/src/mysql-4.1 --- 1.16/ndb/test/run-test/daily-basic-tests.txt Thu Feb 10 18:15:12 2005 +++ 1.17/ndb/test/run-test/daily-basic-tests.txt Fri Apr 22 09:07:23 2005 @@ -4,6 +4,30 @@ max-time: 600 cmd: atrt-testBackup +args: -n NFMaster T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n NFMasterAsSlave T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n NFSlave T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n FailMaster T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n FailMasterAsSlave T1 + +max-time: 600 +cmd: atrt-testBackup +args: -n FailSlave T1 + +max-time: 600 +cmd: atrt-testBackup args: -n BackupOne T1 T6 T3 I3 # BASIC FUNCTIONALITY --- 1.2/ndb/include/kernel/signaldata/BackupImpl.hpp Mon Nov 29 16:14:40 2004 +++ 1.3/ndb/include/kernel/signaldata/BackupImpl.hpp Fri Apr 22 09:07:22 2005 @@ -75,7 +75,7 @@ friend bool printDEFINE_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 4 ); enum ErrorCode { Undefined = 1340, @@ -92,6 +92,7 @@ Uint32 backupId; Uint32 backupPtr; Uint32 errorCode; + Uint32 nodeId; }; class DefineBackupConf { @@ -158,7 +159,7 @@ friend bool printSTART_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 4 ); + STATIC_CONST( SignalLength = 5 ); enum ErrorCode { FailedToAllocateTriggerRecord = 1 @@ -168,6 +169,7 @@ Uint32 backupPtr; Uint32 signalNo; Uint32 errorCode; + Uint32 nodeId; }; class StartBackupConf { @@ -232,9 +234,8 @@ private: Uint32 backupId; Uint32 backupPtr; - Uint32 tableId; - Uint32 fragmentNo; Uint32 errorCode; + Uint32 nodeId; }; class BackupFragmentConf { @@ -296,12 +297,13 @@ friend bool printSTOP_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); public: - STATIC_CONST( SignalLength = 3 ); + STATIC_CONST( SignalLength = 4 ); private: Uint32 backupId; Uint32 backupPtr; Uint32 errorCode; + Uint32 nodeId; }; class 
StopBackupConf { --- 1.2/ndb/include/kernel/signaldata/BackupSignalData.hpp Mon Nov 29 16:14:40 2004 +++ 1.3/ndb/include/kernel/signaldata/BackupSignalData.hpp Fri Apr 22 09:07:22 2005 @@ -240,6 +240,9 @@ FileOrScanError = 1325, // slave -> coordinator BackupFailureDueToNodeFail = 1326, // slave -> slave OkToClean = 1327 // master -> slave + + ,AbortScan = 1328 + ,IncompatibleVersions = 1329 }; private: Uint32 requestType; --- 1.2/ndb/src/common/debugger/signaldata/BackupImpl.cpp Thu Jun 17 02:33:55 2004 +++ 1.3/ndb/src/common/debugger/signaldata/BackupImpl.cpp Fri Apr 22 09:07:22 2005 @@ -90,10 +90,8 @@ bool printBACKUP_FRAGMENT_REF(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){ BackupFragmentRef* sig = (BackupFragmentRef*)data; - fprintf(out, " backupPtr: %d backupId: %d\n", - sig->backupPtr, sig->backupId); - fprintf(out, " tableId: %d fragmentNo: %d errorCode: %d\n", - sig->tableId, sig->fragmentNo, sig->errorCode); + fprintf(out, " backupPtr: %d backupId: %d nodeId: %d errorCode: %d\n", + sig->backupPtr, sig->backupId, sig->nodeId, sig->errorCode); return true; } --- 1.13/ndb/src/kernel/blocks/backup/Backup.cpp Wed Feb 2 03:55:36 2005 +++ 1.14/ndb/src/kernel/blocks/backup/Backup.cpp Fri Apr 22 09:07:22 2005 @@ -67,31 +67,6 @@ //#define DEBUG_ABORT -//--------------------------------------------------------- -// Ignore this since a completed abort could have preceded -// this message. 
-//--------------------------------------------------------- -#define slaveAbortCheck() \ -if ((ptr.p->backupId != backupId) || \ - (ptr.p->slaveState.getState() == ABORTING)) { \ - jam(); \ - return; \ -} - -#define masterAbortCheck() \ -if ((ptr.p->backupId != backupId) || \ - (ptr.p->masterData.state.getState() == ABORTING)) { \ - jam(); \ - return; \ -} - -#define defineSlaveAbortCheck() \ - if (ptr.p->slaveState.getState() == ABORTING) { \ - jam(); \ - closeFiles(signal, ptr); \ - return; \ - } - static Uint32 g_TypeOfStart = NodeState::ST_ILLEGAL_TYPE; void @@ -221,12 +196,7 @@ jam(); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, Tdata1); - - if (ptr.p->slaveState.getState() == ABORTING) { - jam(); - closeFiles(signal, ptr); - return; - }//if + BackupFilePtr filePtr; ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr); FsBuffer & buf = filePtr.p->operation.dataBuffer; @@ -324,13 +294,7 @@ for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)){ infoEvent("BackupRecord %d: BackupId: %d MasterRef: %x ClientRef: %x", ptr.i, ptr.p->backupId, ptr.p->masterRef, ptr.p->clientRef); - if(ptr.p->masterRef == reference()){ - infoEvent(" MasterState: %d State: %d", - ptr.p->masterData.state.getState(), - ptr.p->slaveState.getState()); - } else { - infoEvent(" State: %d", ptr.p->slaveState.getState()); - } + infoEvent(" State: %d", ptr.p->slaveState.getState()); BackupFilePtr filePtr; for(ptr.p->files.first(filePtr); filePtr.i != RNIL; ptr.p->files.next(filePtr)){ @@ -338,7 +302,7 @@ infoEvent(" file %d: type: %d open: %d running: %d done: %d scan: %d", filePtr.i, filePtr.p->fileType, filePtr.p->fileOpened, filePtr.p->fileRunning, - filePtr.p->fileDone, filePtr.p->scanRunning); + filePtr.p->fileClosing, filePtr.p->scanRunning); } } } @@ -356,6 +320,17 @@ infoEvent("PagePool: %d", c_pagePool.getSize()); + + if(signal->getLength() == 2 && signal->theData[1] == 2424) + { + ndbrequire(c_tablePool.getSize() == c_tablePool.getNoOfFree()); + 
ndbrequire(c_attributePool.getSize() == c_attributePool.getNoOfFree()); + ndbrequire(c_backupPool.getSize() == c_backupPool.getNoOfFree()); + ndbrequire(c_backupFilePool.getSize() == c_backupFilePool.getNoOfFree()); + ndbrequire(c_pagePool.getSize() == c_pagePool.getNoOfFree()); + ndbrequire(c_fragmentPool.getSize() == c_fragmentPool.getNoOfFree()); + ndbrequire(c_triggerPool.getSize() == c_triggerPool.getNoOfFree()); + } } } @@ -512,27 +487,6 @@ }; const Backup::State -Backup::validMasterTransitions[] = { - INITIAL, DEFINING, - DEFINING, DEFINED, - DEFINED, STARTED, - STARTED, SCANNING, - SCANNING, STOPPING, - STOPPING, INITIAL, - - DEFINING, ABORTING, - DEFINED, ABORTING, - STARTED, ABORTING, - SCANNING, ABORTING, - STOPPING, ABORTING, - ABORTING, ABORTING, - - DEFINING, INITIAL, - ABORTING, INITIAL, - INITIAL, INITIAL -}; - -const Backup::State Backup::validSlaveTransitions[] = { INITIAL, DEFINING, DEFINING, DEFINED, @@ -561,10 +515,6 @@ Backup::validSlaveTransitionsCount = sizeof(Backup::validSlaveTransitions) / sizeof(Backup::State); -const Uint32 -Backup::validMasterTransitionsCount = -sizeof(Backup::validMasterTransitions) / sizeof(Backup::State); - void Backup::CompoundState::setState(State newState){ bool found = false; @@ -578,7 +528,8 @@ break; } } - ndbrequire(found); + + //ndbrequire(found); if (newState == INITIAL) abortState = INITIAL; @@ -647,8 +598,7 @@ Uint32 theFailedNodes[NodeBitmask::Size]; for (Uint32 i = 0; i < NodeBitmask::Size; i++) theFailedNodes[i] = rep->theNodes[i]; - -// NodeId old_master_node_id = getMasterNodeId(); + c_masterNodeId = new_master_node_id; NodePtr nodePtr; @@ -686,15 +636,24 @@ } bool -Backup::verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask) +Backup::verifyNodesAlive(BackupRecordPtr ptr, + const NdbNodeBitmask& aNodeBitMask) { + Uint32 version = getNodeInfo(getOwnNodeId()).m_version; for (Uint32 i = 0; i < MAX_NDB_NODES; i++) { jam(); if(aNodeBitMask.get(i)) { if(!c_aliveNodes.get(i)){ jam(); + 
ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail); return false; }//if + if(getNodeInfo(i).m_version != version) + { + jam(); + ptr.p->setErrorCode(AbortBackupOrd::IncompatibleVersions); + return false; + } }//if }//for return true; @@ -709,6 +668,10 @@ ndbrequire( ptr.p->nodes.get(newCoord)); /* just to make sure newCoord * is part of the backup */ + + NdbNodeBitmask mask; + mask.assign(2, theFailedNodes); + /* Update ptr.p->nodes to be up to date with current alive nodes */ NodePtr nodePtr; @@ -730,26 +693,42 @@ return; // failed node is not part of backup process, safe to continue } - bool doMasterTakeover = false; - if(NodeBitmask::get(theFailedNodes, refToNode(ptr.p->masterRef))){ - jam(); - doMasterTakeover = true; - }; - - if (newCoord == getOwnNodeId()){ - jam(); - if (doMasterTakeover) { - /** - * I'm new master - */ - CRASH_INSERTION((10002)); -#ifdef DEBUG_ABORT - ndbout_c("**** Master Takeover: Node failed: Master id = %u", - refToNode(ptr.p->masterRef)); -#endif - masterTakeOver(signal, ptr); + if(mask.get(refToNode(ptr.p->masterRef))) + { + /** + * Master died...abort + */ + ptr.p->masterRef = reference(); + ptr.p->nodes.clear(); + ptr.p->nodes.set(getOwnNodeId()); + ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail); + switch(ptr.p->m_gsn){ + case GSN_DEFINE_BACKUP_REQ: + case GSN_START_BACKUP_REQ: + case GSN_BACKUP_FRAGMENT_REQ: + case GSN_STOP_BACKUP_REQ: + // I'm currently processing...reply to self and abort... 
+ ptr.p->masterData.gsn = ptr.p->m_gsn; + ptr.p->masterData.sendCounter = ptr.p->nodes; return; - }//if + case GSN_DEFINE_BACKUP_REF: + case GSN_DEFINE_BACKUP_CONF: + case GSN_START_BACKUP_REF: + case GSN_START_BACKUP_CONF: + case GSN_BACKUP_FRAGMENT_REF: + case GSN_BACKUP_FRAGMENT_CONF: + case GSN_STOP_BACKUP_REF: + case GSN_STOP_BACKUP_CONF: + ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ; + masterAbort(signal, ptr); + return; + case GSN_ABORT_BACKUP_ORD: + // Already aborting + return; + } + } + else if (newCoord == getOwnNodeId()) + { /** * I'm master for this backup */ @@ -759,62 +738,82 @@ ndbout_c("**** Master: Node failed: Master id = %u", refToNode(ptr.p->masterRef)); #endif - masterAbort(signal, ptr, false); - return; - }//if - /** - * If there's a new master, (it's not me) - * but remember who it is - */ - ptr.p->masterRef = calcBackupBlockRef(newCoord); + Uint32 gsn, len, pos; + ptr.p->nodes.bitANDC(mask); + switch(ptr.p->masterData.gsn){ + case GSN_DEFINE_BACKUP_REQ: + { + DefineBackupRef * ref = (DefineBackupRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + gsn= GSN_DEFINE_BACKUP_REF; + len= DefineBackupRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_START_BACKUP_REQ: + { + StartBackupRef * ref = (StartBackupRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + ref->signalNo = ptr.p->masterData.startBackup.signalNo; + gsn= GSN_START_BACKUP_REF; + len= StartBackupRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_BACKUP_FRAGMENT_REQ: + { + BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + gsn= GSN_BACKUP_FRAGMENT_REF; + len= 
BackupFragmentRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_STOP_BACKUP_REQ: + { + StopBackupRef * ref = (StopBackupRef*)signal->getDataPtr(); + ref->backupPtr = ptr.i; + ref->backupId = ptr.p->backupId; + ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail; + gsn= GSN_STOP_BACKUP_REF; + len= StopBackupRef::SignalLength; + pos= &ref->nodeId - signal->getDataPtr(); + break; + } + case GSN_CREATE_TRIG_REQ: + case GSN_ALTER_TRIG_REQ: + case GSN_WAIT_GCP_REQ: + case GSN_UTIL_SEQUENCE_REQ: + case GSN_UTIL_LOCK_REQ: + case GSN_DROP_TRIG_REQ: + return; + } + + for(Uint32 i = 0; (i = mask.find(i+1)) != NdbNodeBitmask::NotFound; ) + { + signal->theData[pos] = i; + sendSignal(reference(), gsn, signal, len, JBB); #ifdef DEBUG_ABORT - ndbout_c("**** Slave: Node failed: Master id = %u", - refToNode(ptr.p->masterRef)); + ndbout_c("sending %d to self from %d", gsn, i); #endif + } + return; + }//if + /** * I abort myself as slave if not master */ CRASH_INSERTION((10021)); - // slaveAbort(signal, ptr); } void -Backup::masterTakeOver(Signal* signal, BackupRecordPtr ptr) -{ - ptr.p->masterRef = reference(); - ptr.p->masterData.gsn = MAX_GSN + 1; - - switch(ptr.p->slaveState.getState()){ - case INITIAL: - jam(); - ptr.p->masterData.state.forceState(INITIAL); - break; - case ABORTING: - jam(); - case DEFINING: - jam(); - case DEFINED: - jam(); - case STARTED: - jam(); - case SCANNING: - jam(); - ptr.p->masterData.state.forceState(STARTED); - break; - case STOPPING: - jam(); - case CLEANING: - jam(); - ptr.p->masterData.state.forceState(STOPPING); - break; - default: - ndbrequire(false); - } - masterAbort(signal, ptr, false); -} - -void Backup::execINCL_NODEREQ(Signal* signal) { jamEntry(); @@ -895,8 +894,8 @@ ndbrequire(ptr.p->pages.empty()); ndbrequire(ptr.p->tables.isEmpty()); - ptr.p->masterData.state.forceState(INITIAL); - ptr.p->masterData.state.setState(DEFINING); + ptr.p->m_gsn = 0; + ptr.p->errorCode = 0; ptr.p->clientRef = 
senderRef; ptr.p->clientData = senderData; ptr.p->masterRef = reference(); @@ -905,6 +904,7 @@ ptr.p->backupKey[0] = 0; ptr.p->backupKey[1] = 0; ptr.p->backupDataLen = 0; + ptr.p->masterData.errorCode = 0; ptr.p->masterData.dropTrig.tableId = RNIL; ptr.p->masterData.alterTrig.tableId = RNIL; @@ -928,7 +928,6 @@ ndbrequire(ptr.i == RNIL); c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ); - ptr.p->masterData.gsn = 0; sendBackupRef(signal, ptr, BackupRef::SequenceFailure); }//execUTIL_SEQUENCE_REF() @@ -938,8 +937,7 @@ { jam(); sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData, errorCode); - // ptr.p->masterData.state.setState(INITIAL); - cleanupSlaveResources(ptr); + cleanup(signal, ptr); } void @@ -968,7 +966,8 @@ UtilSequenceConf * conf = (UtilSequenceConf*)signal->getDataPtr(); - if(conf->requestType == UtilSequenceReq::Create) { + if(conf->requestType == UtilSequenceReq::Create) + { jam(); sendSTTORRY(signal); // At startup in NDB return; @@ -979,18 +978,20 @@ c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ); - ptr.p->masterData.gsn = 0; - if (ptr.p->masterData.state.getState() == ABORTING) { + + if (ptr.p->checkError()) + { jam(); sendBackupRef(signal, ptr, ptr.p->errorCode); return; }//if - if (ERROR_INSERTED(10023)) { - ptr.p->masterData.state.setState(ABORTING); + + if (ERROR_INSERTED(10023)) + { sendBackupRef(signal, ptr, 323); return; }//if - ndbrequire(ptr.p->masterData.state.getState() == DEFINING); + { Uint64 backupId; @@ -1018,7 +1019,6 @@ c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ); - ptr.p->masterData.gsn = 0; ptr.p->masterData.gsn = GSN_UTIL_LOCK_REQ; Mutex mutex(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex); @@ -1040,14 +1040,13 @@ c_backupPool.getPtr(ptr); ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ); - ptr.p->masterData.gsn = 0; if (ERROR_INSERTED(10031)) { - ptr.p->masterData.state.setState(ABORTING); 
ptr.p->setErrorCode(331); }//if - if (ptr.p->masterData.state.getState() == ABORTING) { + if (ptr.p->checkError()) + { jam(); /** @@ -1062,13 +1061,11 @@ Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex); jam(); mutex2.unlock(); // ignore response - + sendBackupRef(signal, ptr, ptr.p->errorCode); return; }//if - ndbrequire(ptr.p->masterData.state.getState() == DEFINING); - sendDefineBackupReq(signal, ptr); } @@ -1078,33 +1075,6 @@ * *****************************************************************************/ -void -Backup::sendSignalAllWait(BackupRecordPtr ptr, Uint32 gsn, Signal *signal, - Uint32 signalLength, bool executeDirect) -{ - jam(); - ptr.p->masterData.gsn = gsn; - ptr.p->masterData.sendCounter.clearWaitingFor(); - NodePtr node; - for(c_nodes.first(node); node.i != RNIL; c_nodes.next(node)){ - jam(); - const Uint32 nodeId = node.p->nodeId; - if(node.p->alive && ptr.p->nodes.get(nodeId)){ - jam(); - - ptr.p->masterData.sendCounter.setWaitingFor(nodeId); - - const BlockReference ref = numberToRef(BACKUP, nodeId); - if (!executeDirect || ref != reference()) { - sendSignal(ref, gsn, signal, signalLength, JBB); - }//if - }//if - }//for - if (executeDirect) { - EXECUTE_DIRECT(BACKUP, gsn, signal, signalLength); - } -} - bool Backup::haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId) { @@ -1114,10 +1084,6 @@ ndbrequire(ptr.p->masterData.sendCounter.isWaitingFor(nodeId)); ptr.p->masterData.sendCounter.clearWaitingFor(nodeId); - - if (ptr.p->masterData.sendCounter.done()) - ptr.p->masterData.gsn = 0; - return ptr.p->masterData.sendCounter.done(); } @@ -1138,11 +1104,12 @@ req->nodes = ptr.p->nodes; req->backupDataLen = ptr.p->backupDataLen; - ptr.p->masterData.errorCode = 0; - ptr.p->okToCleanMaster = false; // master must wait with cleaning to last - sendSignalAllWait(ptr, GSN_DEFINE_BACKUP_REQ, signal, - DefineBackupReq::SignalLength, - true /* do execute direct on oneself */); + ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ; 
+ ptr.p->masterData.sendCounter = ptr.p->nodes; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_DEFINE_BACKUP_REQ, signal, + DefineBackupReq::SignalLength, JBB); + /** * Now send backup data */ @@ -1167,17 +1134,15 @@ jamEntry(); DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtr(); - + const Uint32 ptrI = ref->backupPtr; - const Uint32 backupId = ref->backupId; - const Uint32 nodeId = refToNode(signal->senderBlockRef()); - + //const Uint32 backupId = ref->backupId; + const Uint32 nodeId = ref->nodeId; + BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->masterData.errorCode = ref->errorCode; + ptr.p->setErrorCode(ref->errorCode); defineBackupReply(signal, ptr, nodeId); } @@ -1188,17 +1153,16 @@ DefineBackupConf* conf = (DefineBackupConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 nodeId = refToNode(signal->senderBlockRef()); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - - if (ERROR_INSERTED(10024)) { - ptr.p->masterData.errorCode = 324; - }//if + if (ERROR_INSERTED(10024)) + { + ptr.p->setErrorCode(324); + } defineBackupReply(signal, ptr, nodeId); } @@ -1210,6 +1174,7 @@ jam(); return; } + /** * Unlock mutexes */ @@ -1223,16 +1188,10 @@ jam(); mutex2.unlock(); // ignore response - if(ptr.p->errorCode) { - jam(); - ptr.p->masterData.errorCode = ptr.p->errorCode; - } - - if(ptr.p->masterData.errorCode){ + if(ptr.p->checkError()) + { jam(); - ptr.p->setErrorCode(ptr.p->masterData.errorCode); - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean); - masterSendAbortBackup(signal, ptr); + masterAbort(signal, ptr); return; } @@ -1252,7 +1211,6 @@ ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+3); sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3+NdbNodeBitmask::Size, JBB); - 
ptr.p->masterData.state.setState(DEFINED); /** * Prepare Trig */ @@ -1286,7 +1244,6 @@ { CreateTrigReq * req =(CreateTrigReq *)signal->getDataPtrSend(); - ptr.p->errorCode = 0; ptr.p->masterData.gsn = GSN_CREATE_TRIG_REQ; ptr.p->masterData.sendCounter = 3; ptr.p->masterData.createTrig.tableId = tabPtr.p->tableId; @@ -1395,17 +1352,14 @@ return; }//if - ptr.p->masterData.gsn = 0; + if (ERROR_INSERTED(10025)) + { + ptr.p->errorCode = 325; + } if(ptr.p->checkError()) { jam(); - masterAbort(signal, ptr, true); - return; - }//if - - if (ERROR_INSERTED(10025)) { - ptr.p->errorCode = 325; - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; }//if @@ -1425,10 +1379,7 @@ /** * Finished with all tables, send StartBackupReq */ - ptr.p->masterData.state.setState(STARTED); - ptr.p->tables.first(tabPtr); - ptr.p->errorCode = 0; ptr.p->masterData.startBackup.signalNo = 0; ptr.p->masterData.startBackup.noOfSignals = (ptr.p->tables.noOfElements() + StartBackupReq::MaxTableTriggers - 1) / @@ -1467,9 +1418,12 @@ }//for req->noOfTableTriggers = i; - sendSignalAllWait(ptr, GSN_START_BACKUP_REQ, signal, - StartBackupReq::HeaderLength + - (i * StartBackupReq::TableTriggerLength)); + ptr.p->masterData.gsn = GSN_START_BACKUP_REQ; + ptr.p->masterData.sendCounter = ptr.p->nodes; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_START_BACKUP_REQ, signal, + StartBackupReq::HeaderLength + + (i * StartBackupReq::TableTriggerLength), JBB); } void @@ -1479,15 +1433,13 @@ StartBackupRef* ref = (StartBackupRef*)signal->getDataPtr(); const Uint32 ptrI = ref->backupPtr; - const Uint32 backupId = ref->backupId; + //const Uint32 backupId = ref->backupId; const Uint32 signalNo = ref->signalNo; - const Uint32 nodeId = refToNode(signal->senderBlockRef()); + const Uint32 nodeId = ref->nodeId; BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->setErrorCode(ref->errorCode); startBackupReply(signal, ptr, 
nodeId, signalNo); } @@ -1499,15 +1451,13 @@ StartBackupConf* conf = (StartBackupConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 signalNo = conf->signalNo; const Uint32 nodeId = refToNode(signal->senderBlockRef()); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - startBackupReply(signal, ptr, nodeId, signalNo); } @@ -1524,17 +1474,16 @@ return; } + if (ERROR_INSERTED(10026)) + { + ptr.p->errorCode = 326; + } + if(ptr.p->checkError()){ jam(); - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; } - - if (ERROR_INSERTED(10026)) { - ptr.p->errorCode = 326; - masterAbort(signal, ptr, true); - return; - }//if TablePtr tabPtr; c_tablePool.getPtr(tabPtr, ptr.p->masterData.startBackup.tablePtr); @@ -1566,7 +1515,6 @@ { AlterTrigReq * req =(AlterTrigReq *)signal->getDataPtrSend(); - ptr.p->errorCode = 0; ptr.p->masterData.gsn = GSN_ALTER_TRIG_REQ; ptr.p->masterData.sendCounter = 0; @@ -1608,6 +1556,7 @@ return; }//if ptr.p->masterData.alterTrig.tableId = RNIL; + /** * Finished with all tables */ @@ -1669,11 +1618,9 @@ return; }//if - ptr.p->masterData.gsn = 0; - if(ptr.p->checkError()){ jam(); - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; }//if @@ -1719,11 +1666,10 @@ ndbrequire(ptr.p->masterRef == reference()); ndbrequire(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ); - ptr.p->masterData.gsn = 0; if(ptr.p->checkError()) { jam(); - masterAbort(signal, ptr, true); + masterAbort(signal, ptr); return; }//if @@ -1731,13 +1677,13 @@ jam(); CRASH_INSERTION((10008)); ptr.p->startGCP = gcp; - ptr.p->masterData.state.setState(SCANNING); + ptr.p->masterData.sendCounter= 0; + ptr.p->masterData.gsn = GSN_BACKUP_FRAGMENT_REQ; nextFragment(signal, ptr); } else { jam(); CRASH_INSERTION((10009)); ptr.p->stopGCP = gcp; - ptr.p->masterData.state.setState(STOPPING); 
sendDropTrig(signal, ptr); // regular dropping of triggers }//if } @@ -1787,6 +1733,7 @@ req->fragmentNo = i; req->count = 0; + ptr.p->masterData.sendCounter++; const BlockReference ref = numberToRef(BACKUP, nodeId); sendSignal(ref, GSN_BACKUP_FRAGMENT_REQ, signal, BackupFragmentReq::SignalLength, JBB); @@ -1824,7 +1771,7 @@ BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 tableId = conf->tableId; const Uint32 fragmentNo = conf->fragmentNo; const Uint32 nodeId = refToNode(signal->senderBlockRef()); @@ -1834,10 +1781,9 @@ BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->noOfBytes += noOfBytes; ptr.p->noOfRecords += noOfRecords; + ptr.p->masterData.sendCounter--; TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); @@ -1852,17 +1798,24 @@ fragPtr.p->scanned = 1; fragPtr.p->scanning = 0; - if(ptr.p->checkError()) { - jam(); - masterAbort(signal, ptr, true); - return; - }//if - if (ERROR_INSERTED(10028)) { + if (ERROR_INSERTED(10028)) + { ptr.p->errorCode = 328; - masterAbort(signal, ptr, true); - return; - }//if - nextFragment(signal, ptr); + } + + if(ptr.p->checkError()) + { + if(ptr.p->masterData.sendCounter.done()) + { + jam(); + masterAbort(signal, ptr); + return; + }//if + } + else + { + nextFragment(signal, ptr); + } } void @@ -1874,15 +1827,52 @@ BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr(); const Uint32 ptrI = ref->backupPtr; - const Uint32 backupId = ref->backupId; + //const Uint32 backupId = ref->backupId; + const Uint32 nodeId = ref->nodeId; BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING + TablePtr tabPtr; + ptr.p->tables.first(tabPtr); + for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) { + jam(); + FragmentPtr fragPtr; + Array & frags 
= tabPtr.p->fragments; + const Uint32 fragCount = frags.getSize(); + + for(Uint32 i = 0; ifragments.getPtr(fragPtr, i); + if(fragPtr.p->scanning != 0 && nodeId == fragPtr.p->node) + { + jam(); + ndbrequire(fragPtr.p->scanned == 0); + fragPtr.p->scanned = 1; + fragPtr.p->scanning = 0; + goto done; + } + } + } + ndbrequire(false); +done: + ptr.p->masterData.sendCounter--; ptr.p->setErrorCode(ref->errorCode); - masterAbort(signal, ptr, true); + + if(ptr.p->masterData.sendCounter.done()) + { + jam(); + masterAbort(signal, ptr); + return; + }//if + + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->requestType = AbortBackupOrd::LogBufferFull; + ord->senderData= ptr.i; + execABORT_BACKUP_ORD(signal); } /***************************************************************************** @@ -1910,15 +1900,7 @@ jam(); ptr.p->masterData.dropTrig.tableId = RNIL; - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean); - - if(ptr.p->masterData.state.getState() == STOPPING) { - jam(); - sendStopBackup(signal, ptr); - return; - }//if - ndbrequire(ptr.p->masterData.state.getState() == ABORTING); - masterSendAbortBackup(signal, ptr); + sendStopBackup(signal, ptr); }//if } @@ -2010,7 +1992,6 @@ return; }//if - ptr.p->masterData.gsn = 0; sendDropTrig(signal, ptr); // recursive next } @@ -2023,14 +2004,23 @@ Backup::execSTOP_BACKUP_REF(Signal* signal) { jamEntry(); - ndbrequire(0); + + StopBackupRef* ref = (StopBackupRef*)signal->getDataPtr(); + const Uint32 ptrI = ref->backupPtr; + //const Uint32 backupId = ref->backupId; + const Uint32 nodeId = ref->nodeId; + + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, ptrI); + + ptr.p->setErrorCode(ref->errorCode); + stopBackupReply(signal, ptr, nodeId); } void Backup::sendStopBackup(Signal* signal, BackupRecordPtr ptr) { jam(); - ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ; StopBackupReq* stop = (StopBackupReq*)signal->getDataPtrSend(); stop->backupPtr = 
ptr.i; @@ -2038,8 +2028,11 @@ stop->startGCP = ptr.p->startGCP; stop->stopGCP = ptr.p->stopGCP; - sendSignalAllWait(ptr, GSN_STOP_BACKUP_REQ, signal, - StopBackupReq::SignalLength); + ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ; + ptr.p->masterData.sendCounter = ptr.p->nodes; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + sendSignal(rg, GSN_STOP_BACKUP_REQ, signal, + StopBackupReq::SignalLength, JBB); } void @@ -2049,14 +2042,12 @@ StopBackupConf* conf = (StopBackupConf*)signal->getDataPtr(); const Uint32 ptrI = conf->backupPtr; - const Uint32 backupId = conf->backupId; + //const Uint32 backupId = conf->backupId; const Uint32 nodeId = refToNode(signal->senderBlockRef()); BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - masterAbortCheck(); // macro will do return if ABORTING - ptr.p->noOfLogBytes += conf->noOfLogBytes; ptr.p->noOfLogRecords += conf->noOfLogRecords; @@ -2073,35 +2064,39 @@ return; } - // ptr.p->masterData.state.setState(INITIAL); - - // send backup complete first to slaves so that they know sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupComplete); - - BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend(); - rep->backupId = ptr.p->backupId; - rep->senderData = ptr.p->clientData; - rep->startGCP = ptr.p->startGCP; - rep->stopGCP = ptr.p->stopGCP; - rep->noOfBytes = ptr.p->noOfBytes; - rep->noOfRecords = ptr.p->noOfRecords; - rep->noOfLogBytes = ptr.p->noOfLogBytes; - rep->noOfLogRecords = ptr.p->noOfLogRecords; - rep->nodes = ptr.p->nodes; - sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal, - BackupCompleteRep::SignalLength, JBB); - - signal->theData[0] = EventReport::BackupCompleted; - signal->theData[1] = ptr.p->clientRef; - signal->theData[2] = ptr.p->backupId; - signal->theData[3] = ptr.p->startGCP; - signal->theData[4] = ptr.p->stopGCP; - signal->theData[5] = ptr.p->noOfBytes; - signal->theData[6] = ptr.p->noOfRecords; - signal->theData[7] = ptr.p->noOfLogBytes; - signal->theData[8] = 
ptr.p->noOfLogRecords; - ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9); - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB); + + if(!ptr.p->checkError()) + { + BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend(); + rep->backupId = ptr.p->backupId; + rep->senderData = ptr.p->clientData; + rep->startGCP = ptr.p->startGCP; + rep->stopGCP = ptr.p->stopGCP; + rep->noOfBytes = ptr.p->noOfBytes; + rep->noOfRecords = ptr.p->noOfRecords; + rep->noOfLogBytes = ptr.p->noOfLogBytes; + rep->noOfLogRecords = ptr.p->noOfLogRecords; + rep->nodes = ptr.p->nodes; + sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal, + BackupCompleteRep::SignalLength, JBB); + + signal->theData[0] = EventReport::BackupCompleted; + signal->theData[1] = ptr.p->clientRef; + signal->theData[2] = ptr.p->backupId; + signal->theData[3] = ptr.p->startGCP; + signal->theData[4] = ptr.p->stopGCP; + signal->theData[5] = ptr.p->noOfBytes; + signal->theData[6] = ptr.p->noOfRecords; + signal->theData[7] = ptr.p->noOfLogBytes; + signal->theData[8] = ptr.p->noOfLogRecords; + ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9); + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB); + } + else + { + masterAbort(signal, ptr); + } } /***************************************************************************** @@ -2110,199 +2105,96 @@ * *****************************************************************************/ void -Backup::masterAbort(Signal* signal, BackupRecordPtr ptr, bool controlledAbort) +Backup::masterAbort(Signal* signal, BackupRecordPtr ptr) { - if(ptr.p->masterData.state.getState() == ABORTING) { -#ifdef DEBUG_ABORT - ndbout_c("---- Master already aborting"); -#endif - jam(); - return; - } jam(); #ifdef DEBUG_ABORT ndbout_c("************ masterAbort"); #endif - - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure); - if (!ptr.p->checkError()) - ptr.p->errorCode = 
AbortBackupOrd::BackupFailureDueToNodeFail; - - const State s = ptr.p->masterData.state.getState(); - - ptr.p->masterData.state.setState(ABORTING); - - ndbrequire(s == INITIAL || - s == STARTED || - s == DEFINING || - s == DEFINED || - s == SCANNING || - s == STOPPING || - s == ABORTING); - if(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_UTIL_SEQUENCE_REQ"); - //------------------------------------------------------- - // We are waiting for UTIL_SEQUENCE response. We rely on - // this to arrive and check for ABORTING in response. - // No slaves are involved at this point and ABORT simply - // results in BACKUP_REF to client - //------------------------------------------------------- - /** - * Waiting for Sequence Id - * @see execUTIL_SEQUENCE_CONF - */ - return; - }//if - - if(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ) { + if(ptr.p->masterData.errorCode != 0) + { jam(); - DEBUG_OUT("masterAbort: gsn = GSN_UTIL_LOCK_REQ"); - //------------------------------------------------------- - // We are waiting for UTIL_LOCK response (mutex). We rely on - // this to arrive and check for ABORTING in response. - // No slaves are involved at this point and ABORT simply - // results in BACKUP_REF to client - //------------------------------------------------------- - /** - * Waiting for lock - * @see execUTIL_LOCK_CONF - */ return; - }//if - - /** - * Unlock mutexes only at master - */ - jam(); - Mutex mutex1(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex); - jam(); - mutex1.unlock(); // ignore response - - jam(); - Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex); - jam(); - mutex2.unlock(); // ignore response - - if (!controlledAbort) { - jam(); - if (s == DEFINING) { - jam(); -//------------------------------------------------------- -// If we are in the defining phase all work is done by -// slaves. 
No triggers have been allocated thus slaves -// may free all "Master" resources, let them know... -//------------------------------------------------------- - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean); - return; - }//if - if (s == DEFINED) { - jam(); -//------------------------------------------------------- -// DEFINED is the state when triggers are created. We rely -// on that DICT will report create trigger failure in case -// of node failure. Thus no special action is needed here. -// We will check for errorCode != 0 when receiving -// replies on create trigger. -//------------------------------------------------------- - return; - }//if - if(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_WAIT_GCP_REQ"); -//------------------------------------------------------- -// We are waiting for WAIT_GCP response. We rely on -// this to arrive and check for ABORTING in response. -//------------------------------------------------------- - - /** - * Waiting for GCP - * @see execWAIT_GCP_CONF - */ - return; - }//if - - if(ptr.p->masterData.gsn == GSN_ALTER_TRIG_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_ALTER_TRIG_REQ"); -//------------------------------------------------------- -// We are waiting for ALTER_TRIG response. We rely on -// this to arrive and check for ABORTING in response. -//------------------------------------------------------- + } - /** - * All triggers haven't been created yet - */ - return; - }//if + BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend(); + rep->backupId = ptr.p->backupId; + rep->senderData = ptr.p->clientData; + rep->reason = ptr.p->errorCode; + sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal, + BackupAbortRep::SignalLength, JBB); - if(ptr.p->masterData.gsn == GSN_DROP_TRIG_REQ) { - jam(); - DEBUG_OUT("masterAbort: gsn = GSN_DROP_TRIG_REQ"); -//------------------------------------------------------- -// We are waiting for DROP_TRIG response. 
We rely on -// this to arrive and will continue dropping triggers -// until completed. -//------------------------------------------------------- + signal->theData[0] = EventReport::BackupAborted; + signal->theData[1] = ptr.p->clientRef; + signal->theData[2] = ptr.p->backupId; + signal->theData[3] = ptr.p->errorCode; + sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - /** - * I'm currently dropping the trigger - */ - return; - }//if - }//if + ndbrequire(ptr.p->errorCode); + ptr.p->masterData.errorCode = ptr.p->errorCode; -//------------------------------------------------------- -// If we are waiting for START_BACKUP responses we can -// safely start dropping triggers (state == STARTED). -// We will ignore any START_BACKUP responses after this. -//------------------------------------------------------- - DEBUG_OUT("masterAbort: sendDropTrig"); - sendDropTrig(signal, ptr); // dropping due to error + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->senderData= ptr.i; + NodeReceiverGroup rg(BACKUP, ptr.p->nodes); + + switch(ptr.p->masterData.gsn){ + case GSN_DEFINE_BACKUP_REQ: + ord->requestType = AbortBackupOrd::BackupFailure; + sendSignal(rg, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); + return; + case GSN_CREATE_TRIG_REQ: + case GSN_START_BACKUP_REQ: + case GSN_ALTER_TRIG_REQ: + case GSN_WAIT_GCP_REQ: + case GSN_BACKUP_FRAGMENT_REQ: + jam(); + ptr.p->stopGCP= ptr.p->startGCP + 1; + sendDropTrig(signal, ptr); // dropping due to error + return; + case GSN_UTIL_SEQUENCE_REQ: + case GSN_UTIL_LOCK_REQ: + case GSN_DROP_TRIG_REQ: + ndbrequire(false); + return; + case GSN_STOP_BACKUP_REQ: + return; + } } void -Backup::masterSendAbortBackup(Signal* signal, BackupRecordPtr ptr) +Backup::abort_scan(Signal * signal, BackupRecordPtr ptr) { - if (ptr.p->masterData.state.getState() != ABORTING) { - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure); - 
ptr.p->masterData.state.setState(ABORTING); - } - const State s = ptr.p->masterData.state.getAbortState(); - - /** - * First inform to client - */ - if(s == DEFINING) { - jam(); -#ifdef DEBUG_ABORT - ndbout_c("** Abort: sending BACKUP_REF to mgmtsrvr"); -#endif - sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData, - ptr.p->errorCode); + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->senderData= ptr.i; + ord->requestType = AbortBackupOrd::AbortScan; - } else { + TablePtr tabPtr; + ptr.p->tables.first(tabPtr); + for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) { jam(); -#ifdef DEBUG_ABORT - ndbout_c("** Abort: sending BACKUP_ABORT_REP to mgmtsrvr"); -#endif - BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend(); - rep->backupId = ptr.p->backupId; - rep->senderData = ptr.p->clientData; - rep->reason = ptr.p->errorCode; - sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal, - BackupAbortRep::SignalLength, JBB); - - signal->theData[0] = EventReport::BackupAborted; - signal->theData[1] = ptr.p->clientRef; - signal->theData[2] = ptr.p->backupId; - signal->theData[3] = ptr.p->errorCode; - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB); - }//if - - // ptr.p->masterData.state.setState(INITIAL); - - sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure); + FragmentPtr fragPtr; + Array<Fragment> & frags = tabPtr.p->fragments; + const Uint32 fragCount = frags.getSize(); + + for(Uint32 i = 0; i<fragCount; i++) { + jam(); + tabPtr.p->fragments.getPtr(fragPtr, i); + const Uint32 nodeId = fragPtr.p->node; + if(fragPtr.p->scanning != 0 && ptr.p->nodes.get(nodeId)) { + jam(); + + const BlockReference ref = numberToRef(BACKUP, nodeId); + sendSignal(ref, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); + + } + } + } } /***************************************************************************** @@ -2313,26 +2205,17 @@ void Backup::defineBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 
errCode) { - if (ptr.p->slaveState.getState() == ABORTING) { - jam(); - return; - } - ptr.p->slaveState.setState(ABORTING); - - if (errCode != 0) { - jam(); - ptr.p->setErrorCode(errCode); - }//if + ptr.p->m_gsn = GSN_DEFINE_BACKUP_REF; + ptr.p->setErrorCode(errCode); ndbrequire(ptr.p->errorCode != 0); - + DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtrSend(); ref->backupId = ptr.p->backupId; ref->backupPtr = ptr.i; ref->errorCode = ptr.p->errorCode; + ref->nodeId = getOwnNodeId(); sendSignal(ptr.p->masterRef, GSN_DEFINE_BACKUP_REF, signal, DefineBackupRef::SignalLength, JBB); - - closeFiles(signal, ptr); } void @@ -2366,6 +2249,7 @@ CRASH_INSERTION((10014)); + ptr.p->m_gsn = GSN_DEFINE_BACKUP_REQ; ptr.p->slaveState.forceState(INITIAL); ptr.p->slaveState.setState(DEFINING); ptr.p->errorCode = 0; @@ -2432,7 +2316,7 @@ files[i].p->tableId = RNIL; files[i].p->backupPtr = ptr.i; files[i].p->filePointer = RNIL; - files[i].p->fileDone = 0; + files[i].p->fileClosing = 0; files[i].p->fileOpened = 0; files[i].p->fileRunning = 0; files[i].p->scanRunning = 0; @@ -2468,17 +2352,14 @@ ptr.p->logFilePtr = files[1].i; ptr.p->dataFilePtr = files[2].i; - if (!verifyNodesAlive(ptr.p->nodes)) { + if (!verifyNodesAlive(ptr, ptr.p->nodes)) { jam(); defineBackupRef(signal, ptr, DefineBackupRef::Undefined); - // sendBackupRef(signal, ptr, - // ptr.p->errorCode?ptr.p->errorCode:BackupRef::Undefined); return; }//if if (ERROR_INSERTED(10027)) { jam(); defineBackupRef(signal, ptr, 327); - // sendBackupRef(signal, ptr, 327); return; }//if @@ -2546,8 +2427,6 @@ return; }//if - defineSlaveAbortCheck(); - /** * All tables fetched */ @@ -2679,8 +2558,6 @@ }//if }//for - defineSlaveAbortCheck(); - /** * Did open succeed for all files */ @@ -2810,8 +2687,6 @@ BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - defineBackupRef(signal, ptr, ref->errorCode); } @@ -2833,8 +2708,6 @@ BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - 
defineSlaveAbortCheck(); - SegmentedSectionPtr dictTabInfoPtr; signal->getSection(dictTabInfoPtr, GetTabInfoConf::DICT_TAB_INFO); ndbrequire(dictTabInfoPtr.sz == len); @@ -3047,8 +2920,6 @@ BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); @@ -3127,8 +2998,6 @@ BackupRecordPtr ptr; c_backupPool.getPtr(ptr, senderData); - defineSlaveAbortCheck(); - TablePtr tabPtr; ndbrequire(findTable(ptr, tabPtr, tableId)); @@ -3143,9 +3012,7 @@ void Backup::getFragmentInfoDone(Signal* signal, BackupRecordPtr ptr) { - // Slave must now hold on to master data until - // AbortBackupOrd::OkToClean signal - ptr.p->okToCleanMaster = false; + ptr.p->m_gsn = GSN_DEFINE_BACKUP_CONF; ptr.p->slaveState.setState(DEFINED); DefineBackupConf * conf = (DefineBackupConf*)signal->getDataPtr(); conf->backupPtr = ptr.i; @@ -3169,16 +3036,15 @@ StartBackupReq* req = (StartBackupReq*)signal->getDataPtr(); const Uint32 ptrI = req->backupPtr; - const Uint32 backupId = req->backupId; + //const Uint32 backupId = req->backupId; const Uint32 signalNo = req->signalNo; - + BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - - slaveAbortCheck(); // macro will do return if ABORTING ptr.p->slaveState.setState(STARTED); - + ptr.p->m_gsn = GSN_START_BACKUP_REQ; + for(Uint32 i = 0; i<req->noOfTableTriggers; i++) { jam(); TablePtr tabPtr; @@ -3191,11 +3057,13 @@ TriggerPtr trigPtr; if(!ptr.p->triggers.seizeId(trigPtr, triggerId)) { jam(); + ptr.p->m_gsn = GSN_START_BACKUP_REF; StartBackupRef* ref = (StartBackupRef*)signal->getDataPtrSend(); ref->backupPtr = ptr.i; ref->backupId = ptr.p->backupId; ref->signalNo = signalNo; ref->errorCode = StartBackupRef::FailedToAllocateTriggerRecord; + ref->nodeId = getOwnNodeId(); sendSignal(ptr.p->masterRef, GSN_START_BACKUP_REF, signal, StartBackupRef::SignalLength, JBB); return; @@ -3233,6 +3101,7 @@ }//if }//for + ptr.p->m_gsn = GSN_START_BACKUP_CONF; StartBackupConf* conf = 
(StartBackupConf*)signal->getDataPtrSend(); conf->backupPtr = ptr.i; conf->backupId = ptr.p->backupId; @@ -3255,7 +3124,7 @@ CRASH_INSERTION((10016)); const Uint32 ptrI = req->backupPtr; - const Uint32 backupId = req->backupId; + //const Uint32 backupId = req->backupId; const Uint32 tableId = req->tableId; const Uint32 fragNo = req->fragmentNo; const Uint32 count = req->count; @@ -3266,10 +3135,9 @@ BackupRecordPtr ptr; c_backupPool.getPtr(ptr, ptrI); - slaveAbortCheck(); // macro will do return if ABORTING - ptr.p->slaveState.setState(SCANNING); - + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REQ; + /** * Get file */ @@ -3280,7 +3148,7 @@ ndbrequire(filePtr.p->fileOpened == 1); ndbrequire(filePtr.p->fileRunning == 1); ndbrequire(filePtr.p->scanRunning == 0); - ndbrequire(filePtr.p->fileDone == 0); + ndbrequire(filePtr.p->fileClosing == 0); /** * Get table @@ -3350,7 +3218,7 @@ req->transId1 = 0; req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); req->clientOpPtr= filePtr.i; - req->batch_size_rows= 16; + req->batch_size_rows= parallelism; req->batch_size_bytes= 0; sendSignal(DBLQH_REF, GSN_SCAN_FRAGREQ, signal, ScanFragReq::SignalLength, JBB); @@ -3572,6 +3440,13 @@ return false; } +bool +Backup::OperationRecord::closeScan() +{ + opNoDone = opNoConf = opLen = 0; + return true; +} + bool Backup::OperationRecord::scanConf(Uint32 noOfOps, Uint32 total_len) { @@ -3600,11 +3475,9 @@ c_backupFilePool.getPtr(filePtr, filePtrI); filePtr.p->errorCode = ref->errorCode; + filePtr.p->scanRunning = 0; - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - - abortFile(signal, ptr, filePtr); + backupFragmentRef(signal, filePtr); } void @@ -3639,9 +3512,11 @@ { jam(); - if(filePtr.p->errorCode != 0){ + if(filePtr.p->errorCode != 0) + { jam(); - abortFileHook(signal, filePtr, true); // Scan completed + filePtr.p->scanRunning = 0; + backupFragmentRef(signal, filePtr); // Scan completed return; }//if @@ -3669,20 +3544,51 @@ sendSignal(ptr.p->masterRef, 
GSN_BACKUP_FRAGMENT_CONF, signal, BackupFragmentConf::SignalLength, JBB); + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF; ptr.p->slaveState.setState(STARTED); return; } + +void +Backup::backupFragmentRef(Signal * signal, BackupFilePtr filePtr) +{ + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, filePtr.p->backupPtr); + + ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REF; + + BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtrSend(); + ref->backupId = ptr.p->backupId; + ref->backupPtr = ptr.i; + ref->nodeId = getOwnNodeId(); + ref->errorCode = ptr.p->errorCode; + sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_REF, signal, + BackupFragmentRef::SignalLength, JBB); +} void Backup::checkScan(Signal* signal, BackupFilePtr filePtr) { - if(filePtr.p->errorCode != 0){ + OperationRecord & op = filePtr.p->operation; + + if(filePtr.p->errorCode != 0) + { jam(); - abortFileHook(signal, filePtr, false); // Scan not completed + + /** + * Close scan + */ + op.closeScan(); + ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend(); + req->senderData = filePtr.i; + req->closeFlag = 1; + req->transId1 = 0; + req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); + sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + ScanFragNextReq::SignalLength, JBB); return; }//if - - OperationRecord & op = filePtr.p->operation; + if(op.newScan()) { jam(); @@ -3693,8 +3599,28 @@ req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); req->batch_size_rows= 16; req->batch_size_bytes= 0; - sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, - ScanFragNextReq::SignalLength, JBB); + if(ERROR_INSERTED(10032)) + sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + 100, ScanFragNextReq::SignalLength); + else if(ERROR_INSERTED(10033)) + { + SET_ERROR_INSERT_VALUE(10032); + sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + 10000, ScanFragNextReq::SignalLength); + + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, filePtr.p->backupPtr); + AbortBackupOrd *ord = 
(AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->requestType = AbortBackupOrd::FileOrScanError; + ord->senderData= ptr.i; + sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); + } + else + sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, + ScanFragNextReq::SignalLength, JBB); return; }//if @@ -3718,11 +3644,8 @@ filePtr.p->fileRunning = 0; filePtr.p->errorCode = errCode; - - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - - abortFile(signal, ptr, filePtr); + + checkFile(signal, filePtr); } void @@ -3738,12 +3661,6 @@ BackupFilePtr filePtr; c_backupFilePool.getPtr(filePtr, filePtrI); - - if (ERROR_INSERTED(10029)) { - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - abortFile(signal, ptr, filePtr); - }//if OperationRecord & op = filePtr.p->operation; @@ -3761,30 +3678,25 @@ #endif OperationRecord & op = filePtr.p->operation; - + Uint32 * tmp, sz; bool eof; - if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof)) { + if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof)) + { jam(); - if(filePtr.p->errorCode == 0) { - jam(); - FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); - req->filePointer = filePtr.p->filePointer; - req->userPointer = filePtr.i; - req->userReference = reference(); - req->varIndex = 0; - req->offset = tmp - c_startOfPages; - req->size = sz; - - sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, - FsAppendReq::SignalLength, JBA); - return; - } else { - jam(); - if (filePtr.p->scanRunning == 1) - eof = false; - }//if - }//if + jam(); + FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); + req->filePointer = filePtr.p->filePointer; + req->userPointer = filePtr.i; + req->userReference = reference(); + req->varIndex = 0; + req->offset = tmp - c_startOfPages; + req->size = sz; + + sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, + FsAppendReq::SignalLength, JBA); + return; + } if(!eof) { jam(); @@ 
-3794,9 +3706,7 @@ return; }//if - ndbrequire(filePtr.p->fileDone == 1); - - if(sz > 0 && filePtr.p->errorCode == 0) { + if(sz > 0) { jam(); FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend(); req->filePointer = filePtr.p->filePointer; @@ -3812,6 +3722,7 @@ }//if filePtr.p->fileRunning = 0; + filePtr.p->fileClosing = 1; FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend(); req->filePointer = filePtr.p->filePointer; @@ -3819,64 +3730,11 @@ req->userReference = reference(); req->fileFlag = 0; #ifdef DEBUG_ABORT - ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i); + ndbout_c("***** a FSCLOSEREQ filePtr.i = %u", filePtr.i); #endif sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA); } -void -Backup::abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr) -{ - jam(); - - if(ptr.p->slaveState.getState() != ABORTING) { - /** - * Inform master of failure - */ - jam(); - ptr.p->slaveState.setState(ABORTING); - ptr.p->setErrorCode(AbortBackupOrd::FileOrScanError); - sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::FileOrScanError); - return; - }//if - - - for(ptr.p->files.first(filePtr); - filePtr.i!=RNIL; - ptr.p->files.next(filePtr)){ - jam(); - filePtr.p->errorCode = 1; - }//for - - closeFiles(signal, ptr); -} - -void -Backup::abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanComplete) -{ - jam(); - - if(!scanComplete) { - jam(); - - ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend(); - req->senderData = filePtr.i; - req->closeFlag = 1; - req->transId1 = 0; - req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8); - sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal, - ScanFragNextReq::SignalLength, JBB); - return; - }//if - - filePtr.p->scanRunning = 0; - - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - - filePtr.i = RNIL; - abortFile(signal, ptr, filePtr); -} /**************************************************************************** * @@ -3953,27 +3811,30 
@@ }//if BackupFormat::LogFile::LogEntry * logEntry = trigPtr.p->logEntry; - if(logEntry == 0) { + if(logEntry == 0) + { jam(); Uint32 * dst; FsBuffer & buf = trigPtr.p->operation->dataBuffer; ndbrequire(trigPtr.p->maxRecordSize <= buf.getMaxWrite()); - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, trigPtr.p->backupPtr); - if(!buf.getWritePtr(&dst, trigPtr.p->maxRecordSize)) { + if(ERROR_INSERTED(10030) || + !buf.getWritePtr(&dst, trigPtr.p->maxRecordSize)) + { jam(); + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, trigPtr.p->backupPtr); trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull; - sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull); - return; - }//if - if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) { - jam(); - trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull; - sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull); + AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); + ord->backupId = ptr.p->backupId; + ord->backupPtr = ptr.i; + ord->requestType = AbortBackupOrd::LogBufferFull; + ord->senderData= ptr.i; + sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal, + AbortBackupOrd::SignalLength, JBB); return; }//if - + logEntry = (BackupFormat::LogFile::LogEntry *)dst; trigPtr.p->logEntry = logEntry; logEntry->Length = 0; @@ -4015,9 +3876,10 @@ BackupRecordPtr ptr; c_backupPool.getPtr(ptr, trigPtr.p->backupPtr); - if(gci != ptr.p->currGCP) { + if(gci != ptr.p->currGCP) + { jam(); - + trigPtr.p->logEntry->TriggerEvent = htonl(trigPtr.p->event | 0x10000); trigPtr.p->logEntry->Data[len] = htonl(gci); len ++; @@ -4036,20 +3898,6 @@ } void -Backup::sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr, - Uint32 requestType) -{ - jam(); - AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); - ord->backupId = ptr.p->backupId; - ord->backupPtr = ptr.i; - ord->requestType = requestType; - ord->senderData= ptr.i; - sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal, - 
AbortBackupOrd::SignalLength, JBB); -} - -void Backup::sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 requestType) { @@ -4085,7 +3933,7 @@ CRASH_INSERTION((10020)); const Uint32 ptrI = req->backupPtr; - const Uint32 backupId = req->backupId; + //const Uint32 backupId = req->backupId; const Uint32 startGCP = req->startGCP; const Uint32 stopGCP = req->stopGCP; @@ -4101,7 +3949,7 @@ c_backupPool.getPtr(ptr, ptrI); ptr.p->slaveState.setState(STOPPING); - slaveAbortCheck(); // macro will do return if ABORTING + ptr.p->m_gsn = GSN_STOP_BACKUP_REQ; /** * Insert footers @@ -4140,12 +3988,6 @@ void Backup::closeFiles(Signal* sig, BackupRecordPtr ptr) { - if (ptr.p->closingFiles) { - jam(); - return; - } - ptr.p->closingFiles = true; - /** * Close all files */ @@ -4161,12 +4003,12 @@ jam(); openCount++; - if(filePtr.p->fileDone == 1){ + if(filePtr.p->fileClosing == 1){ jam(); continue; }//if - filePtr.p->fileDone = 1; + filePtr.p->fileClosing = 1; if(filePtr.p->fileRunning == 1){ jam(); @@ -4183,7 +4025,7 @@ req->userReference = reference(); req->fileFlag = 0; #ifdef DEBUG_ABORT - ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i); + ndbout_c("***** b FSCLOSEREQ filePtr.i = %u", filePtr.i); #endif sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, sig, FsCloseReq::SignalLength, JBA); @@ -4210,11 +4052,6 @@ BackupRecordPtr ptr; c_backupPool.getPtr(ptr, filePtr.p->backupPtr); - /** - * This should only happen during abort of backup - */ - ndbrequire(ptr.p->slaveState.getState() == ABORTING); - filePtr.p->fileOpened = 1; FsConf * conf = (FsConf*)signal->getDataPtr(); conf->userPointer = filePtrI; @@ -4237,7 +4074,7 @@ ndbout_c("***** FSCLOSECONF filePtrI = %u", filePtrI); #endif - ndbrequire(filePtr.p->fileDone == 1); + ndbrequire(filePtr.p->fileClosing == 1); ndbrequire(filePtr.p->fileOpened == 1); ndbrequire(filePtr.p->fileRunning == 0); ndbrequire(filePtr.p->scanRunning == 0); @@ -4265,25 +4102,20 @@ { jam(); - if(ptr.p->slaveState.getState() == STOPPING) { - jam(); - 
BackupFilePtr filePtr; - ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr); - - StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend(); - conf->backupId = ptr.p->backupId; - conf->backupPtr = ptr.i; - conf->noOfLogBytes = filePtr.p->operation.noOfBytes; - conf->noOfLogRecords = filePtr.p->operation.noOfRecords; - sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal, - StopBackupConf::SignalLength, JBB); - - ptr.p->slaveState.setState(CLEANING); - return; - }//if + jam(); + BackupFilePtr filePtr; + ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr); - ndbrequire(ptr.p->slaveState.getState() == ABORTING); - removeBackup(signal, ptr); + StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend(); + conf->backupId = ptr.p->backupId; + conf->backupPtr = ptr.i; + conf->noOfLogBytes = filePtr.p->operation.noOfBytes; + conf->noOfLogRecords = filePtr.p->operation.noOfRecords; + sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal, + StopBackupConf::SignalLength, JBB); + + ptr.p->m_gsn = GSN_STOP_BACKUP_CONF; + ptr.p->slaveState.setState(CLEANING); } /***************************************************************************** @@ -4291,57 +4123,6 @@ * Slave functionallity: Abort backup * *****************************************************************************/ -void -Backup::removeBackup(Signal* signal, BackupRecordPtr ptr) -{ - jam(); - - FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend(); - req->userReference = reference(); - req->userPointer = ptr.i; - req->directory = 1; - req->ownDirectory = 1; - FsOpenReq::setVersion(req->fileNumber, 2); - FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); - FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId); - FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId()); - sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal, - FsRemoveReq::SignalLength, JBA); -} - -void -Backup::execFSREMOVEREF(Signal* signal) -{ - jamEntry(); - ndbrequire(0); -} - -void -Backup::execFSREMOVECONF(Signal* 
signal){ - jamEntry(); - - FsConf * conf = (FsConf*)signal->getDataPtr(); - const Uint32 ptrI = conf->userPointer; - - /** - * Get backup record - */ - BackupRecordPtr ptr; - c_backupPool.getPtr(ptr, ptrI); - - ndbrequire(ptr.p->slaveState.getState() == ABORTING); - if (ptr.p->masterRef == reference()) { - if (ptr.p->masterData.state.getAbortState() == DEFINING) { - jam(); - sendBackupRef(signal, ptr, ptr.p->errorCode); - return; - } else { - jam(); - }//if - }//if - cleanupSlaveResources(ptr); -} - /***************************************************************************** * * Slave functionallity: Abort backup @@ -4394,8 +4175,7 @@ if (c_backupPool.findId(senderData)) { jam(); c_backupPool.getPtr(ptr, senderData); - } else { // TODO might be abort sent to not master, - // or master aborting too early + } else { jam(); #ifdef DEBUG_ABORT ndbout_c("Backup: abort request type=%u on id=%u,%u not found", @@ -4405,15 +4185,15 @@ } }//if + ptr.p->m_gsn = GSN_ABORT_BACKUP_ORD; const bool isCoordinator = (ptr.p->masterRef == reference()); - + bool ok = false; switch(requestType){ /** * Requests sent to master */ - case AbortBackupOrd::ClientAbort: jam(); // fall through @@ -4422,113 +4202,61 @@ // fall through case AbortBackupOrd::FileOrScanError: jam(); - if(ptr.p->masterData.state.getState() == ABORTING) { -#ifdef DEBUG_ABORT - ndbout_c("---- Already aborting"); -#endif - jam(); - return; - } + ndbrequire(isCoordinator); ptr.p->setErrorCode(requestType); - ndbrequire(isCoordinator); // Sent from slave to coordinator - masterAbort(signal, ptr, false); + if(ptr.p->masterData.gsn == GSN_BACKUP_FRAGMENT_REQ) + { + /** + * Only scans are actively aborted + */ + abort_scan(signal, ptr); + } return; - + /** - * Info sent to slave + * Requests sent to slave */ - - case AbortBackupOrd::OkToClean: + case AbortBackupOrd::AbortScan: jam(); - cleanupMasterResources(ptr); + ptr.p->setErrorCode(requestType); return; - - /** - * Requests sent to slave - */ - + case 
AbortBackupOrd::BackupComplete: jam(); - if (ptr.p->slaveState.getState() == CLEANING) { // TODO what if state is - // not CLEANING? - jam(); - cleanupSlaveResources(ptr); - }//if + cleanup(signal, ptr); return; - break; - case AbortBackupOrd::BackupFailureDueToNodeFail: - jam(); - ok = true; - if (ptr.p->errorCode != 0) - ptr.p->setErrorCode(requestType); - break; case AbortBackupOrd::BackupFailure: - jam(); - ok = true; - break; + case AbortBackupOrd::BackupFailureDueToNodeFail: + case AbortBackupOrd::OkToClean: + case AbortBackupOrd::IncompatibleVersions: +#ifndef VM_TRACE + default: +#endif + ptr.p->setErrorCode(requestType); + ok= true; } ndbrequire(ok); - /** - * Slave abort - */ - slaveAbort(signal, ptr); -} - -void -Backup::slaveAbort(Signal* signal, BackupRecordPtr ptr) -{ - if(ptr.p->slaveState.getState() == ABORTING) { -#ifdef DEBUG_ABORT - ndbout_c("---- Slave already aborting"); -#endif - jam(); - return; + Uint32 ref= ptr.p->masterRef; + ptr.p->masterRef = reference(); + ptr.p->nodes.clear(); + ptr.p->nodes.set(getOwnNodeId()); + + if(ref == reference()) + { + ptr.p->stopGCP= ptr.p->startGCP + 1; + sendDropTrig(signal, ptr); } -#ifdef DEBUG_ABORT - ndbout_c("************* slaveAbort"); -#endif - - State slaveState = ptr.p->slaveState.getState(); - ptr.p->slaveState.setState(ABORTING); - switch(slaveState) { - case DEFINING: - jam(); - return; -//------------------------------------------ -// Will watch for the abort at various places -// in the defining phase. 
-//------------------------------------------ - case ABORTING: - jam(); - //Fall through - case DEFINED: - jam(); - //Fall through - case STOPPING: - jam(); + else + { + ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ; + ptr.p->masterData.sendCounter.clearWaitingFor(); + ptr.p->masterData.sendCounter.setWaitingFor(getOwnNodeId()); closeFiles(signal, ptr); - return; - case STARTED: - jam(); - //Fall through - case SCANNING: - jam(); - BackupFilePtr filePtr; - filePtr.i = RNIL; - abortFile(signal, ptr, filePtr); - return; - case CLEANING: - jam(); - cleanupSlaveResources(ptr); - return; - case INITIAL: - jam(); - ndbrequire(false); - return; } } + void Backup::dumpUsedResources() { @@ -4576,12 +4304,8 @@ } void -Backup::cleanupMasterResources(BackupRecordPtr ptr) +Backup::cleanup(Signal* signal, BackupRecordPtr ptr) { -#ifdef DEBUG_ABORT - ndbout_c("******** Cleanup Master Resources *********"); - ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode); -#endif TablePtr tabPtr; for(ptr.p->tables.first(tabPtr); tabPtr.i != RNIL;ptr.p->tables.next(tabPtr)) @@ -4601,20 +4325,6 @@ tabPtr.p->triggerIds[j] = ILLEGAL_TRIGGER_ID; }//for }//for - ptr.p->tables.release(); - ptr.p->triggers.release(); - ptr.p->okToCleanMaster = true; - - cleanupFinalResources(ptr); -} - -void -Backup::cleanupSlaveResources(BackupRecordPtr ptr) -{ -#ifdef DEBUG_ABORT - ndbout_c("******** Clean Up Slave Resources*********"); - ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode); -#endif BackupFilePtr filePtr; for(ptr.p->files.first(filePtr); @@ -4626,35 +4336,65 @@ ndbrequire(filePtr.p->scanRunning == 0); filePtr.p->pages.release(); }//for + ptr.p->files.release(); + ptr.p->tables.release(); + ptr.p->triggers.release(); + + ptr.p->tables.release(); + ptr.p->triggers.release(); + ptr.p->pages.release(); + ptr.p->backupId = ~0; + + if(ptr.p->checkError()) + removeBackup(signal, ptr); + else + c_backups.release(ptr); +} + - cleanupFinalResources(ptr); +void 
+Backup::removeBackup(Signal* signal, BackupRecordPtr ptr) +{ + jam(); + + FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend(); + req->userReference = reference(); + req->userPointer = ptr.i; + req->directory = 1; + req->ownDirectory = 1; + FsOpenReq::setVersion(req->fileNumber, 2); + FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL); + FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId); + FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId()); + sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal, + FsRemoveReq::SignalLength, JBA); } void -Backup::cleanupFinalResources(BackupRecordPtr ptr) +Backup::execFSREMOVEREF(Signal* signal) { -#ifdef DEBUG_ABORT - ndbout_c("******** Clean Up Final Resources*********"); - ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode); -#endif + jamEntry(); + FsRef * ref = (FsRef*)signal->getDataPtr(); + const Uint32 ptrI = ref->userPointer; - // if (!ptr.p->tables.empty() || !ptr.p->files.empty()) { - if (!ptr.p->okToCleanMaster || !ptr.p->files.empty()) { - jam(); -#ifdef DEBUG_ABORT - ndbout_c("******** Waiting to do final cleanup"); -#endif - return; - } - ptr.p->pages.release(); - ptr.p->masterData.state.setState(INITIAL); - ptr.p->slaveState.setState(INITIAL); - ptr.p->backupId = 0; + FsConf * conf = (FsConf*)signal->getDataPtr(); + conf->userPointer = ptrI; + execFSREMOVECONF(signal); +} - ptr.p->closingFiles = false; - ptr.p->okToCleanMaster = true; +void +Backup::execFSREMOVECONF(Signal* signal){ + jamEntry(); + FsConf * conf = (FsConf*)signal->getDataPtr(); + const Uint32 ptrI = conf->userPointer; + + /** + * Get backup record + */ + BackupRecordPtr ptr; + c_backupPool.getPtr(ptr, ptrI); c_backups.release(ptr); - // ndbrequire(false); } + --- 1.6/ndb/src/kernel/blocks/backup/Backup.hpp Thu Dec 9 14:04:26 2004 +++ 1.7/ndb/src/kernel/blocks/backup/Backup.hpp Fri Apr 22 09:07:22 2005 @@ -232,6 +232,7 @@ */ bool newScan(); bool scanConf(Uint32 noOfOps, Uint32 opLen); + bool closeScan(); 
/** * Per record @@ -330,7 +331,7 @@ Uint8 fileOpened; Uint8 fileRunning; - Uint8 fileDone; + Uint8 fileClosing; Uint8 scanRunning; }; typedef Ptr BackupFilePtr; @@ -403,13 +404,11 @@ ArrayPool & trp) : slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1) , tables(tp), triggers(trp), files(bp), pages(pp) - , masterData(b, validMasterTransitions, validMasterTransitionsCount) - , backup(b) - { - closingFiles = false; - okToCleanMaster = true; - } + , masterData(b), backup(b) + { + } + Uint32 m_gsn; CompoundState slaveState; Uint32 clientRef; @@ -420,9 +419,6 @@ Uint32 errorCode; NdbNodeBitmask nodes; - bool okToCleanMaster; - bool closingFiles; - Uint64 noOfBytes; Uint64 noOfRecords; Uint64 noOfLogBytes; @@ -444,15 +440,13 @@ SimpleProperties props;// Used for (un)packing backup request struct MasterData { - MasterData(Backup & b, const State valid[], Uint32 count) - : state(b, valid, count, 0) - { - } + MasterData(Backup & b) + { + } MutexHandle2 m_defineBackupMutex; MutexHandle2 m_dictCommitTableMutex; Uint32 gsn; - CompoundState state; SignalCounter sendCounter; Uint32 errorCode; struct { @@ -557,7 +551,8 @@ void stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId); void defineBackupRef(Signal*, BackupRecordPtr, Uint32 errCode = 0); - + void backupFragmentRef(Signal * signal, BackupFilePtr filePtr); + void nextFragment(Signal*, BackupRecordPtr); void sendCreateTrig(Signal*, BackupRecordPtr ptr, TablePtr tabPtr); @@ -578,14 +573,14 @@ void sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 errCode); void sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr, Uint32 errCode); - void masterAbort(Signal*, BackupRecordPtr ptr, bool controlledAbort); + void masterAbort(Signal*, BackupRecordPtr ptr); void masterSendAbortBackup(Signal*, BackupRecordPtr ptr); void slaveAbort(Signal*, BackupRecordPtr ptr); void abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr); void abortFileHook(Signal* signal, 
BackupFilePtr filePtr, bool scanDone); - bool verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask); + bool verifyNodesAlive(BackupRecordPtr, const NdbNodeBitmask& aNodeBitMask); bool checkAbort(BackupRecordPtr ptr); void checkNodeFail(Signal* signal, BackupRecordPtr ptr, @@ -603,9 +598,8 @@ void sendBackupRef(BlockReference ref, Signal *signal, Uint32 senderData, Uint32 errorCode); void dumpUsedResources(); - void cleanupMasterResources(BackupRecordPtr ptr); - void cleanupSlaveResources(BackupRecordPtr ptr); - void cleanupFinalResources(BackupRecordPtr ptr); + void cleanup(Signal*, BackupRecordPtr ptr); + void abort_scan(Signal*, BackupRecordPtr ptr); void removeBackup(Signal*, BackupRecordPtr ptr); void sendSTTORRY(Signal*); --- 1.1/ndb/src/kernel/blocks/backup/Backup.txt Wed Apr 14 10:24:18 2004 +++ 1.2/ndb/src/kernel/blocks/backup/Backup.txt Fri Apr 22 09:07:22 2005 @@ -341,3 +341,28 @@ (ERROR_INSERTED(10022))) { if (ERROR_INSERTED(10029)) { if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) { + +----- XXX --- + +DEFINE_BACKUP_REF -> + ABORT_BACKUP_ORD(no reply) when all DEFINE_BACKUP replies has arrived + +START_BACKUP_REF + ABORT_BACKUP_ORD(no reply) when all START_BACKUP_ replies has arrived + +BACKUP_FRAGMENT_REF + ABORT_BACKUP_ORD(reply) directly to all nodes running BACKUP_FRAGMENT + + When all nodes has replied BACKUP_FRAGMENT + ABORT_BACKUP_ORD(no reply) + +STOP_BACKUP_REF + ABORT_BACKUP_ORD(no reply) when all STOP_BACKUP_ replies has arrived + +NF_COMPLETE_REP + slave dies + master sends OUTSTANDING_REF to self + slave does nothing + + master dies + slave elects self as master and sets only itself as participant --- 1.9/ndb/src/kernel/blocks/backup/BackupInit.cpp Wed Jan 19 09:15:31 2005 +++ 1.10/ndb/src/kernel/blocks/backup/BackupInit.cpp Fri Apr 22 09:07:22 2005 @@ -175,7 +175,7 @@ addRecSignal(GSN_START_BACKUP_CONF, &Backup::execSTART_BACKUP_CONF); addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Backup::execBACKUP_FRAGMENT_REQ); - 
//addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF); + addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF); addRecSignal(GSN_BACKUP_FRAGMENT_CONF, &Backup::execBACKUP_FRAGMENT_CONF); addRecSignal(GSN_STOP_BACKUP_REQ, &Backup::execSTOP_BACKUP_REQ); --- 1.16/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp Wed Jan 19 09:15:31 2005 +++ 1.17/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp Fri Apr 22 09:07:22 2005 @@ -126,6 +126,7 @@ } setNodeInfo(getOwnNodeId()).m_connected = true; + setNodeInfo(getOwnNodeId()).m_version = ndbGetOwnVersion(); } Cmvmi::~Cmvmi() --- 1.25/ndb/src/mgmapi/mgmapi.cpp Wed Feb 16 20:46:28 2005 +++ 1.26/ndb/src/mgmapi/mgmapi.cpp Fri Apr 22 09:07:22 2005 @@ -1565,9 +1565,9 @@ { // start backup can take some time, set timeout high Uint64 old_timeout= handle->read_timeout; if (wait_completed == 2) - handle->read_timeout= 30*60*1000; // 30 minutes + handle->read_timeout= 48*60*60*1000; // 48 hours else if (wait_completed == 1) - handle->read_timeout= 5*60*1000; // 5 minutes + handle->read_timeout= 10*60*1000; // 10 minutes reply = ndb_mgm_call(handle, start_backup_reply, "start backup", &args); handle->read_timeout= old_timeout; } --- 1.58/ndb/src/mgmsrv/MgmtSrvr.cpp Tue Apr 12 13:56:22 2005 +++ 1.59/ndb/src/mgmsrv/MgmtSrvr.cpp Fri Apr 22 09:07:22 2005 @@ -791,7 +791,7 @@ result = sendSignal(processId, NO_WAIT, signal, true); } - if (result == -1) { + if (result == -1 && theWaitState != WAIT_NODEFAILURE) { m_stopRec.inUse = false; return SEND_OR_RECEIVE_FAILED; } @@ -1920,6 +1920,7 @@ #ifdef VM_TRACE ndbout_c("I'm not master resending to %d", aNodeId); #endif + theWaitNode= aNodeId; NdbApiSignal aSignal(_ownReference); BackupReq* req = CAST_PTR(BackupReq, aSignal.getDataPtrSend()); aSignal.set(TestOrd::TraceAPI, BACKUP, GSN_BACKUP_REQ, @@ -1947,6 +1948,7 @@ event.Event = BackupEvent::BackupAborted; event.Aborted.Reason = rep->reason; event.Aborted.BackupId = rep->backupId; + event.Aborted.ErrorCode = rep->reason; 
backupCallback(event); } break; @@ -2076,6 +2078,13 @@ handleStopReply(nodeId, 0); DBUG_VOID_RETURN; } + + if(theWaitNode == nodeId && + theWaitState != NO_WAIT && theWaitState != WAIT_STOP) + { + theWaitState = WAIT_NODEFAILURE; + NdbCondition_Signal(theMgmtWaitForResponseCondPtr); + } } eventReport(_ownNodeId, theData); @@ -2427,7 +2436,7 @@ int result; if (waitCompleted == 2) { result = sendRecSignal(nodeId, WAIT_BACKUP_COMPLETED, - signal, true, 30*60*1000 /*30 secs*/); + signal, true, 48*60*60*1000 /* 48 hours */); } else if (waitCompleted == 1) { result = sendRecSignal(nodeId, WAIT_BACKUP_STARTED, @@ -2441,22 +2450,6 @@ } if (waitCompleted){ - switch(m_lastBackupEvent.Event){ - case BackupEvent::BackupCompleted: - backupId = m_lastBackupEvent.Completed.BackupId; - break; - case BackupEvent::BackupStarted: - backupId = m_lastBackupEvent.Started.BackupId; - break; - case BackupEvent::BackupFailedToStart: - return m_lastBackupEvent.FailedToStart.ErrorCode; - case BackupEvent::BackupAborted: - return m_lastBackupEvent.Aborted.ErrorCode; - default: - return -1; - break; - } - } else { switch(m_lastBackupEvent.Event){ case BackupEvent::BackupCompleted: backupId = m_lastBackupEvent.Completed.BackupId; --- 1.24/ndb/src/mgmsrv/MgmtSrvr.hpp Tue Apr 12 13:36:40 2005 +++ 1.25/ndb/src/mgmsrv/MgmtSrvr.hpp Fri Apr 22 09:07:22 2005 @@ -611,7 +611,8 @@ WAIT_STOP, WAIT_BACKUP_STARTED, WAIT_BACKUP_COMPLETED, - WAIT_VERSION + WAIT_VERSION, + WAIT_NODEFAILURE }; /** @@ -695,6 +696,7 @@ NdbApiSignal* theSignalIdleList; // List of unused signals + Uint32 theWaitNode; WaitSignalType theWaitState; // State denoting a set of signals we accept to recieve. 
--- 1.1/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp Wed Apr 14 10:24:24 2004 +++ 1.2/ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp Fri Apr 22 09:07:23 2005 @@ -108,6 +108,7 @@ return -1; } theWaitState = aWaitState; + theWaitNode = aNodeId; return receiveOptimisedResponse(waitTime); } @@ -119,11 +120,12 @@ theFacade->checkForceSend(_blockNumber); NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime; - while (theWaitState != NO_WAIT && waitTime > 0) { + while (theWaitState != NO_WAIT && theWaitState != WAIT_NODEFAILURE + && waitTime > 0) { NdbCondition_WaitTimeout(theMgmtWaitForResponseCondPtr, theFacade->theMutexPtr, waitTime); - if(theWaitState == NO_WAIT) + if(theWaitState == NO_WAIT || theWaitState == WAIT_NODEFAILURE) break; waitTime = (maxTime - NdbTick_CurrentMillisecond()); }//while --- 1.9/ndb/test/ndbapi/testBackup.cpp Wed Sep 22 09:27:30 2004 +++ 1.10/ndb/test/ndbapi/testBackup.cpp Fri Apr 22 09:07:23 2005 @@ -74,20 +74,20 @@ if (testMaster) { if (testSlave) { - if (backup.NFMasterAsSlave(restarter) == -1){ + if (backup.NFMasterAsSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } else { - if (backup.NFMaster(restarter) == -1){ + if (backup.NFMaster(restarter) != NDBT_OK){ return NDBT_FAILED; } } } else { - if (backup.NFSlave(restarter) == -1){ + if (backup.NFSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } - + return NDBT_OK; } @@ -108,16 +108,16 @@ if (testMaster) { if (testSlave) { - if (backup.FailMasterAsSlave(restarter) == -1){ + if (backup.FailMasterAsSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } else { - if (backup.FailMaster(restarter) == -1){ + if (backup.FailMaster(restarter) != NDBT_OK){ return NDBT_FAILED; } } } else { - if (backup.FailSlave(restarter) == -1){ + if (backup.FailSlave(restarter) != NDBT_OK){ return NDBT_FAILED; } } --- 1.19/ndb/test/src/NdbBackup.cpp Mon Dec 13 00:48:01 2004 +++ 1.20/ndb/test/src/NdbBackup.cpp Fri Apr 22 09:07:23 2005 @@ -245,6 +245,10 @@ int NdbBackup::NF(NdbRestarter& _restarter, 
int *NFDuringBackup_codes, const int sz, bool onMaster){ { + int nNodes = _restarter.getNumDbNodes(); + if(nNodes == 1) + return NDBT_OK; + int nodeId = _restarter.getMasterNodeId(); CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0, @@ -255,15 +259,11 @@ CHECK(_restarter.startNodes(&nodeId, 1) == 0, "failed to start node"); - - NdbSleep_SecSleep(10); } - + CHECK(_restarter.waitClusterStarted() == 0, "waitClusterStarted failed"); - - int nNodes = _restarter.getNumDbNodes(); - + myRandom48Init(NdbTick_CurrentMillisecond()); for(int i = 0; i