List:Commits« Previous MessageNext Message »
From:Stewart Smith Date:July 30 2007 1:58am
Subject:Re: bk commit into 5.0 tree (lzhou:1.2477) BUG#28647
View as plain text  
On Mon, 2007-07-23 at 15:25 +0000, lzhou@stripped wrote:
> --- 1.6/ndb/include/kernel/signaldata/BackupSignalData.hpp	2007-07-23 15:25:21
> +00:00
> +++ 1.7/ndb/include/kernel/signaldata/BackupSignalData.hpp	2007-07-23 15:25:21
> +00:00
> @@ -247,6 +247,7 @@ public:
>      
>      ,AbortScan = 1328
>      ,IncompatibleVersions = 1329
> +    ,NdbdExitAfsDiskFull = 2810
>    };
>  private:
>    Uint32 requestType;

Looks like we don't need this any more (reusing FS errors)

> --- 1.38/ndb/src/kernel/blocks/backup/Backup.cpp	2007-07-23 15:25:21 +00:00
> +++ 1.39/ndb/src/kernel/blocks/backup/Backup.cpp	2007-07-23 15:25:21 +00:00
> @@ -2261,7 +2261,7 @@ Backup::execSTOP_BACKUP_REF(Signal* sign
>    StopBackupRef* ref = (StopBackupRef*)signal->getDataPtr();
>    const Uint32 ptrI = ref->backupPtr;
>    //const Uint32 backupId = ref->backupId;
> -  const Uint32 nodeId = ref->nodeId;
> +  const Uint32 nodeId = refToNode(signal->senderBlockRef());
>    
>    BackupRecordPtr ptr LINT_SET_PTR;
>    c_backupPool.getPtr(ptr, ptrI);

looks unneeded.

> @@ -3931,7 +3931,31 @@ Backup::execFSAPPENDREF(Signal* signal)
>    filePtr.p->fileRunning = 0;  
>    filePtr.p->errorCode = errCode;
>  
> -  checkFile(signal, filePtr);
> +  if(errCode != 0)
> +  {
> +    BackupRecordPtr ptr LINT_SET_PTR;
> +    c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
> +    ptr.p->setErrorCode(errCode);
> +    
> +    if(ptr.p->m_gsn == GSN_STOP_BACKUP_REQ &&
> !filePtr.p->fileClosing)
> +    {
> +      jam();
> +
> +      filePtr.p->fileClosing = 1;
> +
> +      FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend();
> +      req->filePointer = filePtr.p->filePointer;
> +      req->userPointer = filePtr.i;
> +      req->userReference = reference();
> +      req->fileFlag = 0;
> +#ifdef DEBUG_ABORT
> +      ndbout_c("***** a FSCLOSEREQ filePtr.i = %u", filePtr.i);
> +#endif

This should no doubt be a different line as we're not in FSCLOSEREQ.
(having DEBUG_ABORT enabled is useful when tracking down NF problems in
BACKUP)

> +      sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA);

do we go and close the rest of the files on FSCLOSECONF? or could this
leak files?

> +    }
> +  }
> +  else
> +    checkFile(signal, filePtr);
>  }
>  
>  void
> @@ -4224,6 +4248,14 @@ Backup::execSTOP_BACKUP_REQ(Signal* sign
>    ptr.p->slaveState.setState(STOPPING);
>    ptr.p->m_gsn = GSN_STOP_BACKUP_REQ;
>  
> +  if(ptr.p->checkError())
> +  {
> +    jam();
> +    ptr.p->errorCode = 0;
> +    closeFiles(signal, ptr);
> +    return;
> +  }
> +  

this looks like it's discarding the error... is it?

(also, it looks like there's some trailing whitespace there)

>    /**
>     * Insert footers
>     */
> @@ -4298,8 +4330,6 @@ Backup::closeFiles(Signal* sig, BackupRe
>        continue;
>      }//if
>      
> -    filePtr.p->fileClosing = 1;
> -    

why the changing logic?

(again, trailing whitespace)

>      if(filePtr.p->fileRunning == 1){
>        jam();
>  #ifdef DEBUG_ABORT
> @@ -4308,7 +4338,8 @@ Backup::closeFiles(Signal* sig, BackupRe
>        filePtr.p->operation.dataBuffer.eof();
>      } else {
>        jam();
> -      
> +
> +      filePtr.p->fileClosing = 1; 

trailing whitespace.

>        FsCloseReq * req = (FsCloseReq *)sig->getDataPtrSend();
>        req->filePointer = filePtr.p->filePointer;
>        req->userPointer = filePtr.i;
> @@ -4392,10 +4423,24 @@ Backup::closeFilesDone(Signal* signal, B
>  {
>    jam();
>    
> -  jam();

even though obvious cleanup, fix in sep patch. (just comment it "remove
duplicate jam()" and push)

>    BackupFilePtr filePtr;
>    ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
>    
> +  //error when do insert footer or close file
> +  if(ptr.p->checkError())
> +  {
> +    StopBackupRef * ref = (StopBackupRef*)signal->getDataPtr();
> +    ref->backupPtr = ptr.i;
> +    ref->backupId = ptr.p->backupId;
> +    ref->errorCode = AbortBackupOrd::NdbdExitAfsDiskFull;

why hardcoding this error?

> +    sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_REF, signal,
> +             StopBackupConf::SignalLength, JBB);
> +
> +    ptr.p->m_gsn = GSN_STOP_BACKUP_REF;
> +    ptr.p->slaveState.setState(CLEANING);
> +    return;
> +  }
> +

shouldn't we continue to clean up?

>    StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend();
>    conf->backupId = ptr.p->backupId;
>    conf->backupPtr = ptr.i;
> 
> --- 1.56/ndb/src/ndbapi/ndberror.c	2007-07-23 15:25:21 +00:00
> +++ 1.57/ndb/src/ndbapi/ndberror.c	2007-07-23 15:25:21 +00:00
> @@ -525,7 +525,10 @@ ErrorBundle ErrorCodes[] = {
>    { 4270, IE, "Unknown blob error" },
>    { 4335, AE, "Only one autoincrement column allowed per table. Having a table
> without primary key uses an autoincremented hidden key, i.e. a table without a primary key
> can not have an autoincremented column" },
>    { 4271, AE, "Invalid index object, not retrieved via getIndex()" },
> -  { 4275, AE, "The blob method is incompatible with operation type or lock mode" }
> +  { 4275, AE, "The blob method is incompatible with operation type or lock mode" },
> +  
> +  { 2810, TR, "No space left on the device" },
> +  { 2815, TR, "Error in reading files, please check file system" }
>  };
>  
>  static

doesn't look like we need these either.
-- 
Stewart Smith (stewart@stripped)
http://www.flamingspork.com/


Attachment: [application/pgp-signature] This is a digitally signed message part signature.asc
Thread
bk commit into 5.0 tree (lzhou:1.2477) BUG#28647lzhou23 Jul
  • Re: bk commit into 5.0 tree (lzhou:1.2477) BUG#28647Stewart Smith30 Jul