List:Commits« Previous MessageNext Message »
From:Stewart Smith Date:June 13 2006 2:21pm
Subject:bk commit into 5.1 tree (stewart:1.2186) BUG#17928
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of stewart. When stewart does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2186 06/06/14 00:21:46 stewart@stripped +2 -0
  BUG#17928 testBackup fails in error handling testcases
  
  Reproduced failure of NFMaster on my laptop, this is the fix with some added
  debug information to help people in the future when they trigger asserts in
  AsyncFile (ndbfs helper threads).

  storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp
    1.32 06/06/14 00:21:39 stewart@stripped +7 -1
    It turns out that any asserts in AsyncFile (the thread that NDBFS runs to do 
    its bidding) don't cause anything to be written out anywhere and you're left 
    scratching your head as to what on earth happenned (apart from getting 
    "caught signal 6, aborted"). What really was happenning was we were then 
    calling FSCLOSEREQ one too many times, hitting the assert on trying to close
    an fd of -1 in AsyncFile.
    
    Added DEBUG printouts for every assert in AsyncFile

  storage/ndb/src/kernel/blocks/backup/Backup.cpp
    1.47 06/06/14 00:21:39 stewart@stripped +34 -17
    In Backup::checkFile(Signal*, BackupFilePtr), only send FSCLOSEREQ if file
    is not already being closed.
    
    Add debug printouts if DEBUG_ABORT is defined to help in finding the problem.
    
    Only set filePtr.p->fileClosing when we are actually closing the file, not when
    we're anticipating a close.
    
    In Backup::closeFiles(Signal*,BackupRecordPtr), when we're closing a file,
    make sure we've queued everything to be written out before sending FSCLOSEREQ.
    
    This solves two problems:
    - in testBackup (NFMaster) on my machine (but not in autotest since the end of
    March for whatever reason), we were hitting an assert in the buffer for files
    saying we hadn't written everything out of the buffer before closing. (for the
    interested, it was 10 bytes of data)
    - once I'd fixed the above (by the checkFile before close) I'd then get really
    nonsensical trace dumps in NFMaster for ERROR_INSERT 10003. It turns out that
    any asserts in AsyncFile (the thread that NDBFS runs to do its bidding) don't
    cause anything to be written out anywhere and you're left scratching your head
    as to what on earth happenned (apart from getting "caught signal 6, aborted").
    What really was happenning was we were then calling FSCLOSEREQ one too many times,
    hitting the assert on trying to close an fd of -1 in AsyncFile.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	stewart
# Host:	willster.(none)
# Root:	/home/stewart/Documents/MySQL/5.1/bug17928

--- 1.46/storage/ndb/src/kernel/blocks/backup/Backup.cpp	2006-06-08 20:40:29 +10:00
+++ 1.47/storage/ndb/src/kernel/blocks/backup/Backup.cpp	2006-06-14 00:21:39 +10:00
@@ -3819,19 +3819,37 @@
 	       FsAppendReq::SignalLength, JBA);
     return;
   }//if
-  
-  filePtr.p->fileRunning = 0;
-  filePtr.p->fileClosing = 1;
-  
-  FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend();
-  req->filePointer = filePtr.p->filePointer;
-  req->userPointer = filePtr.i;
-  req->userReference = reference();
-  req->fileFlag = 0;
+
 #ifdef DEBUG_ABORT
-  ndbout_c("***** a FSCLOSEREQ filePtr.i = %u", filePtr.i);
+  Uint32 running= filePtr.p->fileRunning;
+  Uint32 closing= filePtr.p->fileClosing;
 #endif
-  sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA);
+
+  if(!filePtr.p->fileClosing)
+  {
+    filePtr.p->fileRunning = 0;
+    filePtr.p->fileClosing = 1;
+
+    FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend();
+    req->filePointer = filePtr.p->filePointer;
+    req->userPointer = filePtr.i;
+    req->userReference = reference();
+    req->fileFlag = 0;
+#ifdef DEBUG_ABORT
+    ndbout_c("***** a FSCLOSEREQ filePtr.i = %u run=%d cl=%d", filePtr.i,
+             running, closing);
+#endif
+    sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA);
+  }
+  else
+  {
+#ifdef DEBUG_ABORT
+    ndbout_c("***** a NOT SENDING FSCLOSEREQ filePtr.i = %u run=%d cl=%d",
+             filePtr.i,
+             running, closing);
+#endif
+
+  }
 }
 
 
@@ -4082,9 +4100,7 @@
       jam();
       continue;
     }//if
-    
-    filePtr.p->fileClosing = 1;
-    
+
     if(filePtr.p->fileRunning == 1){
       jam();
 #ifdef DEBUG_ABORT
@@ -4093,7 +4109,10 @@
       filePtr.p->operation.dataBuffer.eof();
     } else {
       jam();
-      
+      filePtr.p->fileClosing = 1;
+      filePtr.p->operation.dataBuffer.eof();
+      checkFile(sig, filePtr); // make sure we write everything before closing
+
       FsCloseReq * req = (FsCloseReq *)sig->getDataPtrSend();
       req->filePointer = filePtr.p->filePointer;
       req->userPointer = filePtr.i;
@@ -4555,7 +4574,6 @@
   jam();
   BackupFilePtr filePtr;
   c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
-  filePtr.p->fileClosing = 1;
   filePtr.p->operation.dataBuffer.eof();
 }
 
@@ -4647,7 +4665,6 @@
   
     BackupFilePtr filePtr;
     c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
-    filePtr.p->fileClosing = 1;
     filePtr.p->operation.dataBuffer.eof();
     return;
   } 

--- 1.31/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp	2006-04-18 23:21:36 +10:00
+++ 1.32/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp	2006-06-14 00:21:39 +10:00
@@ -228,6 +228,7 @@
       endReq();
       return;
     default:
+      DEBUG(ndbout_c("Invalid Request"));
       abort();
       break;
     }//switch
@@ -676,6 +677,7 @@
   return 0;
 #else
   request = request;
+  DEBUG(ndbout_c("no pwrite"));
   abort();
   return -1;
 #endif
@@ -792,6 +794,7 @@
       bytes_written = return_value;
 
       if(bytes_written == 0){
+        DEBUG(ndbout_c("no bytes written"));
 	abort();
       }
       
@@ -830,8 +833,10 @@
 #else
   if (-1 == ::close(theFd)) {
 #ifndef DBUG_OFF
-    if (theFd == -1)
+    if (theFd == -1) {
+      DEBUG(ndbout_c("close on fd = -1"));
       abort();
+    }
 #endif
     request->error = errno;
   }
@@ -899,6 +904,7 @@
       return;
     }
     if(n == 0){
+      DEBUG(ndbout_c("append with n=0"));
       abort();
     }
     size -= n;
Thread
bk commit into 5.1 tree (stewart:1.2186) BUG#17928Stewart Smith13 Jun