MySQL Lists are EOL. Please join:

List: Commits  « Previous Message | Next Message »
From: Jonas Oreland  Date: February 25 2010 4:19pm
Subject: bzr commit into mysql-5.1-telco-6.3 branch (jonas:3104) Bug#51512
View as plain text  
#At file:///home/jonas/src/telco-6.3/ based on revid:jonas@stripped

 3104 Jonas Oreland	2010-02-25
      ndb - bug#51512 - fix rare GCP stop due to endless 1220

    modified:
      storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
      storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2010-02-15 11:50:39 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2010-02-25 16:18:56 +0000
@@ -2273,7 +2273,7 @@ private:
   void setLogTail(Signal* signal, Uint32 keepGci);
   Uint32 remainingLogSize(const LogFileRecordPtr &sltCurrLogFilePtr,
 			  const LogPartRecordPtr &sltLogPartPtr);
-  void checkGcpCompleted(Signal* signal, Uint32 pageWritten, Uint32 wordWritten);
+  bool checkGcpCompleted(Signal* signal, Uint32 pageWritten, Uint32 wordWritten);
   void initFsopenconf(Signal* signal);
   void initFsrwconf(Signal* signal, bool write);
   void initLfo(Signal* signal);
@@ -2527,7 +2527,7 @@ private:
   void execLogComp_extra_files_closed(Signal* signal);
   void closeWriteLogLab(Signal* signal);
   void closeExecLogLab(Signal* signal);
-  void writePageZeroLab(Signal* signal);
+  void writePageZeroLab(Signal* signal, Uint32 from);
   void lastWriteInFileLab(Signal* signal);
   void initWriteEndLab(Signal* signal);
   void initFirstPageLab(Signal* signal);

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2010-02-04 21:12:45 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2010-02-25 16:18:56 +0000
@@ -436,6 +436,7 @@ void Dblqh::execCONTINUEB(Signal* signal
     gcpPtr.i = 0;
     ptrAss(gcpPtr, gcpRecord);
     initGcpRecLab(signal);
+    startTimeSupervision(signal);
     return;
     break;
   case ZCHECK_LCP_STOP_BLOCKED:
@@ -12968,16 +12969,19 @@ void Dblqh::initGcpRecLab(Signal* signal
 /*      DISK WRITE.                                                          */
 /*                                                                           */
 /*       SUBROUTINE SHORT NAME = CGC                                         */
+/* return: true if gcp was completed */
 /* ========================================================================= */
-void Dblqh::checkGcpCompleted(Signal* signal,
-                              Uint32 tcgcPageWritten,
-                              Uint32 tcgcWordWritten) 
+bool
+Dblqh::checkGcpCompleted(Signal* signal,
+                         Uint32 tcgcPageWritten,
+                         Uint32 tcgcWordWritten) 
 {
   UintR tcgcFlag;
   UintR tcgcJ;
 
   gcpPtr.i = logPartPtr.p->gcprec;
-  if (gcpPtr.i != RNIL) {
+  if (gcpPtr.i != RNIL)
+  {
     jam();
 /* ------------------------------------------------------------------------- */
 /* IF THE GLOBAL CHECKPOINT IS NOT WAITING FOR COMPLETION THEN WE CAN QUIT   */
@@ -12994,7 +12998,7 @@ void Dblqh::checkGcpCompleted(Signal* si
 /* ------------------------------------------------------------------------- */
 /* THIS LOG PART HAVE NOT YET WRITTEN THE GLOBAL CHECKPOINT TO DISK.         */
 /* ------------------------------------------------------------------------- */
-        return;
+        return false;
       } else {
         if (tcgcPageWritten == gcpPtr.p->gcpPageNo[logPartPtr.i]) {
           if (tcgcWordWritten < gcpPtr.p->gcpWordNo[logPartPtr.i]) {
@@ -13002,7 +13006,7 @@ void Dblqh::checkGcpCompleted(Signal* si
 /* ------------------------------------------------------------------------- */
 /* THIS LOG PART HAVE NOT YET WRITTEN THE GLOBAL CHECKPOINT TO DISK.         */
 /* ------------------------------------------------------------------------- */
-            return;
+            return false;
           }//if
         }//if
       }//if
@@ -13012,7 +13016,8 @@ void Dblqh::checkGcpCompleted(Signal* si
       logPartPtr.p->gcprec = RNIL;
       gcpPtr.p->gcpLogPartState[logPartPtr.i] = ZON_DISK;
       tcgcFlag = ZTRUE;
-      for (tcgcJ = 0; tcgcJ <= 3; tcgcJ++) {
+      for (tcgcJ = 0; tcgcJ <= 3; tcgcJ++)
+      {
         jam();
         if (gcpPtr.p->gcpLogPartState[tcgcJ] != ZON_DISK) {
           jam();
@@ -13023,7 +13028,13 @@ void Dblqh::checkGcpCompleted(Signal* si
           tcgcFlag = ZFALSE;
         }//if
       }//for
-      if (tcgcFlag == ZTRUE) {
+      if (tcgcFlag == ZFALSE)
+      {
+        return false;
+      }
+
+      if (tcgcFlag == ZTRUE)
+      {
         jam();
 /* ------------------------------------------------------------------------- */
 /*WE HAVE FOUND A COMPLETED GLOBAL CHECKPOINT OPERATION. WE NOW NEED TO SEND */
@@ -13054,10 +13065,11 @@ void Dblqh::checkGcpCompleted(Signal* si
             execFSSYNCCONF(signal);
           }//if
         }//for
-        return;
       }//if
     }//if
+    return true;
   }//if
+  return false;
 }//Dblqh::checkGcpCompleted()
 
 void
@@ -13405,7 +13417,8 @@ void Dblqh::execFSWRITECONF(Signal* sign
     return;
   case LogFileOperationRecord::WRITE_PAGE_ZERO:
     jam();
-    writePageZeroLab(signal);
+    writePageZeroLab(signal, __LINE__);
+    releaseLfo(signal);
     return;
   case LogFileOperationRecord::LAST_WRITE_IN_FILE:
     jam();
@@ -13782,7 +13795,8 @@ void Dblqh::firstPageWriteLab(Signal* si
 /*---------------------------------------------------------------------------*/
 /* IF THE NEW FILE WAS 0 THEN WE HAVE ALREADY WRITTEN PAGE ZERO IN FILE 0.   */
 /*---------------------------------------------------------------------------*/
-      logFilePtr.p->fileChangeState = LogFileRecord::NOT_ONGOING;
+      // use writePageZeroLab to make sure that same code as normal is run
+      writePageZeroLab(signal, __LINE__);
       return;
     } else {
       jam();
@@ -13871,7 +13885,8 @@ void Dblqh::lastWriteInFileLab(Signal* s
 /*---------------------------------------------------------------------------*/
 /* IF THE NEW FILE WAS 0 THEN WE HAVE ALREADY WRITTEN PAGE ZERO IN FILE 0.   */
 /*---------------------------------------------------------------------------*/
-      logFilePtr.p->fileChangeState = LogFileRecord::NOT_ONGOING;
+      // use writePageZeroLab to make sure that same code as normal is run
+      writePageZeroLab(signal, __LINE__);
       return;
     } else {
       jam();
@@ -13897,7 +13912,7 @@ void Dblqh::lastWriteInFileLab(Signal* s
   }//if
 }//Dblqh::lastWriteInFileLab()
 
-void Dblqh::writePageZeroLab(Signal* signal) 
+void Dblqh::writePageZeroLab(Signal* signal, Uint32 from) 
 {
   if (logPartPtr.p->logPartState == LogPartRecord::FILE_CHANGE_PROBLEM) 
   {
@@ -13914,15 +13929,23 @@ void Dblqh::writePageZeroLab(Signal* sig
   }
   
   logFilePtr.p->fileChangeState = LogFileRecord::NOT_ONGOING;
+
 /*---------------------------------------------------------------------------*/
 /* IT COULD HAVE ARRIVED PAGE WRITES TO THE CURRENT FILE WHILE WE WERE       */
 /* WAITING FOR THIS DISK WRITE TO COMPLETE. THEY COULD NOT CHECK FOR         */
 /* COMPLETED GLOBAL CHECKPOINTS. THUS WE SHOULD DO THAT NOW INSTEAD.         */
 /*---------------------------------------------------------------------------*/
-  checkGcpCompleted(signal,
-                    logFilePtr.p->lastPageWritten,
-                    logFilePtr.p->lastWordWritten);
-  releaseLfo(signal);
+  bool res = checkGcpCompleted(signal,
+                               logFilePtr.p->lastPageWritten,
+                               logFilePtr.p->lastWordWritten);
+  if (res && false)
+  {
+    gcpPtr.i = ccurrentGcprec;
+    ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord);
+    
+    infoEvent("KESO completing GCP %u in writePageZeroLab from %u", 
+              gcpPtr.p->gcpId, from);
+  }
   return;
 }//Dblqh::writePageZeroLab()
 
@@ -19928,11 +19951,15 @@ void Dblqh::writeNextLog(Signal* signal)
     force_lcp(signal);
   }
 
-  if (free_mb <= c_free_mb_tail_problem_limit)
+  if (logPartPtr.p->logPartState == LogPartRecord::ACTIVE ||
+      logPartPtr.p->logPartState == LogPartRecord::IDLE)
   {
-    jam();
-    logPartPtr.p->logPartState = LogPartRecord::TAIL_PROBLEM;
-  }//if
+    if (free_mb <= c_free_mb_tail_problem_limit)
+    {
+      jam();
+      logPartPtr.p->logPartState = LogPartRecord::TAIL_PROBLEM;
+    }
+  }
 }//Dblqh::writeNextLog()
 
 bool
@@ -20365,26 +20392,28 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
   {
     jam();
     Uint32 i;
+    void * logPartPtr;
     GcpRecordPtr gcp; gcp.i = RNIL;
     for(i = 0; i<4; i++)
     {
-      logPartPtr.i = i;
-      ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
+      Ptr<LogPartRecord> lp;
+      lp.i = i;
+      ptrCheckGuard(lp, clogPartFileSize, logPartRecord);
       ndbout_c("LP %d state: %d WW_Gci: %d gcprec: %d flq: %d currfile: %d tailFileNo: %d logTailMbyte: %d", 
 	       i,
-	       logPartPtr.p->logPartState,
-	       logPartPtr.p->waitWriteGciLog,
-	       logPartPtr.p->gcprec,
-	       logPartPtr.p->firstLogQueue,
-	       logPartPtr.p->currentLogfile,
-	       logPartPtr.p->logTailFileNo,
-	       logPartPtr.p->logTailMbyte);
+	       lp.p->logPartState,
+	       lp.p->waitWriteGciLog,
+	       lp.p->gcprec,
+	       lp.p->firstLogQueue,
+	       lp.p->currentLogfile,
+	       lp.p->logTailFileNo,
+	       lp.p->logTailMbyte);
       
-      if(gcp.i == RNIL && logPartPtr.p->gcprec != RNIL)
-	gcp.i = logPartPtr.p->gcprec;
+      if(gcp.i == RNIL && lp.p->gcprec != RNIL)
+	gcp.i = lp.p->gcprec;
 
       LogFileRecordPtr logFilePtr;
-      Uint32 first= logFilePtr.i= logPartPtr.p->firstLogfile;
+      Uint32 first= logFilePtr.i= lp.p->firstLogfile;
       do
       {
 	ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);


Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20100225161856-lnwy7a3qucmn474j.bundle
Thread
bzr commit into mysql-5.1-telco-6.3 branch (jonas:3104) Bug#51512 - Jonas Oreland - 25 Feb