List:Commits« Previous MessageNext Message »
From:Jonas Oreland Date:December 7 2009 11:00am
Subject:bzr commit into mysql-5.1-telco-6.3 branch (jonas:3180) Bug#36500
View as plain text  
#At file:///home/jonas/src/telco-6.3/ based on revid:jonas@stripped

 3180 Jonas Oreland	2009-12-07
      ndb - bug#36500 - 1) force lcp from LQH if redo gets too full 2) try to handle case where tail does not move in setLogTail by changing head forwardquilt top

    modified:
      storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
      storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
      storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp
      storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2009-11-05 20:46:28 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2009-12-07 11:00:33 +0000
@@ -15765,8 +15765,20 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal
     }
   }
 
-  if(arg == DumpStateOrd::DihStartLcpImmediately){
+  if (arg == DumpStateOrd::DihStartLcpImmediately)
+  {
+    jam();
     add_lcp_counter(&c_lcpState.ctimer, (1 << 31));
+
+    /**
+     * If sent from local LQH, forward to master
+     */
+    if (cmasterNodeId != getOwnNodeId() &&
+        refToMain(signal->getSendersBlockRef()) == DBLQH)
+    {
+      jam();
+      sendSignal(cmasterdihref, GSN_DUMP_STATE_ORD, signal, 1, JBB);
+    }
     return;
   }
 

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2009-11-12 13:04:18 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2009-12-07 11:00:33 +0000
@@ -2163,6 +2163,7 @@ private:
   void execTRANSID_AI(Signal* signal);
   void execINCL_NODEREQ(Signal* signal);
 
+  void force_lcp(Signal* signal);
   void execLCP_FRAG_ORD(Signal* signal);
   void execEMPTY_LCP_REQ(Signal* signal);
   
@@ -2754,6 +2755,9 @@ private:
 // ------------------------------------------------------------------------
   Uint32 c_lcpId;
   Uint32 cnoOfFragsCheckpointed;
+  Uint32 c_last_force_lcp_time;
+  Uint32 c_free_mb_force_lcp_limit; // Force lcp when less than this free mb
+  Uint32 c_free_mb_tail_problem_limit; // Set TAIL_PROBLEM when less than this..
 
 /* ------------------------------------------------------------------------- */
 // cmaxWordsAtNodeRec keeps track of how many words that currently are

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp	2009-11-12 13:04:18 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp	2009-12-07 11:00:33 +0000
@@ -71,6 +71,10 @@ void Dblqh::initData() 
   for (Uint32 i = 0; i < 1024; i++) {
     ctransidHash[i] = RNIL;
   }//for
+
+  c_last_force_lcp_time = 0;
+  c_free_mb_force_lcp_limit = 16;
+  c_free_mb_tail_problem_limit = 4;
 }//Dblqh::initData()
 
 void Dblqh::initRecords() 

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2009-11-30 09:35:32 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2009-12-07 11:00:33 +0000
@@ -1179,6 +1179,13 @@ void Dblqh::execREAD_CONFIG_REQ(Signal* 
     ndbrequire(cmaxLogFilesInPageZero);
   }
 
+  Uint64 totalmb = Uint64(cnoLogFiles) * Uint64(clogFileSize);
+  Uint64 limit = totalmb / 3;
+  ndbrequire(limit < Uint64(0xFFFFFFFF));
+  c_free_mb_force_lcp_limit = limit; // If less than 33% of REDO free, force LCP
+  c_free_mb_tail_problem_limit = 4;  // If less than 4Mb set TAIL_PROBLEM
+
+
   ndb_mgm_get_int_parameter(p, CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT, 
                             &cTransactionDeadlockDetectionTimeout);
   
@@ -11759,11 +11766,27 @@ template class Vector<TraceLCP::Sig>;
 #else
 #endif
 
+void
+Dblqh::force_lcp(Signal* signal)
+{
+  if (cLqhTimeOutCount == c_last_force_lcp_time)
+  {
+    jam();
+    return;
+  }
+
+  c_last_force_lcp_time = cLqhTimeOutCount;
+  signal->theData[0] = 7099;
+  sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
+}
+
 void Dblqh::execLCP_FRAG_ORD(Signal* signal)
 {
   jamEntry();
   CRASH_INSERTION(5010);
-  LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0];
+
+  LcpFragOrd lcpFragOrdCopy = * (LcpFragOrd *)&signal->theData[0];
+  LcpFragOrd * lcpFragOrd = &lcpFragOrdCopy;
 
   Uint32 lcpId = lcpFragOrd->lcpId;
 
@@ -12351,6 +12374,8 @@ void Dblqh::setLogTail(Signal* signal, U
 
   for (sltLogPartPtr.i = 0; sltLogPartPtr.i < 4; sltLogPartPtr.i++) {
     jam();
+    bool TchangeMB = false;
+retry:
     ptrAss(sltLogPartPtr, logPartRecord);
     findLogfile(signal, sltLogPartPtr.p->logTailFileNo,
                 sltLogPartPtr, &sltLogFilePtr);
@@ -12437,33 +12462,125 @@ void Dblqh::setLogTail(Signal* signal, U
       UintR ToldTailFileNo = sltLogPartPtr.p->logTailFileNo;
       UintR ToldTailMByte = sltLogPartPtr.p->logTailMbyte;
 
-      arrGuard(tsltMbyte, clogFileSize);
-      sltLogPartPtr.p->logTailFileNo = 
-         sltLogFilePtr.p->logLastPrepRef[tsltMbyte] >> 16;
 /* ------------------------------------------------------------------------- */
 /*SINCE LOG_MAX_GCI_STARTED ONLY KEEP TRACK OF COMMIT LOG RECORDS WE ALSO    */
 /*HAVE TO STEP BACK THE TAIL SO THAT WE INCLUDE ALL PREPARE RECORDS          */
 /*NEEDED FOR THOSE COMMIT RECORDS IN THIS MBYTE. THIS IS A RATHER            */
 /*CONSERVATIVE APPROACH BUT IT WORKS.                                        */
 /* ------------------------------------------------------------------------- */
+      arrGuard(tsltMbyte, clogFileSize);
+      sltLogPartPtr.p->logTailFileNo =
+        sltLogFilePtr.p->logLastPrepRef[tsltMbyte] >> 16;
       sltLogPartPtr.p->logTailMbyte = 
         sltLogFilePtr.p->logLastPrepRef[tsltMbyte] & 65535;
-      if ((ToldTailFileNo != sltLogPartPtr.p->logTailFileNo) ||
-          (ToldTailMByte != sltLogPartPtr.p->logTailMbyte)) {
+
+      bool tailmoved = !(ToldTailFileNo == sltLogPartPtr.p->logTailFileNo &&
+                         ToldTailMByte == sltLogPartPtr.p->logTailMbyte);
+
+      LogFileRecordPtr tmpfile;
+      tmpfile.i = sltLogPartPtr.p->currentLogfile;
+      ptrCheckGuard(tmpfile, clogFileFileSize, logFileRecord);
+
+      LogPosition head = { tmpfile.p->fileNo, tmpfile.p->currentMbyte };
+      LogPosition tail = { sltLogPartPtr.p->logTailFileNo,
+                           sltLogPartPtr.p->logTailMbyte};
+      Uint64 mb = free_log(head, tail, sltLogPartPtr.p->noLogFiles,
+                           clogFileSize);
+
+      if (mb <= c_free_mb_force_lcp_limit)
+      {
+        /**
+         * Force a new LCP
+         */
+        force_lcp(signal);
+      }
+
+      if (tailmoved && mb > c_free_mb_tail_problem_limit)
+      {
         jam();
-        if (sltLogPartPtr.p->logPartState == LogPartRecord::TAIL_PROBLEM) {
-          if (sltLogPartPtr.p->firstLogQueue == RNIL) {
+        if (sltLogPartPtr.p->logPartState == LogPartRecord::TAIL_PROBLEM)
+        {
+          if (sltLogPartPtr.p->firstLogQueue == RNIL)
+          {
             jam();
             sltLogPartPtr.p->logPartState = LogPartRecord::IDLE;
-          } else {
+          }
+          else
+          {
             jam();
             sltLogPartPtr.p->logPartState = LogPartRecord::ACTIVE;
-          }//if
-        }//if
-      }//if
+          }
+        }
+      }
+      else if (!tailmoved && mb <= c_free_mb_force_lcp_limit)
+      {
+        jam();
+        /**
+         * Tail didn't move...and we forced a new LCP
+         *   This could be as currentMb, contains backreferences making it
+         *   Check if changing mb forward will help situation
+         */
+        if (mb < 2)
+        {
+          /**
+           * 0 or 1 mb free, no point in trying to changeMbyte forward...
+           */
+          jam();
+          goto next;
+        }
+
+        if (TchangeMB)
+        {
+          jam();
+          /**
+           * We already did move forward...
+           */
+          goto next;
+        }
+
+        TcConnectionrecPtr tmp;
+        tmp.i = sltLogPartPtr.p->firstLogTcrec;
+        if (tmp.i != RNIL)
+        {
+          jam();
+          ptrCheckGuard(tmp, ctcConnectrecFileSize, tcConnectionrec);
+          Uint32 fileNo = tmp.p->logStartFileNo;
+          Uint32 mbyte = tmp.p->logStartPageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE;
+
+          if (fileNo == sltLogPartPtr.p->logTailFileNo &&
+              mbyte == sltLogPartPtr.p->logTailMbyte)
+          {
+            jam();
+            /**
+             * An uncommitted operation...still pending...
+             *   with back-reference to tail...not much to do
+             *   (theoretically we could rewrite log-entry here...
+             *    but this is for future)
+             * skip to next
+             */
+            goto next;
+          }
+        }
+
+        {
+          /**
+           * Try forcing a changeMbyte
+           */
+          jam();
+          logPartPtr = sltLogPartPtr;
+          logFilePtr.i = logPartPtr.p->currentLogfile;
+          ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
+          logPagePtr.i = logFilePtr.p->currentLogpage;
+          ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
+          changeMbyte(signal);
+          TchangeMB = true; // don't try this twice...
+          goto retry;
+        }
+      }
     }
+next:
+    (void)1;
   }//for
-
 }//Dblqh::setLogTail()
 
 /* ######################################################################### */
@@ -19598,8 +19715,6 @@ void Dblqh::writeNextLog(Signal* signal)
 /*       CAN INVOKE THIS SYSTEM CRASH. HOWEVER ONLY   */
 /*       VERY SERIOUS TIMING PROBLEMS.                */
 /* -------------------------------------------------- */
-      signal->theData[0] = 2398;
-      execDUMP_STATE_ORD(signal);
       char buf[100];
       BaseString::snprintf(buf, sizeof(buf), 
                            "Head/Tail met in REDO log, logpart: %u"
@@ -19608,6 +19723,9 @@ void Dblqh::writeNextLog(Signal* signal)
                            logFilePtr.p->fileNo,
                            logFilePtr.p->currentMbyte);
 
+
+      signal->theData[0] = 2398;
+      execDUMP_STATE_ORD(signal);
       progError(__LINE__, NDBD_EXIT_NO_MORE_REDOLOG, buf);
       systemError(signal, __LINE__);
     }//if
@@ -19628,7 +19746,14 @@ void Dblqh::writeNextLog(Signal* signal)
 
   LogPosition head = { twnlNextFileNo, twnlNextMbyte };
   LogPosition tail = { logPartPtr.p->logTailFileNo, logPartPtr.p->logTailMbyte};
-  if (free_log(head, tail, logPartPtr.p->noLogFiles, clogFileSize) <= 4)
+  Uint32 free_mb = free_log(head, tail, logPartPtr.p->noLogFiles, clogFileSize);
+  if (free_mb <= c_free_mb_force_lcp_limit)
+  {
+    jam();
+    force_lcp(signal);
+  }
+
+  if (free_mb <= c_free_mb_tail_problem_limit)
   {
     jam();
     logPartPtr.p->logPartState = LogPartRecord::TAIL_PROBLEM;


Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20091207110033-zdkhafm6lsgroaix.bundle
Thread
bzr commit into mysql-5.1-telco-6.3 branch (jonas:3180) Bug#36500Jonas Oreland7 Dec