MySQL Lists are EOL. Please join:

List: Commits « Previous Message | Next Message »
From: Jonas Oreland  Date: August 18 2009 7:01am
Subject: bzr push into mysql-5.1-telco-6.3 branch (jonas:3009 to 3010) Bug#46412
View as plain text  
 3010 Jonas Oreland	2009-08-18
      ndb - bug#46412
        Fix/handle incorrectly set lcp-bits during system restart

    modified:
      storage/ndb/src/kernel/blocks/ERROR_codes.txt
      storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
      storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
      storage/ndb/test/ndbapi/testSystemRestart.cpp
      storage/ndb/test/run-test/daily-basic-tests.txt
 3009 Jonas Oreland	2009-08-17
      ndb - bug#46651 - fix handling of VAR_PART-bit

    modified:
      storage/ndb/src/kernel/blocks/dbtup/DbtupAbort.cpp
      storage/ndb/src/kernel/blocks/dbtup/DbtupCommit.cpp
      storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp
      storage/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp
      storage/ndb/test/ndbapi/testSystemRestart.cpp
      storage/ndb/test/run-test/daily-basic-tests.txt
=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2009-05-26 04:26:02 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2009-08-18 06:57:20 +0000
@@ -5,7 +5,7 @@ Next DBACC 3002
 Next DBTUP 4029
 Next DBLQH 5054
 Next DBDICT 6008
-Next DBDIH 7215
+Next DBDIH 7220
 Next DBTC 8074
 Next CMVMI 9000
 Next BACKUP 10041

=== modified file 'storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2009-08-18 06:57:20 +0000
@@ -889,7 +889,7 @@ private:
   void calculateHotSpare();
   void checkEscalation();
   void clearRestartInfoBits(Signal *);
-  void invalidateLcpInfoAfterSr();
+  void invalidateLcpInfoAfterSr(Signal*);
 
   bool isMaster();
   bool isActiveMaster();
@@ -914,7 +914,7 @@ private:
   void setNodeGroups();
   void setNodeInfo(Signal *);
   void setNodeLcpActiveStatus();
-  void setNodeRestartInfoBits();
+  void setNodeRestartInfoBits(Signal*);
   void startGcp(Signal *);
   void startGcpMonitor(Signal*);
 

=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2009-05-28 06:03:13 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2009-08-18 06:57:20 +0000
@@ -782,7 +782,7 @@ done:  
       //  IF THERE WAS A LOCAL CHECKPOINT ONGOING AT THE CRASH MOMENT WE WILL
       //    INVALIDATE THAT LOCAL CHECKPOINT.
       /* -------------------------------------------------------------------- */
-      invalidateLcpInfoAfterSr();
+      invalidateLcpInfoAfterSr(signal);
     }//if
 
     if (m_micro_gcp.m_enabled == false && 
@@ -4617,7 +4617,7 @@ void Dbdih::execNODE_FAILREP(Signal* sig
   
   if (isMaster()) {
     jam();
-    setNodeRestartInfoBits();
+    setNodeRestartInfoBits(signal);
   }//if
 }//Dbdih::execNODE_FAILREP()
 
@@ -4906,7 +4906,7 @@ void Dbdih::failedNodeLcpHandling(Signal
       break;
     case Sysfile::NS_ActiveMissed_1:
       jam();
-      failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_2;
+      failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
       break;
     case Sysfile::NS_ActiveMissed_2:
       jam();
@@ -6206,13 +6206,15 @@ Dbdih::sendMASTER_LCPCONF(Signal * signa
     return;
   }
 
-  if(c_lcpState.lcpStatus == LCP_COPY_GCI){
+  if(c_lcpState.lcpStatus == LCP_COPY_GCI)
+  {
     jam();
     /**
      * Restart it
      */
     //Uint32 lcpId = SYSFILE->latestLCP_ID;
     SYSFILE->latestLCP_ID--;
+    Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
     c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
 #if 0
     if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){
@@ -9168,7 +9170,7 @@ void Dbdih::execCOPY_GCICONF(Signal* sig
   }
 }//Dbdih::execCOPY_GCICONF()
 
-void Dbdih::invalidateLcpInfoAfterSr()
+void Dbdih::invalidateLcpInfoAfterSr(Signal* signal)
 {
   NodeRecordPtr nodePtr;
   SYSFILE->latestLCP_ID--;
@@ -9185,10 +9187,8 @@ void Dbdih::invalidateLcpInfoAfterSr()
       /* ------------------------------------------------------------------- */
       switch (nodePtr.p->activeStatus) {
       case Sysfile::NS_Active:
-	/* ----------------------------------------------------------------- */
-	// When not active in ongoing LCP and still active is a contradiction.
-	/* ----------------------------------------------------------------- */
-        ndbrequire(false);
+        nodePtr.p->activeStatus = Sysfile::NS_Active;
+        break;
       case Sysfile::NS_ActiveMissed_1:
         jam();
         nodePtr.p->activeStatus = Sysfile::NS_Active;
@@ -9201,9 +9201,14 @@ void Dbdih::invalidateLcpInfoAfterSr()
         jam();
         break;
       }//switch
-    }//if
+    }
+    else
+    {
+      jam();
+      ndbassert(nodePtr.p->activeStatus == Sysfile::NS_Active);
+    }
   }//for
-  setNodeRestartInfoBits();
+  setNodeRestartInfoBits(signal);
 }//Dbdih::invalidateLcpInfoAfterSr()
 
 /* ------------------------------------------------------------------------- */
@@ -9222,6 +9227,8 @@ void Dbdih::writingCopyGciLab(Signal* si
   /*     WE HAVE NOW WRITTEN THIS FILE. WRITE ALSO NEXT FILE IF THIS IS NOT  */
   /*     ALREADY THE LAST.                                                   */
   /* ----------------------------------------------------------------------- */
+  CRASH_INSERTION(7219);
+
   filePtr.p->reqStatus = FileRecord::IDLE;
   if (filePtr.i == crestartInfoFile[0]) {
     jam();
@@ -11082,7 +11089,7 @@ void Dbdih::storeNewLcpIdLab(Signal* sig
    *   but this function has been move "up" in the flow
    *   to just before calcKeepGci
    */
-  setNodeRestartInfoBits();
+  setNodeRestartInfoBits(signal);
 
   c_lcpState.setLcpStatus(LCP_COPY_GCI, __LINE__);
   //#ifdef VM_TRACE
@@ -11097,6 +11104,8 @@ void Dbdih::storeNewLcpIdLab(Signal* sig
 void Dbdih::startLcpRoundLab(Signal* signal) {
   jam();
 
+  CRASH_INSERTION(7218);
+
   Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
   Callback c = { safe_cast(&Dbdih::startLcpMutex_locked), 0 };
   ndbrequire(mutex.lock(c));
@@ -12069,8 +12078,8 @@ void Dbdih::allNodesLcpCompletedLab(Sign
     }
   }
   
-  setLcpActiveStatusEnd(signal);
   Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
+  setLcpActiveStatusEnd(signal);
 
   if(!isMaster()){
     jam();
@@ -14768,7 +14777,7 @@ void Dbdih::setLcpActiveStatusEnd(Signal
   c_lcpState.m_participatingLQH.clear();
   if (isMaster()) {
     jam();
-    setNodeRestartInfoBits();
+    setNodeRestartInfoBits(signal);
   }//if
 }//Dbdih::setLcpActiveStatusEnd()
 
@@ -14919,7 +14928,7 @@ void Dbdih::sendHOT_SPAREREP(Signal* sig
 /*************************************************************************/
 /* SET THE RESTART INFO BITS BASED ON THE NODES ACTIVE STATUS.           */
 /*************************************************************************/
-void Dbdih::setNodeRestartInfoBits() 
+void Dbdih::setNodeRestartInfoBits(Signal * signal)
 {
   NodeRecordPtr nodePtr;
   Uint32 tsnrNodeGroup;
@@ -14932,7 +14941,11 @@ void Dbdih::setNodeRestartInfoBits() 
     SYSFILE->nodeGroups[i] = 0;
   }//for
   NdbNodeBitmask::clear(SYSFILE->lcpActive);
-  
+
+#ifdef ERROR_INSERT
+  NdbNodeBitmask tmp;
+#endif
+
   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
     ptrAss(nodePtr, nodeRecord);
     switch (nodePtr.p->activeStatus) {
@@ -14979,11 +14992,41 @@ void Dbdih::setNodeRestartInfoBits() 
       tsnrNodeGroup = nodePtr.p->nodeGroup;
     }//if
     Sysfile::setNodeGroup(nodePtr.i, SYSFILE->nodeGroups, tsnrNodeGroup);
-    if (c_lcpState.m_participatingLQH.get(nodePtr.i)){
+    if (c_lcpState.m_participatingLQH.get(nodePtr.i))
+    {
       jam();
       NdbNodeBitmask::set(SYSFILE->lcpActive, nodePtr.i);
     }//if
+#ifdef ERROR_INSERT
+    else if (Sysfile::getLCPOngoing(SYSFILE->systemRestartBits))
+    {
+      jam();
+      if (nodePtr.p->activeStatus == Sysfile::NS_Active)
+        tmp.set(nodePtr.i);
+    }
+#endif
   }//for
+
+#ifdef ERROR_INSERT
+  if (!tmp.isclear())
+  {
+    jam();
+
+    NdbNodeBitmask all;
+    nodePtr.i = cfirstAliveNode;
+    do {
+      jam();
+      ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
+      all.set(nodePtr.i);
+      nodePtr.i = nodePtr.p->nextNode;
+    } while (nodePtr.i != RNIL);
+
+
+    NodeReceiverGroup rg(DBDIH, all);
+    signal->theData[0] = 7219;
+    sendSignal(rg, GSN_NDB_TAMPER, signal,  1, JBA);
+  }
+#endif
 }//Dbdih::setNodeRestartInfoBits()
 
 /*************************************************************************/

=== modified file 'storage/ndb/test/ndbapi/testSystemRestart.cpp'
--- a/storage/ndb/test/ndbapi/testSystemRestart.cpp	2009-08-17 07:25:41 +0000
+++ b/storage/ndb/test/ndbapi/testSystemRestart.cpp	2009-08-18 06:57:20 +0000
@@ -1919,6 +1919,95 @@ int runBug46651(NDBT_Context* ctx, NDBT_
   return NDBT_OK;
 }
 
+int
+runBug46412(NDBT_Context* ctx, NDBT_Step* step)
+{
+  Uint32 loops = ctx->getNumLoops();
+  NdbRestarter res;
+  const Uint32 nodeCount = res.getNumDbNodes();
+  if(nodeCount < 2)
+  {
+    return NDBT_OK;
+  }
+
+  for (Uint32 l = 0; l<loops; l++)
+  {
+loop:
+    printf("checking nodegroups of getNextMasterNodeId(): ");
+    int nodes[256];
+    bzero(nodes, sizeof(nodes));
+    nodes[0] = res.getMasterNodeId();
+    printf("%d ", nodes[0]);
+    for (Uint32 i = 1; i<nodeCount; i++)
+    {
+      nodes[i] = res.getNextMasterNodeId(nodes[i-1]);
+      printf("%d ", nodes[i]);
+    }
+    printf("\n");
+
+    Bitmask<256/32> ng;
+    int cnt = 0;
+    int restartnodes[256];
+
+    Uint32 limit = (nodeCount / 2);
+    for (Uint32 i = 0; i<limit; i++)
+    {
+      int tmp = res.getNodeGroup(nodes[i]);
+      printf("node %d ng: %d", nodes[i], tmp);
+      if (ng.get(tmp))
+      {
+        restartnodes[cnt++] = nodes[i];
+        ndbout_c(" COLLISION");
+        limit++;
+        if (limit > nodeCount)
+          limit = nodeCount;
+      }
+      else
+      {
+        ng.set(tmp);
+        ndbout_c(" OK");
+      }
+    }
+
+    if (cnt)
+    {
+      printf("restarting nodes: ");
+      for (int i = 0; i<cnt; i++)
+        printf("%d ", restartnodes[i]);
+      printf("\n");
+      for (int i = 0; i<cnt; i++)
+      {
+        res.restartOneDbNode(restartnodes[i], false, true, true);
+      }
+      res.waitNodesNoStart(restartnodes, cnt);
+      res.startNodes(restartnodes, cnt);
+      if (res.waitClusterStarted())
+        return NDBT_FAILED;
+
+      goto loop;
+    }
+
+    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+    res.dumpStateAllNodes(val2, 2);
+
+    for (Uint32 i = 0; i<(nodeCount / 2); i++)
+    {
+      res.insertErrorInNode(nodes[(nodeCount / 2) - (i + 1)], 7218);
+    }
+
+    int lcp = 7099;
+    res.dumpStateAllNodes(&lcp, 1);
+
+    res.waitClusterNoStart();
+    res.startAll();
+    if (res.waitClusterStarted())
+      return NDBT_FAILED;
+  }
+
+  return NDBT_OK;
+}
+
+
 NDBT_TESTSUITE(testSystemRestart);
 TESTCASE("SR1", 
 	 "Basic system restart test. Focus on testing restart from REDO log.\n"
@@ -2222,6 +2311,10 @@ TESTCASE("Bug46651", "")
 {
   INITIALIZER(runBug46651);
 }
+TESTCASE("Bug46412", "")
+{
+  INITIALIZER(runBug46412);
+}
 NDBT_TESTSUITE_END(testSystemRestart);
 
 int main(int argc, const char** argv){

=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt	2009-08-17 07:25:41 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt	2009-08-18 06:57:20 +0000
@@ -1324,3 +1324,8 @@ max-time: 300
 cmd: testSystemRestart
 args: -n Bug46651 T1
 
+max-time: 300
+cmd: testSystemRestart
+args: -n Bug46412 T1
+
+# end of 6.3


Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20090818065720-6sjs8n3dejt00htl.bundle
Thread
bzr push into mysql-5.1-telco-6.3 branch (jonas:3009 to 3010) Bug#46412 — Jonas Oreland, 18 Aug