List: Commits  « Previous Message | Next Message »
From: Jonas Oreland  Date: December 8 2008 12:36pm
Subject: bzr commit into mysql-5.1 branch (jonas:2764) Bug#41295 Bug#41296 Bug#41297
View as plain text  
#At file:///home/jonas/src/telco-6.2/

 2764 Jonas Oreland	2008-12-08
      ndb - bug#41295 bug#41296 bug#41297
modified:
  storage/ndb/src/kernel/blocks/ERROR_codes.txt
  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
  storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
  storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp
  storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
  storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
  storage/ndb/test/include/NdbRestarter.hpp
  storage/ndb/test/ndbapi/testNodeRestart.cpp
  storage/ndb/test/run-test/daily-basic-tests.txt
  storage/ndb/test/src/NdbRestarter.cpp

=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2008-08-11 10:41:11 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2008-12-08 12:35:55 +0000
@@ -6,7 +6,7 @@ Next DBTUP 4029
 Next DBLQH 5051
 Next DBDICT 6008
 Next DBDIH 7215
-Next DBTC 8064
+Next DBTC 8074
 Next CMVMI 9000
 Next BACKUP 10041
 Next DBUTIL 11002

=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2008-08-11 10:41:11 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2008-12-08 12:35:55 +0000
@@ -5188,16 +5188,32 @@ void Dbdih::checkGcpOutstanding(Signal* 
                GCPPrepareConf::SignalLength, JBB);
   }//if
 
-  if (c_GCP_COMMIT_Counter.isWaitingFor(failedNodeId)) {
+  if (c_GCP_COMMIT_Counter.isWaitingFor(failedNodeId)) 
+  {
     jam();
-    GCPNodeFinished* conf = (GCPNodeFinished*)signal->getDataPtrSend();
-    conf->nodeId = failedNodeId;
-    conf->gci_hi = Uint32(m_micro_gcp.m_old_gci >> 32);
-    conf->gci_lo = Uint32(m_micro_gcp.m_old_gci);
-    conf->failno = cfailurenr;
-    sendSignal(reference(), GSN_GCP_NODEFINISH, signal, 
-               GCPNodeFinished::SignalLength, JBB);
-  }//if
+    
+    /**
+     * Waiting for GSN_GCP_NODEFINISH
+     *   TC-take-over can generate new transactions
+     *   that will be in this epoch
+     *   re-run GCP_NOMORETRANS to master-TC (self) that will run
+     *   take-over
+     */
+    c_GCP_COMMIT_Counter.clearWaitingFor(failedNodeId);
+    if (!c_GCP_COMMIT_Counter.isWaitingFor(getOwnNodeId()))
+    {
+      jam();
+      c_GCP_COMMIT_Counter.setWaitingFor(getOwnNodeId());
+      m_micro_gcp.m_state = MicroGcp::M_GCP_COMMIT;
+    }
+     
+    GCPNoMoreTrans* req = (GCPNoMoreTrans*)signal->getDataPtrSend();
+    req->senderData = m_micro_gcp.m_master_ref;
+    req->gci_hi = m_micro_gcp.m_old_gci >> 32;
+    req->gci_lo = m_micro_gcp.m_old_gci & 0xFFFFFFFF;
+    sendSignal(clocaltcblockref, GSN_GCP_NOMORETRANS, signal,
+               GCPNoMoreTrans::SignalLength, JBB);
+  }
 
   if (c_GCP_SAVEREQ_Counter.isWaitingFor(failedNodeId)) {
     jam();
@@ -15589,7 +15605,6 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal
     SET_ERROR_INSERT_VALUE2(7214, signal->theData[1]);
     return;
   }
-
 }//Dbdih::execDUMP_STATE_ORD()
 
 void

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2008-11-14 11:17:53 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp	2008-12-08 12:35:55 +0000
@@ -944,7 +944,9 @@ public:
   typedef Ptr<GcpRecord> GcpRecordPtr;
 
   struct HostRecord {
-    bool inPackedList;
+    Uint8 inPackedList;
+    Uint8 nodestatus;
+    Uint8 _unused[2];
     UintR noOfPackedWordsLqh;
     UintR packedWordsLqh[30];
     UintR noOfPackedWordsTc;

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp	2007-11-19 10:04:24 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhInit.cpp	2008-12-08 12:35:55 +0000
@@ -63,6 +63,10 @@ void Dblqh::initData() 
   m_backup_ptr = RNIL;
   clogFileSize = 16;
   cmaxLogFilesInPageZero = 40;
+
+  for (Uint32 i = 0; i < 1024; i++) {
+    ctransidHash[i] = RNIL;
+  }//for
 }//Dblqh::initData()
 
 void Dblqh::initRecords() 

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2008-11-14 11:17:53 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2008-12-08 12:35:55 +0000
@@ -496,6 +496,14 @@ void Dblqh::execINCL_NODEREQ(Signal* sig
       cnodeStatus[i] = ZNODE_UP;
     }//if
   }//for
+
+  {
+    HostRecordPtr Thostptr;
+    Thostptr.i = nodeId;
+    ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
+    Thostptr.p->nodestatus = ZNODE_UP;
+  }
+
   signal->theData[0] = nodeId;
   signal->theData[1] = cownref; 
   sendSignal(retRef, GSN_INCL_NODECONF, signal, 2, JBB);
@@ -712,6 +720,7 @@ void Dblqh::startphase1Lab(Signal* signa
     ThostPtr.p->inPackedList = false;
     ThostPtr.p->noOfPackedWordsLqh = 0;
     ThostPtr.p->noOfPackedWordsTc  = 0;
+    ThostPtr.p->nodestatus = ZNODE_DOWN;
   }//for
   cpackedListIndex = 0;
   sendNdbSttorryLab(signal);
@@ -898,6 +907,14 @@ void Dblqh::execREAD_NODESCONF(Signal* s
       jam();
       cnodeData[ind]    = i;
       cnodeStatus[ind]  = NdbNodeBitmask::get(readNodes->inactiveNodes, i);
+
+      {
+        HostRecordPtr Thostptr;
+        Thostptr.i = i;
+        ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
+        Thostptr.p->nodestatus = cnodeStatus[ind];
+      }
+
       //readNodes->getVersionId(i, readNodes->theVersionIds) not used
       if (!NodeBitmask::get(readNodes->inactiveNodes, i))
       {
@@ -2280,7 +2297,9 @@ void Dblqh::noFreeRecordLab(Signal* sign
   const Uint32 reqInfo   = lqhKeyReq->requestInfo;
   
   if(errCode == ZNO_FREE_MARKER_RECORDS_ERROR ||
-     errCode == ZNODE_SHUTDOWN_IN_PROGESS){
+     errCode == ZNODE_SHUTDOWN_IN_PROGESS ||
+     errCode == ZNODE_FAILURE_ERROR){
+    jam();
     releaseTcrec(signal, tcConnectptr);
   }
 
@@ -3503,6 +3522,17 @@ void Dblqh::execLQHKEYREQ(Signal* signal
     noFreeRecordLab(signal, lqhKeyReq, ZNODE_SHUTDOWN_IN_PROGESS);
     return;
   }
+
+  {
+    HostRecordPtr Thostptr;
+    Thostptr.i = refToNode(sig5); // TC-ref
+    ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
+    if (unlikely(Thostptr.p->nodestatus != ZNODE_UP))
+    {
+      noFreeRecordLab(signal, lqhKeyReq, ZNODE_FAILURE_ERROR);
+      return;
+    }
+  }
   
   Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version;
 
@@ -7468,6 +7498,22 @@ void Dblqh::continueAbortLab(Signal* sig
 void Dblqh::continueAfterLogAbortWriteLab(Signal* signal) 
 {
   TcConnectionrec * const regTcPtr = tcConnectptr.p;
+
+  const Uint32 commitAckMarker = regTcPtr->commitAckMarker;
+  if(commitAckMarker != RNIL)
+  {
+    jam();
+#ifdef MARKER_TRACE
+    {
+      CommitAckMarkerPtr tmp;
+      m_commitAckMarkerHash.getPtr(tmp, commitAckMarker);
+      ndbout_c("Ab2 marker[%.8x %.8x]", tmp.p->transid1, tmp.p->transid2);
+    }
+#endif
+    m_commitAckMarkerHash.release(commitAckMarker);
+    regTcPtr->commitAckMarker = RNIL;
+  }
+
   if (regTcPtr->operation == ZREAD && regTcPtr->dirtyOp)
   {
     jam();
@@ -7591,6 +7637,14 @@ void Dblqh::execNODE_FAILREP(Signal* sig
   ndbrequire(cnoOfNodes - 1 < MAX_NDB_NODES);
   for (i = 0; i < TnoOfNodes; i++) {
     const Uint32 nodeId = Tdata[i];
+
+    {
+      HostRecordPtr Thostptr;
+      Thostptr.i = nodeId;
+      ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
+      Thostptr.p->nodestatus = ZNODE_DOWN;
+    }
+
     lcpPtr.p->m_EMPTY_LCP_REQ.clear(nodeId);
     
     for (Uint32 j = 0; j < cnoOfNodes; j++) {
@@ -17196,9 +17250,6 @@ void Dblqh::initialiseRecordsLab(Signal*
     m_sr_nodes.clear();
     m_sr_exec_sr_req.clear();
     m_sr_exec_sr_conf.clear();
-    for (i = 0; i < 1024; i++) {
-      ctransidHash[i] = RNIL;
-    }//for
     for (i = 0; i < 4; i++) {
       cactiveCopy[i] = RNIL;
     }//for
@@ -18493,6 +18544,18 @@ void Dblqh::sendLqhTransconf(Signal* sig
   signal->theData[0] = ZLQH_TRANS_NEXT;
   signal->theData[1] = tcNodeFailptr.i;
   sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
+
+  if (0)
+  {
+    ndbout_c("sending LQH_TRANSCONF %u transid: H'%.8x, H'%.8x op: %u state: %u(%u) marker: %u",
+             tcConnectptr.i, 
+             tcConnectptr.p->transid[0],
+             tcConnectptr.p->transid[1],
+             tcConnectptr.p->operation,           
+             tcConnectptr.p->transactionState,
+             stat,
+             tcConnectptr.p->commitAckMarker);
+  }
 }//Dblqh::sendLqhTransconf()
 
 /* --------------------------------------------------------------------------
@@ -18887,7 +18950,10 @@ Dblqh::validate_filter(Signal* signal)
   if (start == end)
   {
     infoEvent("No filter specified, not listing...");
-    return false;
+    if (!ERROR_INSERTED(4002))
+      return false;
+    else
+      return true;
   }
 
   while(start < end)
@@ -19079,7 +19145,7 @@ Dblqh::match_and_print(Signal* signal, P
   char buf[100];
   BaseString::snprintf(buf, sizeof(buf),
 		       "OP[%u]: Tab: %d frag: %d TC: %u API: %d(0x%x)"
-		       "transid: 0x%x 0x%x op: %s state: %s",
+		       "transid: H'%.8x H'%.8x op: %s state: %s",
 		       tcRec.i,
 		       tcRec.p->tableref, 
 		       tcRec.p->fragmentid,
@@ -19090,7 +19156,10 @@ Dblqh::match_and_print(Signal* signal, P
 		       op,
 		       state);
   
-  infoEvent(buf);
+  if (!ERROR_INSERTED(4002))
+    infoEvent(buf);
+  else
+    ndbout_c(buf);
   
   memcpy(signal->theData, temp, 4*len);
   return true;
@@ -19115,7 +19184,7 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
     CommitAckMarkerIterator iter;
     for(m_commitAckMarkerHash.first(iter); iter.curr.i != RNIL;
 	m_commitAckMarkerHash.next(iter)){
-      infoEvent("CommitAckMarker: i = %d (0x%x, 0x%x)"
+      infoEvent("CommitAckMarker: i = %d (H'%.8x, H'%.8x)"
 		" ApiRef: 0x%x apiOprec: 0x%x TcNodeId: %d",
 		iter.curr.i,
 		iter.curr.p->transid1,
@@ -19636,7 +19705,9 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
       else
       {
 	jam();
-	infoEvent("End of operation dump");
+        infoEvent("End of operation dump");
+        if (ERROR_INSERTED(4002))
+          ndbrequire(false);
       }
 
       return;
@@ -19677,7 +19748,9 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
       else
       {
 	jam();
-	infoEvent("End of operation dump");
+        infoEvent("End of operation dump");
+        if (ERROR_INSERTED(4002))
+          ndbrequire(false);
       }
       
       return;
@@ -19743,6 +19816,50 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
     RSS_AP_SNAPSHOT_CHECK(c_fragment_pool);
     return;
   }
+
+  if (arg == 4002)
+  {
+    bool ops = false;
+    for (Uint32 i = 0; i<1024; i++)
+    {
+      if (ctransidHash[i] != RNIL)
+      {
+        jam();
+        ops = true;
+        break;
+      }
+    }
+
+    bool markers = m_commitAckMarkerPool.getNoOfFree() != 
+      m_commitAckMarkerPool.getSize();
+    if (unlikely(ops || markers))
+    {
+
+      if (markers)
+      {
+        ndbout_c("LQH: m_commitAckMarkerPool: %d free size: %d",
+                 m_commitAckMarkerPool.getNoOfFree(),
+                 m_commitAckMarkerPool.getSize());
+        
+        CommitAckMarkerIterator iter;
+        for(m_commitAckMarkerHash.first(iter); iter.curr.i != RNIL;
+            m_commitAckMarkerHash.next(iter))
+        {
+          ndbout_c("CommitAckMarker: i = %d (H'%.8x, H'%.8x)"
+                   " ApiRef: 0x%x apiOprec: 0x%x TcNodeId: %d",
+                   iter.curr.i,
+                   iter.curr.p->transid1,
+                   iter.curr.p->transid2,
+                   iter.curr.p->apiRef,
+                   iter.curr.p->apiOprec,
+                   iter.curr.p->tcNodeId);
+        }
+      }
+      SET_ERROR_INSERT_VALUE(4002);
+      signal->theData[0] = 2350;
+      EXECUTE_DIRECT(DBLQH, GSN_DUMP_STATE_ORD, signal, 1);
+    }
+  }
 }//Dblqh::execDUMP_STATE_ORD()
 
 /* **************************************************************** */

=== modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2008-12-03 19:44:54 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2008-12-08 12:35:55 +0000
@@ -3302,6 +3302,8 @@ void Dbtc::sendlqhkeyreq(Signal* signal,
   const Uint32 noOfLqhs = regTcPtr->noOfNodes;
   if(commitAckMarker != RNIL){
     jam();
+    ndbassert(regApiPtr->commitAckMarker == commitAckMarker);
+
     LqhKeyReq::setMarkerFlag(Tdata10, 1);
 
     CommitAckMarker * tmp = m_commitAckMarkerHash.getPtr(commitAckMarker);
@@ -3594,6 +3596,7 @@ void Dbtc::releaseTcCon() 
   UintR TconcurrentOp = c_counters.cconcurrentOp;
   UintR TtcConnectptrIndex = tcConnectptr.i;
 
+  ndbrequire(regTcPtr->commitAckMarker == RNIL);
   regTcPtr->tcConnectstate = OS_CONNECTED;
   regTcPtr->nextTcConnect = TfirstfreeTcConnect;
   regTcPtr->apiConnect = RNIL;
@@ -3786,6 +3789,7 @@ void Dbtc::execLQHKEYCONF(Signal* signal
   regTcPtr->lastLqhCon = tlastLqhConnect;
   regTcPtr->lastLqhNodeId = refToNode(tlastLqhBlockref);
   regTcPtr->noFiredTriggers = noFired;
+  regTcPtr->commitAckMarker = RNIL;
 
   UintR Ttckeyrec = (UintR)regApiPtr.p->tckeyrec;
   UintR TclientData = regTcPtr->clientData;
@@ -4519,7 +4523,7 @@ void Dbtc::commit020Lab(Signal* signal) 
 
     if (localTcConnectptr.i != RNIL) {
       Tcount = Tcount + 1;
-      if (Tcount < 16 && !ERROR_INSERTED(8057)) {
+      if (Tcount < 16 && !ERROR_INSERTED(8057) && !ERROR_INSERTED(8073)) {
         ptrCheckGuard(localTcConnectptr,
                       TtcConnectFilesize, localTcConnectRecord);
         jam();
@@ -4530,6 +4534,14 @@ void Dbtc::commit020Lab(Signal* signal) 
           CLEAR_ERROR_INSERT_VALUE;
           return;
         }//if
+        
+        if (ERROR_INSERTED(8073))
+        {
+          execSEND_PACKED(signal);
+          signal->theData[0] = 9999;
+          sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 100, 1);
+          return;
+        }
         signal->theData[0] = TcContinueB::ZSEND_COMMIT_LOOP;
         signal->theData[1] = apiConnectptr.i;
         signal->theData[2] = localTcConnectptr.i;
@@ -5372,8 +5384,10 @@ void Dbtc::clearCommitAckMarker(ApiConne
   if (regApiPtr->commitAckMarker == RNIL)
     ndbassert(commitAckMarker == RNIL);
   if (commitAckMarker != RNIL)
-    ndbassert(regApiPtr->commitAckMarker != RNIL);
-  if(commitAckMarker != RNIL){
+    ndbassert(regApiPtr->commitAckMarker == commitAckMarker);
+  
+  if(commitAckMarker != RNIL)
+  {
     jam();
     m_commitAckMarkerHash.release(commitAckMarker);
     regTcPtr->commitAckMarker = RNIL;
@@ -7965,7 +7979,7 @@ Dbtc::sendTCKEY_FAILCONF(Signal* signal,
   const Uint32 nodeId = refToNode(ref);
   if(ref != 0)
   {
-    jam()
+    jam();
     failConf->apiConnectPtr = regApiPtr->ndbapiConnect | (marker != RNIL);
     failConf->transId1 = regApiPtr->transid[0];
     failConf->transId2 = regApiPtr->transid[1];
@@ -8800,6 +8814,7 @@ void Dbtc::updateApiStateFail(Signal* si
       tmp.p->noOfLqhs      = 1;
       tmp.p->lqhNodeId[0]  = tnodeid;
       tmp.p->apiConnectPtr = apiConnectptr.i;
+
 #if defined VM_TRACE || defined ERROR_INSERT
       {
 	CommitAckMarkerPtr check;
@@ -8814,10 +8829,23 @@ void Dbtc::updateApiStateFail(Signal* si
       tmp.i = marker;
       tmp.p = m_commitAckMarkerHash.getPtr(marker);
 
+      ndbassert(tmp.p->transid1 == ttransid1);
+      ndbassert(tmp.p->transid2 == ttransid2);
+
       const Uint32 noOfLqhs = tmp.p->noOfLqhs;
+      for (Uint32 i = 0; i<noOfLqhs && i < MAX_REPLICAS; i++)
+      {
+        if (tmp.p->lqhNodeId[i] == tnodeid)
+        {
+          jam();
+          goto found;
+        }
+      }
       ndbrequire(noOfLqhs < MAX_REPLICAS);
       tmp.p->lqhNodeId[noOfLqhs] = tnodeid;
       tmp.p->noOfLqhs = (noOfLqhs + 1);
+  found:
+      (void)1;
     }
   }
 
@@ -10809,6 +10837,7 @@ void Dbtc::initialiseTcConnect(Signal* s
     tcConnectptr.p->apiConnect = RNIL;
     tcConnectptr.p->noOfNodes = 0;
     tcConnectptr.p->nextTcConnect = tcConnectptr.i + 1;
+    tcConnectptr.p->commitAckMarker = RNIL;
   }//for
   tcConnectptr.i = titcTmp - 1;
   ptrAss(tcConnectptr, tcConnectRecord);
@@ -10825,6 +10854,7 @@ void Dbtc::initialiseTcConnect(Signal* s
     tcConnectptr.p->apiConnect = RNIL;
     tcConnectptr.p->noOfNodes = 0;
     tcConnectptr.p->nextTcConnect = tcConnectptr.i + 1;
+    tcConnectptr.p->commitAckMarker = RNIL;
   }//for
   tcConnectptr.i = ctcConnectFilesize - 1;
   ptrAss(tcConnectptr, tcConnectRecord);
@@ -10918,6 +10948,15 @@ void Dbtc::releaseAbortResources(Signal*
     releaseTcCon();
     tcConnectptr.i = rarTcConnectptr.i;
   }//while
+
+  Uint32 marker = apiConnectptr.p->commitAckMarker;
+  if (marker != RNIL)
+  {
+    jam();
+    m_commitAckMarkerHash.release(marker);
+    apiConnectptr.p->commitAckMarker = RNIL;
+  }
+
   apiConnectptr.p->firstTcConnect = RNIL;
   apiConnectptr.p->lastTcConnect = RNIL;
   apiConnectptr.p->m_transaction_nodes.clear();
@@ -11620,6 +11659,12 @@ Dbtc::execDUMP_STATE_ORD(Signal* signal)
     }
     return;
   }
+
+  if (arg == 4002)
+  {
+    ndbrequire(m_commitAckMarkerPool.getNoOfFree() == 
+               m_commitAckMarkerPool.getSize());
+  }
 }//Dbtc::execDUMP_STATE_ORD()
 
 bool

=== modified file 'storage/ndb/test/include/NdbRestarter.hpp'
--- a/storage/ndb/test/include/NdbRestarter.hpp	2008-02-21 13:57:42 +0000
+++ b/storage/ndb/test/include/NdbRestarter.hpp	2008-12-08 12:35:55 +0000
@@ -69,6 +69,7 @@ public:
   int waitNodesNoStart(const int * _nodes, int _num_nodes,
 		       unsigned int _timeout = 120); 
 
+  int checkClusterAlive(const int * deadnodes, int num_nodes);
 
   int getNumDbNodes();
   int insertErrorInNode(int _nodeId, int error);

=== modified file 'storage/ndb/test/ndbapi/testNodeRestart.cpp'
--- a/storage/ndb/test/ndbapi/testNodeRestart.cpp	2008-12-03 19:44:54 +0000
+++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp	2008-12-08 12:35:55 +0000
@@ -2929,7 +2929,6 @@ loop2:
 int 
 runHammer(NDBT_Context* ctx, NDBT_Step* step)
 { 
-  int result = NDBT_OK;
   int records = ctx->getNumRecords();
   Ndb* pNdb = GETNDB(step);
   HugoOperations hugoOps(*ctx->getTab());
@@ -2937,7 +2936,7 @@ runHammer(NDBT_Context* ctx, NDBT_Step* 
   {
     int r = rand() % records;
     if (hugoOps.startTransaction(pNdb) != 0)
-      goto err;
+      continue;
     
     if ((rand() % 100) < 50)
     {
@@ -2985,6 +2984,139 @@ runHammer(NDBT_Context* ctx, NDBT_Step* 
   return NDBT_OK;
 }
 
+int 
+runMixedLoad(NDBT_Context* ctx, NDBT_Step* step)
+{ 
+  int res = 0;
+  int records = ctx->getNumRecords();
+  Ndb* pNdb = GETNDB(step);
+  HugoOperations hugoOps(*ctx->getTab());
+  unsigned id = (unsigned)rand();
+  while (!ctx->isTestStopped())
+  {
+    if (ctx->getProperty("Pause", (Uint32)0))
+    {
+      ndbout_c("thread %u stopped", id);
+      ctx->sync_down("WaitThreads");
+      while (ctx->getProperty("Pause", (Uint32)0) && !ctx->isTestStopped())
+        NdbSleep_MilliSleep(15);
+      
+      if (ctx->isTestStopped())
+        break;
+      ndbout_c("thread %u continue", id);
+    }
+
+    if ((res = hugoOps.startTransaction(pNdb)) != 0)
+    {
+      if (res == 4009)
+        return NDBT_FAILED;
+      continue;
+    }
+    
+    for (int i = 0; i < 10; i++)
+    {
+      int r = rand() % records;
+      if ((rand() % 100) < 50)
+      {
+        if (hugoOps.pkUpdateRecord(pNdb, r, 1, rand()) != 0)
+          goto err;
+      }
+      else
+      {
+        if (hugoOps.pkWriteRecord(pNdb, r, 1, rand()) != 0)
+          goto err;
+      }
+    }      
+    
+    if ((rand() % 100) < 90)
+    {
+      res = hugoOps.execute_Commit(pNdb);
+    }
+    else
+    {
+  err:
+      res = hugoOps.execute_Rollback(pNdb);
+    }
+    
+    hugoOps.closeTransaction(pNdb);
+
+    if (res == 4009)
+    {
+      return NDBT_FAILED;
+    }
+  }
+  return NDBT_OK;
+}
+
+int
+runBug41295(NDBT_Context* ctx, NDBT_Step* step)
+{
+  NdbRestarter res;
+
+  if (res.getNumDbNodes() < 2)
+  {
+    ctx->stopTest();
+    return NDBT_OK;
+  }
+
+
+  int leak = 4002;
+  const int cases = 1;
+  int loops = ctx->getNumLoops();
+  if (loops <= cases)
+    loops = cases + 1;
+
+  for (int i = 0; i<loops; i++)
+  {
+    int master = res.getMasterNodeId();
+    int next = res.getNextMasterNodeId(master);
+    
+    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
+    if (res.dumpStateOneNode(next, val2, 2))
+      return NDBT_FAILED;
+    
+    ndbout_c("stopping %u, err 8073", next);
+    res.insertErrorInNode(next, 8073);
+    ndbout_c("waiting for %u", next);
+    res.waitNodesNoStart(&next, 1);
+    
+    ndbout_c("pausing all threads");
+    ctx->setProperty("Pause", 1);
+    ctx->sync_up_and_wait("WaitThreads", ctx->getProperty("Threads", 1));
+    ndbout_c("all threads paused");
+    NdbSleep_MilliSleep(5000);
+    res.dumpStateAllNodes(&leak, 1);
+    NdbSleep_MilliSleep(1000);
+    if (res.checkClusterAlive(&next, 1))
+    {
+      return NDBT_FAILED;
+    }
+    ndbout_c("restarting threads");
+    ctx->setProperty("Pause", (Uint32)0);
+    
+    ndbout_c("starting %u", next);
+    res.startNodes(&next, 1);
+    ndbout_c("waiting for cluster started");
+    if (res.waitClusterStarted())
+    {
+      return NDBT_FAILED;
+    }
+
+    ndbout_c("pausing all threads");
+    ctx->setProperty("Pause", 1);
+    ctx->sync_up_and_wait("WaitThreads", ctx->getProperty("Threads", 1));
+    ndbout_c("all threads paused");
+    NdbSleep_MilliSleep(5000);
+    res.dumpStateAllNodes(&leak, 1);
+    NdbSleep_MilliSleep(1000);
+    ndbout_c("restarting threads");
+    ctx->setProperty("Pause", (Uint32)0);
+  }
+  
+  ctx->stopTest();
+  return NDBT_OK;
+}
+
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
 	 "Test that one node at a time can be stopped and then restarted "\
@@ -3424,6 +3556,14 @@ TESTCASE("NF_Hammer", ""){
   STEP(runRestarter);
   VERIFIER(runClearTable);
 }
+TESTCASE("Bug41295", "")
+{
+  TC_PROPERTY("Threads", 25);
+  INITIALIZER(runLoadTable);
+  STEPS(runMixedLoad, 25);
+  STEP(runBug41295);
+  FINALIZER(runClearTable);
+}
 NDBT_TESTSUITE_END(testNodeRestart);
 
 int main(int argc, const char** argv){

=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt	2008-12-03 20:11:52 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt	2008-12-08 12:35:55 +0000
@@ -1172,3 +1172,7 @@ cmd: test_event
 args -r 5000 -n Bug30780 T1
 
 #EOF 2008-08-11
+max-time: 1200
+cmd: testNodeRestart
+args -n Bug41295 T1
+

=== modified file 'storage/ndb/test/src/NdbRestarter.cpp'
--- a/storage/ndb/test/src/NdbRestarter.cpp	2008-03-26 14:34:39 +0000
+++ b/storage/ndb/test/src/NdbRestarter.cpp	2008-12-08 12:35:55 +0000
@@ -22,6 +22,7 @@
 #include <random.h>
 #include <kernel/ndb_limits.h>
 #include <ndb_version.h>
+#include <NodeBitmask.hpp>
 
 #define MGMERR(h) \
   ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
@@ -541,7 +542,7 @@ NdbRestarter::getStatus(){
         }
       }
       const int err = ndb_mgm_get_latest_error(handle);
-      ndbout << "status==NULL, retries="<<retries<<endl;
+      ndbout << "status==NULL, retries="<<retries<< " err=" << err << endl;
       MGMERR(handle);
       retries++;
       continue;
@@ -802,5 +803,26 @@ NdbRestarter::setReconnect(bool val){
   m_reconnect= val;
 }
 
+int
+NdbRestarter::checkClusterAlive(const int * deadnodes, int num_nodes)
+{
+  if (getStatus() != 0)
+    return -1;
+  
+  NdbNodeBitmask mask;
+  for (int i = 0; i<num_nodes; i++)
+    mask.set(deadnodes[i]);
+  
+  for (size_t n = 0; n < ndbNodes.size(); n++)
+  {
+    if (mask.get(ndbNodes[n].node_id))
+      continue;
+
+    if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED)
+      return ndbNodes[n].node_id;
+  }
+  
+  return 0;
+}
 
 template class Vector<ndb_mgm_node_state>;

Thread
bzr commit into mysql-5.1 branch (jonas:2764) Bug#41295 Bug#41296 Bug#41297 | Jonas Oreland | 8 Dec