MySQL Lists are EOL. Please join:

List: Commits  « Previous Message | Next Message »
From: jonas  Date: March 22 2006 10:44am
Subject: bk commit into 4.1 tree (jonas:1.2475) BUG#18414
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2475 06/03/22 11:44:31 jonas@stripped +6 -0
  ndb - bug#18414
    Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding

  ndb/test/run-test/daily-basic-tests.txt
    1.28 06/03/22 11:44:29 jonas@stripped +4 -0
    Add testcase for bug18414

  ndb/test/ndbapi/testTimeout.cpp
    1.13 06/03/22 11:44:29 jonas@stripped +5 -2
    Fix error code checking

  ndb/test/ndbapi/testNodeRestart.cpp
    1.15 06/03/22 11:44:29 jonas@stripped +73 -0
    Add testcase for bug18414

  ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
    1.64 06/03/22 11:44:29 jonas@stripped +44 -8
    Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding

  ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
    1.35 06/03/22 11:44:29 jonas@stripped +0 -4
    remove dumping of LCP info during NF

  ndb/src/kernel/blocks/ERROR_codes.txt
    1.15 06/03/22 11:44:29 jonas@stripped +2 -0
    New error code

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/41-work

--- 1.27/ndb/test/run-test/daily-basic-tests.txt	2006-03-21 14:47:08 +01:00
+++ 1.28/ndb/test/run-test/daily-basic-tests.txt	2006-03-22 11:44:29 +01:00
@@ -458,6 +458,10 @@
 cmd: testSystemRestart
 args: -n Bug18385 T1
 
+max-time: 500
+cmd: testNodeRestart
+args: -n Bug18414 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench

--- 1.14/ndb/src/kernel/blocks/ERROR_codes.txt	2006-03-21 14:47:08 +01:00
+++ 1.15/ndb/src/kernel/blocks/ERROR_codes.txt	2006-03-22 11:44:29 +01:00
@@ -226,6 +226,8 @@
 8045: (ABORTCONF only as part of take-over)
 Delay execution of ABORTCONF signal 2 seconds to generate time-out.
 
+8050: Send ZABORT_TIMEOUT_BREAK delayed
+
 ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
 -------------------------------------------------
 

--- 1.34/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2006-03-21 15:13:39 +01:00
+++ 1.35/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2006-03-22 11:44:29 +01:00
@@ -5982,10 +5982,6 @@
   signal->theData[0] = 7012;
   execDUMP_STATE_ORD(signal);
 
-  signal->theData[0] = 7015;
-  signal->theData[1] = 0;
-  execDUMP_STATE_ORD(signal);
-
   c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
 
   checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);

--- 1.63/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2006-03-20 14:53:27 +01:00
+++ 1.64/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2006-03-22 11:44:29 +01:00
@@ -6386,6 +6386,7 @@
     return;
   }
   
+  bool found = false;
   OperationState tmp[16];
   
   Uint32 TloopCount = 0;
@@ -6393,7 +6394,31 @@
     jam();
     if (tcConnectptr.i == RNIL) {
       jam();
-      if (Tcheck == 0) {
+
+#ifdef VM_TRACE
+      ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d",
+	       found, Tcheck, apiConnectptr.p->counter);
+#endif
+      if (found || apiConnectptr.p->counter)
+      {
+	jam();
+	/**
+	 * We sent atleast one ABORT/ABORTED
+	 *   or ZABORT_TIMEOUT_BREAK is in job buffer
+	 *   wait for reception...
+	 */
+	return;
+      }
+      
+      if (Tcheck == 1)
+      {
+	jam();
+	releaseAbortResources(signal);
+	return;
+      }
+      
+      if (Tcheck == 0)
+      {
         jam();
 	/*------------------------------------------------------------------
 	 * All nodes had already reported ABORTED for all tcConnect records.
@@ -6402,9 +6427,11 @@
 	 *------------------------------------------------------------------*/
 	char buf[96]; buf[0] = 0;
 	char buf2[96];
-	BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:",
-		 __LINE__, apiConnectptr.i);
-	for(Uint32 i = 0; i<TloopCount; i++){
+	BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:",
+			     __LINE__, apiConnectptr.i,
+			     apiConnectptr.p->counter);
+	for(Uint32 i = 0; i<TloopCount; i++)
+	{
 	  BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]);
 	  BaseString::snprintf(buf, sizeof(buf), buf2);
 	}
@@ -6412,7 +6439,9 @@
 	ndbout_c(buf);
 	ndbrequire(false);
 	releaseAbortResources(signal);
+	return;
       }
+      
       return;
     }//if
     TloopCount++;
@@ -6427,7 +6456,16 @@
       signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK;
       signal->theData[1] = tcConnectptr.i;
       signal->theData[2] = apiConnectptr.i;      
-      sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+      if (ERROR_INSERTED(8050))
+      {
+	ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)", 
+		 Tcheck, apiConnectptr.p->counter);
+	sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3);
+      }
+      else
+      {
+	sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
+      }
       return;
     }//if
     ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
@@ -6450,7 +6488,7 @@
         jam();
         if (tcConnectptr.p->tcNodedata[Ti] != 0) {
           TloopCount += 31;
-          Tcheck = 1;
+	  found = true;
           hostptr.i = tcConnectptr.p->tcNodedata[Ti];
           ptrCheckGuard(hostptr, chostFilesize, hostRecord);
           if (hostptr.p->hostStatus == HS_ALIVE) {
@@ -7007,8 +7045,6 @@
   hostptr.i = tfailedNodeId;
   ptrCheckGuard(hostptr, chostFilesize, hostRecord);
 
-  ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)",
-	   tfailedNodeId, signal->getSendersBlockRef(), reference());
   if (signal->getSendersBlockRef() != reference())
   {
     jam();

--- 1.14/ndb/test/ndbapi/testNodeRestart.cpp	2006-03-17 10:55:00 +01:00
+++ 1.15/ndb/test/ndbapi/testNodeRestart.cpp	2006-03-22 11:44:29 +01:00
@@ -581,6 +581,73 @@
   return ret ? NDBT_OK : NDBT_FAILED;
 }
 
+int 
+runBug18414(NDBT_Context* ctx, NDBT_Step* step){
+
+  NdbRestarter restarter;
+  if (restarter.getNumDbNodes() < 2)
+  {
+    ctx->stopTest();
+    return NDBT_OK;
+  }
+
+  Ndb* pNdb = GETNDB(step);
+  HugoOperations hugoOps(*ctx->getTab());
+  HugoTransactions hugoTrans(*ctx->getTab());
+  int loop = 0;
+  do 
+  {
+    if(hugoOps.startTransaction(pNdb) != 0)
+      goto err;
+    
+    if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0)
+      goto err;
+    
+    if(hugoOps.execute_NoCommit(pNdb) != 0)
+      goto err;
+
+    int node1 = hugoOps.getTransaction()->getConnectedNodeId();
+    int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
+    
+    if (node1 == -1 || node2 == -1)
+      break;
+    
+    if (loop & 1)
+    {
+      if (restarter.insertErrorInNode(node1, 8050))
+	goto err;
+    }
+    
+    if (restarter.insertErrorInNode(node2, 5003))
+      goto err;
+    
+    int res= hugoOps.execute_Rollback(pNdb);
+  
+    if (restarter.waitNodesNoStart(&node2, 1) != 0)
+      goto err;
+    
+    if (restarter.insertErrorInAllNodes(0))
+      goto err;
+    
+    if (restarter.startNodes(&node2, 1) != 0)
+      goto err;
+    
+    if (restarter.waitClusterStarted() != 0)
+      goto err;
+    
+    if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0)
+      goto err;
+
+    hugoOps.closeTransaction(pNdb);
+    
+  } while(++loop < 5);
+  
+  return NDBT_OK;
+  
+err:
+  hugoOps.closeTransaction(pNdb);
+  return NDBT_FAILED;    
+}
 
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
@@ -869,6 +936,12 @@
 TESTCASE("Bug16772",
 	 "Test bug with restarting before NF handling is complete"){
   STEP(runBug16772);
+}
+TESTCASE("Bug18414",
+	 "Test bug with NF during NR"){
+  INITIALIZER(runLoadTable);
+  STEP(runBug18414);
+  FINALIZER(runClearTable);
 }
 NDBT_TESTSUITE_END(testNodeRestart);
 

--- 1.12/ndb/test/ndbapi/testTimeout.cpp	2006-03-20 14:49:44 +01:00
+++ 1.13/ndb/test/ndbapi/testTimeout.cpp	2006-03-22 11:44:29 +01:00
@@ -173,8 +173,11 @@
       NdbSleep_MilliSleep(sleep);
       
       // Expect that transaction has timed-out
-      CHECK(hugoOps.execute_Commit(pNdb) == 237); 
-
+      int ret = hugoOps.execute_Commit(pNdb);
+      CHECK(ret != 0);
+      NdbError err = pNdb->getNdbError(ret);
+      CHECK(err.classification == NdbError::TimeoutExpired);
+      
     } while(false);
 
     hugoOps.closeTransaction(pNdb);
Thread
bk commit into 4.1 tree (jonas:1.2475) BUG#18414 | jonas | 22 Mar