List: Commits « Previous Message | Next Message »
From: tomas Date: June 1 2006 10:46am
Subject: bk commit into 5.1 tree (tomas:1.2025) BUG#20185
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2025 06/06/01 10:46:09 tomas@stripped +5 -0
  Bug #20185  	Node failure might cause other node failure

  storage/ndb/test/run-test/daily-basic-tests.txt
    1.36 06/06/01 10:46:00 tomas@stripped +4 -0
    Bug #20185  	Node failure might cause other node failure

  storage/ndb/test/ndbapi/testNodeRestart.cpp
    1.24 06/06/01 10:46:00 tomas@stripped +56 -0
    Bug #20185  	Node failure might cause other node failure

  storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
    1.99 06/06/01 10:46:00 tomas@stripped +5 -3
    Bug #20185  	Node failure might cause other node failure

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
    1.46 06/06/01 10:46:00 tomas@stripped +24 -0
    Bug #20185  	Node failure might cause other node failure

  storage/ndb/src/kernel/blocks/ERROR_codes.txt
    1.15 06/06/01 10:46:00 tomas@stripped +3 -0
    Bug #20185  	Node failure might cause other node failure

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	tomas
# Host:	poseidon.ndb.mysql.com
# Root:	/home/tomas/wl2325-alcatel

--- 1.35/storage/ndb/test/run-test/daily-basic-tests.txt	2006-04-05 16:05:36 +02:00
+++ 1.36/storage/ndb/test/run-test/daily-basic-tests.txt	2006-06-01 10:46:00 +02:00
@@ -437,6 +437,10 @@
 cmd: testNodeRestart
 args: -n Bug18414 T1
 
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug20185 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench

--- 1.14/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2006-03-24 17:11:35 +01:00
+++ 1.15/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2006-06-01 10:46:00 +02:00
@@ -61,6 +61,9 @@
 5007:
 Delay GCP_SAVEREQ by 10 secs
 
+7030: Delay in GCP_PREPARE until node has completed a node failure
+7031: Delay in GCP_PREPARE and die 3s later
+
 ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
 -----------------------------------------------------------------
 

--- 1.45/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2006-04-07 12:24:10 +02:00
+++ 1.46/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2006-06-01 10:46:00 +02:00
@@ -5398,6 +5398,12 @@
     return;
   }
 
+  if (ERROR_INSERTED(7030))
+  {
+    ndbout_c("Reenable GCP_PREPARE");
+    CLEAR_ERROR_INSERT_VALUE;
+  }
+  
   NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
   nf->blockNo = DBDIH;
   nf->nodeId = cownNodeId;
@@ -7501,6 +7507,16 @@
 {
   jamEntry();
   CRASH_INSERTION(7005);
+
+  if (ERROR_INSERTED(7030))
+  {
+    cgckptflag = true;
+    ndbout_c("Delayed GCP_PREPARE 5s");
+    sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000,
+			signal->getLength());
+    return;
+  }
+  
   Uint32 masterNodeId = signal->theData[0];
   Uint32 gci = signal->theData[1];
   BlockReference retRef = calcDihBlockRef(masterNodeId);
@@ -7513,6 +7529,14 @@
   cgcpParticipantState = GCP_PARTICIPANT_PREPARE_RECEIVED;
   cnewgcp = gci;
 
+  if (ERROR_INSERTED(7031))
+  {
+    ndbout_c("Crashing delayed in GCP_PREPARE 3s");
+    signal->theData[0] = 9999;
+    sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1);
+    return;
+  }
+  
   signal->theData[0] = cownNodeId;
   signal->theData[1] = gci;  
   sendSignal(retRef, GSN_GCP_PREPARECONF, signal, 2, JBA);

--- 1.98/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2006-04-26 09:46:00 +02:00
+++ 1.99/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2006-06-01 10:46:00 +02:00
@@ -7084,18 +7084,20 @@
 {
   jam();
   Ptr<ApiConnectRecord> transPtr;
+  Uint32 TtcTimer = ctcTimer;
+  Uint32 TapplTimeout = c_appl_timeout_value;
   for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++)
   {
     ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord); 
     if (transPtr.p->m_transaction_nodes.get(failedNodeId))
     {
       jam();
+
       // Force timeout regardless of state      
-      Uint32 save = c_appl_timeout_value;
       c_appl_timeout_value = 1;
-      setApiConTimer(transPtr.i, 0, __LINE__);
+      setApiConTimer(transPtr.i, TtcTimer - 2, __LINE__);
       timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT);
-      c_appl_timeout_value = save;
+      c_appl_timeout_value = TapplTimeout;
     }
     
     // Send CONTINUEB to continue later

--- 1.23/storage/ndb/test/ndbapi/testNodeRestart.cpp	2006-04-05 16:05:36 +02:00
+++ 1.24/storage/ndb/test/ndbapi/testNodeRestart.cpp	2006-06-01 10:46:00 +02:00
@@ -868,6 +868,56 @@
   return NDBT_OK;
 }
 
+int runBug20185(NDBT_Context* ctx, NDBT_Step* step){
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords();
+  NdbRestarter restarter;
+  HugoOperations hugoOps(*ctx->getTab());
+  Ndb* pNdb = GETNDB(step);
+  
+  int dump[] = { 7090, 20 } ;
+  if (restarter.dumpStateAllNodes(dump, 2))
+    return NDBT_FAILED;
+  
+  NdbSleep_MilliSleep(3000);
+
+  if(hugoOps.startTransaction(pNdb) != 0)
+    return NDBT_FAILED;
+  
+  if(hugoOps.pkUpdateRecord(pNdb, 1, 1) != 0)
+    return NDBT_FAILED;
+  
+  if (hugoOps.execute_NoCommit(pNdb) != 0)
+    return NDBT_FAILED;
+  
+  int nodeId;
+  const int node = hugoOps.getTransaction()->getConnectedNodeId();
+  do {
+    nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
+  } while (nodeId == node);
+  
+  if (restarter.insertErrorInAllNodes(7030))
+    return NDBT_FAILED;
+  
+  if (restarter.insertErrorInNode(nodeId, 7031))
+    return NDBT_FAILED;
+  
+  NdbSleep_MilliSleep(500);
+  
+  if (hugoOps.execute_Commit(pNdb) == 0)
+    return NDBT_FAILED;
+
+  NdbSleep_MilliSleep(3000);
+
+  restarter.waitClusterStarted();
+  
+  if (restarter.dumpStateAllNodes(dump, 1))
+    return NDBT_FAILED;
+  
+  return NDBT_OK;
+}
+
 
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
@@ -1173,6 +1223,12 @@
 	 "Test bug with partitioned clusters"){
   INITIALIZER(runLoadTable);
   STEP(runBug18612SR);
+  FINALIZER(runClearTable);
+}
+TESTCASE("Bug20185",
+	 ""){
+  INITIALIZER(runLoadTable);
+  STEP(runBug20185);
   FINALIZER(runClearTable);
 }
 NDBT_TESTSUITE_END(testNodeRestart);
Thread
bk commit into 5.1 tree (tomas:1.2025) BUG#20185 | tomas | 1 Jun