List: Commits    « Previous Message | Next Message »
From: jonas    Date: March 5 2007 4:16pm
Subject: bk commit into 5.1 tree (jonas:1.2106) BUG#25984
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-03-05 16:16:01+01:00, jonas@stripped +3 -0
  ndb - wl2325-5.0
    Bug #25984 8 failed node restart kills alive cluster

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-03-05 16:16:00+01:00,
jonas@stripped +27 -2
    Bug #25984 8 failed node restart kills alive cluster

  storage/ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2007-03-05 16:16:00+01:00,
jonas@stripped +98 -0
    Bug #25984 8 failed node restart kills alive cluster

  storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-03-05 16:16:00+01:00,
jonas@stripped +4 -0
    Bug #25984 8 failed node restart kills alive cluster

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/drop5

--- 1.44/storage/ndb/test/run-test/daily-basic-tests.txt	2007-03-05 16:16:04 +01:00
+++ 1.45/storage/ndb/test/run-test/daily-basic-tests.txt	2007-03-05 16:16:04 +01:00
@@ -524,6 +524,10 @@
 cmd: testDict
 args: -n TemporaryTables T1 T6 T7 T8 
 
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug25984
+
 #
 # TEST NDBAPI
 #

--- 1.58/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-03-05 16:16:04 +01:00
+++ 1.59/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-03-05 16:16:04 +01:00
@@ -1530,10 +1530,26 @@
        */
       SYSFILE->lastCompletedGCI[nodePtr.i] = 0;
       ndbrequire(nodePtr.p->nodeStatus != NodeRecord::ALIVE);
-      warningEvent("Making filesystem for node %d unusable",
+      warningEvent("Making filesystem for node %d unusable (need --initial)",
 		   nodePtr.i);
     }
+    else if (nodePtr.p->nodeStatus == NodeRecord::ALIVE &&
+	     SYSFILE->lastCompletedGCI[nodePtr.i] == 0)
+    {
+      jam();
+      CRASH_INSERTION(7170);
+      char buf[255];
+      BaseString::snprintf(buf, sizeof(buf), 
+			   "Cluster requires this node to be started "
+			   " with --initial as partial start has been performed"
+			   " and this filesystem is unusable");
+      progError(__LINE__, 
+		NDBD_EXIT_SR_RESTARTCONFLICT,
+		buf);
+      ndbrequire(false);
+    }
   }
+
   /**
    * This set which GCI we will try to restart to
    */
@@ -12232,14 +12248,23 @@
   /*       THAT THE NEW REPLICA IS NOT STARTED YET AND REPLICA_LAST_GCI IS*/
   /*       SET TO -1 TO INDICATE THAT IT IS NOT DEAD YET.                 */
   /*----------------------------------------------------------------------*/
+  Uint32 lastGCI = SYSFILE->lastCompletedGCI[nodeId];
   arrGuardErr(ncrReplicaPtr.p->noCrashedReplicas + 1, 8,
               NDBD_EXIT_MAX_CRASHED_REPLICAS);
   ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] = 
-    SYSFILE->lastCompletedGCI[nodeId];
+    lastGCI;
   ncrReplicaPtr.p->noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas + 1;
   ncrReplicaPtr.p->createGci[ncrReplicaPtr.p->noCrashedReplicas] = 0;
   ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] = 
     (Uint32)-1;
+
+  if (ncrReplicaPtr.p->noCrashedReplicas == 7 && lastGCI)
+  {
+    jam();
+    SYSFILE->lastCompletedGCI[nodeId] = 0;
+    warningEvent("Making filesystem for node %d unusable (need --initial)",
+		 nodeId);
+  }
 }//Dbdih::newCrashedReplica()
 
 /*************************************************************************/

--- 1.31/storage/ndb/test/ndbapi/testNodeRestart.cpp	2007-03-05 16:16:05 +01:00
+++ 1.32/storage/ndb/test/ndbapi/testNodeRestart.cpp	2007-03-05 16:16:05 +01:00
@@ -277,6 +277,101 @@
 }
 
 
+int runBug25984(NDBT_Context* ctx, NDBT_Step* step){
+  // Regression test for Bug #25984 ("8 failed node restart kills alive cluster").
+  int result = NDBT_OK;
+  int loops = ctx->getNumLoops();
+  int records = ctx->getNumRecords(); // NOTE(review): result, loops and records are never read below
+  NdbRestarter restarter;
+
+  if (restarter.getNumDbNodes() < 2)
+    return NDBT_OK; // fewer than two DB nodes: nothing is exercised, report success
+
+  if (restarter.restartAll(true, true, true)) // presumably (initial, nostart, abort) -- confirm against NdbRestarter
+    return NDBT_FAILED;
+
+  if (restarter.waitClusterNoStart())
+    return NDBT_FAILED;
+
+  if (restarter.startAll())
+    return NDBT_FAILED;
+
+  if (restarter.waitClusterStarted())
+    return NDBT_FAILED;
+
+  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; // dump-state arguments passed to dumpStateOneNode/dumpStateAllNodes below
+  int master = restarter.getMasterNodeId();
+  int victim = restarter.getRandomNodeOtherNodeGroup(master, rand());
+  if (victim == -1) // presumably -1 means no node was found in another node group -- confirm
+    victim = restarter.getRandomNodeSameNodeGroup(master, rand());
+
+  restarter.restartOneDbNode(victim, false, true, true); // NOTE(review): return value is ignored here, unlike most calls in this test
+
+  for (Uint32 i = 0; i<6; i++) // six rounds of: arm error 7016 on the victim, start it, wait for start phase 2
+  {
+    ndbout_c("Loop: %d", i); // NOTE(review): %d is used with a Uint32 argument
+    if (restarter.waitNodesNoStart(&victim, 1))
+      return NDBT_FAILED;
+    
+    if (restarter.dumpStateOneNode(victim, val2, 2))
+      return NDBT_FAILED;
+    
+    if (restarter.insertErrorInNode(victim, 7016))
+      return NDBT_FAILED;
+    
+    if (restarter.startNodes(&victim, 1))
+      return NDBT_FAILED;
+
+    if (restarter.waitNodesStartPhase(&victim, 1, 2))
+      return NDBT_FAILED;
+  }
+
+  if (restarter.waitNodesNoStart(&victim, 1))
+    return NDBT_FAILED;
+
+  if (restarter.dumpStateOneNode(victim, val2, 2))
+    return NDBT_FAILED;
+  
+  if (restarter.insertErrorInNode(victim, 7170)) // 7170 is the CRASH_INSERTION point added to DbdihMain.cpp by this same patch
+    return NDBT_FAILED;
+
+  if (restarter.startNodes(&victim, 1))
+    return NDBT_FAILED;
+
+  if (restarter.waitNodesNoStart(&victim, 1)) // fail the test unless the wait for the victim's no-start state succeeds
+    return NDBT_FAILED;
+  
+  if (restarter.restartAll(false, true, true)) // same call as at the top, but with the first argument false
+    return NDBT_FAILED;
+
+  if (restarter.insertErrorInAllNodes(932))
+    return NDBT_FAILED;
+
+  if (restarter.insertErrorInNode(master, 7170)) // arm the same 7170 insertion point on the master this time
+    return NDBT_FAILED;
+
+  if (restarter.dumpStateAllNodes(val2, 2))
+    return NDBT_FAILED;
+  
+  restarter.startNodes(&master, 1); // NOTE(review): return values of this call and the startAll() below are not checked
+  NdbSleep_MilliSleep(3000); // pause 3000 ms between starting the master and starting everything else
+  restarter.startAll();
+
+  if (restarter.waitClusterNoStart())
+    return NDBT_FAILED;
+
+  if (restarter.restartOneDbNode(victim, true, true, true)) // first argument is true here (false at the earlier victim restart); presumably an initial restart -- confirm
+    return NDBT_FAILED;
+
+  if (restarter.startAll())
+    return NDBT_FAILED;
+
+  if (restarter.waitClusterStarted())
+    return NDBT_FAILED;
+
+  return NDBT_OK;
+}
+
 
 int runRestarts(NDBT_Context* ctx, NDBT_Step* step){
   int result = NDBT_OK;
@@ -1536,6 +1631,9 @@
 }
 TESTCASE("Bug25554", ""){
   INITIALIZER(runBug25554);
+}
+TESTCASE("Bug25984", ""){
+  INITIALIZER(runBug25984);
 }
 TESTCASE("Bug26457", ""){
   INITIALIZER(runBug26457);
Thread
bk commit into 5.1 tree (jonas:1.2106) BUG#25984 - jonas, 5 Mar