List:Commits« Previous MessageNext Message »
From:jonas Date:March 5 2007 2:54pm
Subject:bk commit into 5.1 tree (jonas:1.2104) BUG#26457
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-03-05 15:54:06+01:00, jonas@stripped +6 -0
  ndb - wl2325-5.0
    Bug#26457 Master failure during master failure

  storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2007-03-05 15:54:05+01:00, jonas@stripped +3 -1
    Bug #26457 Master failure during master failure

  storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp@stripped, 2007-03-05 15:54:05+01:00, jonas@stripped +26 -1
    Bug #26457 Master failure during master failure

  storage/ndb/test/include/NdbRestarter.hpp@stripped, 2007-03-05 15:54:05+01:00, jonas@stripped +2 -0
    Bug #26457 Master failure during master failure

  storage/ndb/test/ndbapi/testNodeRestart.cpp@stripped, 2007-03-05 15:54:05+01:00, jonas@stripped +42 -0
    Bug #26457 Master failure during master failure

  storage/ndb/test/run-test/daily-basic-tests.txt@stripped, 2007-03-05 15:54:05+01:00, jonas@stripped +4 -0
    Bug #26457 Master failure during master failure

  storage/ndb/test/src/NdbRestarter.cpp@stripped, 2007-03-05 15:54:05+01:00, jonas@stripped +62 -0
    Bug #26457 Master failure during master failure

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/drop5

--- 1.42/storage/ndb/test/run-test/daily-basic-tests.txt	2007-03-05 15:54:10 +01:00
+++ 1.43/storage/ndb/test/run-test/daily-basic-tests.txt	2007-03-05 15:54:10 +01:00
@@ -465,6 +465,10 @@
 cmd: testNodeRestart
 args: -n Bug25554 T1
 
+max-time: 1000
+cmd: testNodeRestart
+args: -n Bug26457 T1
+
 # OLD FLEX
 max-time: 500
 cmd: flexBench

--- 1.19/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2007-03-05 15:54:10 +01:00
+++ 1.20/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2007-03-05 15:54:10 +01:00
@@ -5,7 +5,7 @@
 Next DBTUP 4013
 Next DBLQH 5042
 Next DBDICT 6007
-Next DBDIH 7178
+Next DBDIH 7181
 Next DBTC 8039
 Next CMVMI 9000
 Next BACKUP 10022
@@ -71,6 +71,8 @@
 7031: Delay in GCP_PREPARE and die 3s later
 
 7177: Delay copying of sysfileData in execCOPY_GCIREQ
+
+7180: Crash master during master-take-over in execMASTER_LCPCONF
 
 ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
 -----------------------------------------------------------------

--- 1.56/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-03-05 15:54:10 +01:00
+++ 1.57/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2007-03-05 15:54:10 +01:00
@@ -4585,6 +4585,8 @@
 Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
   jam();
 
+  Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;
+
   c_lcpMasterTakeOverState.minTableId = ~0;
   c_lcpMasterTakeOverState.minFragId = ~0;
   c_lcpMasterTakeOverState.failedNodeId = nodeId;
@@ -4603,7 +4605,20 @@
     /**
      * Node failure during master take over...
      */
-    ndbout_c("Nodefail during master take over");
+    ndbout_c("Nodefail during master take over (old: %d)", oldNode);
+  }
+  
+  NodeRecordPtr nodePtr;
+  nodePtr.i = oldNode;
+  if (oldNode > 0 && oldNode < MAX_NDB_NODES)
+  {
+    jam();
+    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
+    if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
+    {
+      jam();
+      checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
+    }
   }
   
   setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
@@ -5619,6 +5634,14 @@
   jamEntry();
   const BlockReference newMasterBlockref = req->masterRef;
 
+  if (newMasterBlockref != cmasterdihref)
+  {
+    jam();
+    ndbout_c("resending GSN_MASTER_LCPREQ");
+    sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
+			signal->getLength(), 50);
+    return;
+  }
   Uint32 failedNodeId = req->failedNodeId;
 
   /**
@@ -5914,6 +5937,8 @@
   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
   nodePtr.p->lcpStateAtTakeOver = lcpState;
 
+  CRASH_INSERTION(7180);
+  
 #ifdef VM_TRACE
   ndbout_c("MASTER_LCPCONF");
   printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);

--- 1.6/storage/ndb/test/include/NdbRestarter.hpp	2007-03-05 15:54:10 +01:00
+++ 1.7/storage/ndb/test/include/NdbRestarter.hpp	2007-03-05 15:54:10 +01:00
@@ -62,6 +62,8 @@
   int dumpStateAllNodes(int * _args, int _num_args);
 
   int getMasterNodeId();
+  int getNextMasterNodeId(int nodeId);
+  int getNodeGroup(int nodeId);
   int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
   int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
   int getRandomNotMasterNodeId(int randomNumber);

--- 1.29/storage/ndb/test/ndbapi/testNodeRestart.cpp	2007-03-05 15:54:10 +01:00
+++ 1.30/storage/ndb/test/ndbapi/testNodeRestart.cpp	2007-03-05 15:54:10 +01:00
@@ -1129,6 +1129,45 @@
   return NDBT_OK;
 }
 
+int
+runBug26457(NDBT_Context* ctx, NDBT_Step* step)
+{
+  NdbRestarter res;
+  if (res.getNumDbNodes() < 4)
+    return NDBT_OK;
+
+  int loops = ctx->getNumLoops();
+  while (loops --)
+  {
+retry:
+    int master = res.getMasterNodeId();
+    int next = res.getNextMasterNodeId(master);
+
+    ndbout_c("master: %d next: %d", master, next);
+
+    if (res.getNodeGroup(master) == res.getNodeGroup(next))
+    {
+      res.restartOneDbNode(next, false, false, true);
+      if (res.waitClusterStarted())
+	return NDBT_FAILED;
+      goto retry;
+    }
+
+    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 };
+    
+    if (res.dumpStateOneNode(next, val2, 2))
+      return NDBT_FAILED;
+    
+    if (res.insertErrorInNode(next, 7180))
+      return NDBT_FAILED;
+    
+    res.restartOneDbNode(master, false, false, true);
+    if (res.waitClusterStarted())
+      return NDBT_FAILED;
+  }
+  
+  return NDBT_OK;
+}
 
 NDBT_TESTSUITE(testNodeRestart);
 TESTCASE("NoLoad", 
@@ -1457,6 +1496,9 @@
 }
 TESTCASE("Bug25554", ""){
   INITIALIZER(runBug25554);
+}
+TESTCASE("Bug26457", ""){
+  INITIALIZER(runBug26457);
 }
 NDBT_TESTSUITE_END(testNodeRestart);
 

--- 1.13/storage/ndb/test/src/NdbRestarter.cpp	2007-03-05 15:54:10 +01:00
+++ 1.14/storage/ndb/test/src/NdbRestarter.cpp	2007-03-05 15:54:10 +01:00
@@ -129,6 +129,68 @@
 }
 
 int
+NdbRestarter::getNodeGroup(int nodeId){
+  if (!isConnected())
+    return -1;
+  
+  if (getStatus() != 0)
+    return -1;
+  
+  for(size_t i = 0; i < ndbNodes.size(); i++)
+  {
+    if(ndbNodes[i].node_id == nodeId)
+    {
+      return ndbNodes[i].node_group;
+    }
+  }
+  
+  return -1;
+}
+
+int
+NdbRestarter::getNextMasterNodeId(int nodeId){
+  if (!isConnected())
+    return -1;
+  
+  if (getStatus() != 0)
+    return -1;
+  
+  size_t i;
+  for(i = 0; i < ndbNodes.size(); i++)
+  {
+    if(ndbNodes[i].node_id == nodeId)
+    {
+      break;
+    }
+  }
+  assert(i < ndbNodes.size());
+  if (i == ndbNodes.size())
+    return -1;
+
+  int dynid = ndbNodes[i].dynamic_id;
+  int minid = dynid;
+  for (i = 0; i<ndbNodes.size(); i++)
+    if (ndbNodes[i].dynamic_id > minid)
+      minid = ndbNodes[i].dynamic_id;
+  
+  for (i = 0; i<ndbNodes.size(); i++)
+    if (ndbNodes[i].dynamic_id > dynid && 
+	ndbNodes[i].dynamic_id < minid)
+    {
+      minid = ndbNodes[i].dynamic_id;
+    }
+  
+  if (minid != ~0)
+  {
+    for (i = 0; i<ndbNodes.size(); i++)
+      if (ndbNodes[i].dynamic_id == minid)
+	return ndbNodes[i].node_id;
+  }
+  
+  return getMasterNodeId();
+}
+
+int
 NdbRestarter::getRandomNotMasterNodeId(int rand){
   int master = getMasterNodeId();
   if(master == -1)
Thread
bk commit into 5.1 tree (jonas:1.2104) BUG#26457jonas5 Mar