List:Commits« Previous MessageNext Message »
From:jonas Date:March 30 2006 12:20pm
Subject:bk commit into 4.1 tree (jonas:1.2483) BUG#18612
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2483 06/03/30 14:20:54 jonas@stripped +3 -0
  ndb - bug#15695 bug#16447 bug#18612
    For various reasons, a partitioned cluster may have been created.
    This patch makes sure that when they connect
    1) it's detected
    2) shutdown is forced

  ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
    1.17 06/03/30 14:20:52 jonas@stripped +251 -30
    1) Actively detect partitioned cluster(s)
    2) add some documentation

  ndb/src/kernel/blocks/qmgr/Qmgr.hpp
    1.5 06/03/30 14:20:52 jonas@stripped +17 -2
    1) Actively detect partitioned cluster(s)
    2) add some documentation

  ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp
    1.20 06/03/30 14:20:52 jonas@stripped +53 -13
    New dump/error insert for simulating network failure

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/41-work

--- 1.19/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp	2006-03-27 10:18:46 +02:00
+++ 1.20/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp	2006-03-30 14:20:52 +02:00
@@ -133,6 +133,9 @@
 {
 }
 
+#ifdef ERROR_INSERT
+NodeBitmask c_error_9000_nodes_mask;
+#endif
 
 void Cmvmi::execNDB_TAMPER(Signal* signal) 
 {
@@ -390,21 +393,33 @@
 
   const Uint32 len = signal->getLength();
   if(len == 2){
-    globalTransporterRegistry.do_connect(tStartingNode);
-    globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
 
-    //-----------------------------------------------------
-    // Report that the connection to the node is opened
-    //-----------------------------------------------------
-    signal->theData[0] = EventReport::CommunicationOpened;
-    signal->theData[1] = tStartingNode;
-    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
-    //-----------------------------------------------------
+#ifdef ERROR_INSERT
+    if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode)))
+#endif
+    {
+      globalTransporterRegistry.do_connect(tStartingNode);
+      globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
+      
+      //-----------------------------------------------------
+      // Report that the connection to the node is opened
+      //-----------------------------------------------------
+      signal->theData[0] = EventReport::CommunicationOpened;
+      signal->theData[1] = tStartingNode;
+      sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
+      //-----------------------------------------------------
+    }
   } else {
     for(unsigned int i = 1; i < MAX_NODES; i++ ) {
       jam();
       if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
 	jam();
+
+#ifdef ERROR_INSERT
+	if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i))
+	  continue;
+#endif
+	
 	globalTransporterRegistry.do_connect(i);
 	globalTransporterRegistry.setIOState(i, HaltIO);
 	
@@ -1010,7 +1025,8 @@
   }
 
   DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
-  if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){
+  Uint32 arg = dumpState->args[0];
+  if (arg == DumpStateOrd::CmvmiDumpConnections){
     for(unsigned int i = 1; i < MAX_NODES; i++ ){
       const char* nodeTypeStr = "";
       switch(getNodeInfo(i).m_type){
@@ -1043,13 +1059,13 @@
     }
   }
   
-  if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){
+  if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){
     infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d",
 	      g_sectionSegmentPool.getSize(),
 	      g_sectionSegmentPool.getNoOfFree());
   }
   
-  if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
+  if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
   {
     if(signal->getLength() == 1)
     {
@@ -1069,7 +1085,7 @@
     }
   }
 
-  if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) {
+  if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) {
     unsigned i;
     Uint32 loopCount = dumpState->args[1];
     const unsigned len0 = 11;
@@ -1096,6 +1112,30 @@
     ptr[1].sz = len1;
     sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2);
   }
+
+#ifdef ERROR_INSERT
+  if (arg == 9000)
+  {
+    SET_ERROR_INSERT_VALUE(9000);
+    for (Uint32 i = 1; i<signal->getLength(); i++)
+      c_error_9000_nodes_mask.set(signal->theData[i]);
+  }
+  
+  if (arg == 9001)
+  {
+    CLEAR_ERROR_INSERT_VALUE;
+    for (Uint32 i = 0; i<MAX_NODES; i++)
+    {
+      if (c_error_9000_nodes_mask.get(i))
+      {
+	signal->theData[0] = 0;
+	signal->theData[1] = i;
+	EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2);
+      }
+    }
+    c_error_9000_nodes_mask.clear();
+  }
+#endif
 
 #ifdef VM_TRACE
 #if 0

--- 1.4/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2005-08-18 14:04:50 +02:00
+++ 1.5/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2006-03-30 14:20:52 +02:00
@@ -100,7 +100,12 @@
   };
 
   struct StartRecord {
-    void reset(){ m_startKey++; m_startNode = 0;}
+    void reset(){ // return the start record to its idle state
+      m_startKey++; // a new key value for every reset
+      m_startNode = 0; // no node recorded as starting
+      m_gsn = RNIL; // presumably: no reply signal is being awaited - confirm
+      m_nodes.clearWaitingFor(); // drop all outstanding waits
+    }
     Uint32 m_startKey;
     Uint32 m_startNode;
     Uint64 m_startTimeout;
@@ -112,6 +117,14 @@
   NdbNodeBitmask c_definedNodes; // DB nodes in config
   NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
   NodeBitmask c_connectedNodes;  // All kinds of connected nodes
+
+  /**
+   * Nodes which we're checking for partitioned cluster
+   *
+   * i.e. nodes that connect to us when we already have an elected president
+   */
+  NdbNodeBitmask c_cmregreq_nodes;
+  
   Uint32 c_maxDynamicId;
   
   // Records
@@ -251,8 +264,10 @@
 
   // Generated statement blocks
   void startphase1(Signal* signal);
-  void electionWon();
+  void electionWon(Signal* signal);
   void cmInfoconf010Lab(Signal* signal);
+  bool check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
+  
   void apiHbHandlingLab(Signal* signal);
   void timerHandlingLab(Signal* signal);
   void hbReceivedLab(Signal* signal);

--- 1.16/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2006-03-17 10:55:00 +01:00
+++ 1.17/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2006-03-30 14:20:52 +02:00
@@ -56,6 +56,33 @@
 #define DEBUG_START3(signal, msg)
 #endif
 
+/**
+ * c_start.m_gsn = GSN_CM_REGREQ
+ *   Possible for all nodes
+ *   c_start.m_nodes contains all nodes in config
+ *
+ * c_start.m_gsn = GSN_CM_NODEINFOREQ;
+ *   Set when receiving CM_REGCONF
+ *   State possible for starting node only (not in cluster)
+ *
+ *   c_start.m_nodes contains all nodes in the alive cluster
+ *                   that have not replied to GSN_CM_NODEINFOREQ
+ *                   passed by president in GSN_CM_REGCONF
+ *
+ * c_start.m_gsn = GSN_CM_ADD
+ *   Possible for president only
+ *   Set when receiving and accepting CM_REGREQ (to include node)
+ *
+ *   c_start.m_nodes contains all nodes in alive cluster + starting node
+ *                   that has not replied to GSN_CM_ADD
+ *                   by sending GSN_CM_ACKADD
+ *
+ * c_start.m_gsn = GSN_CM_NODEINFOCONF
+ *   Possible for non presidents only
+ *     c_start.m_nodes contains a node that has been accepted by president
+ *     but has not connected to us yet
+ */
+
 // Signal entries and statement blocks
 /* 4  P R O G R A M        */
 /*******************************/
@@ -259,18 +286,24 @@
 {
   jamEntry();
   const Uint32 nodeId = signal->theData[0];
+
+  if (ERROR_INSERTED(931))
+  {
+    jam();
+    ndbout_c("Discarding CONNECT_REP(%d)", nodeId);
+    infoEvent("Discarding CONNECT_REP(%d)", nodeId);
+    return;
+  }
+  
   c_connectedNodes.set(nodeId);
   NodeRecPtr nodePtr;
   nodePtr.i = getOwnNodeId();
   ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
   switch(nodePtr.p->phase){
-  case ZSTARTING:
   case ZRUNNING:
+    ndbrequire(!c_clusterNodes.get(nodeId));
+  case ZSTARTING:
     jam();
-    if(!c_start.m_nodes.isWaitingFor(nodeId)){
-      jam();
-      return;
-    }
     break;
   case ZPREPARE_FAIL:
   case ZFAIL_CLOSING:
@@ -282,32 +315,64 @@
   case ZAPI_INACTIVE:
     return;
   }
-  
+
+  if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
+  {
+    jam();
+    return;
+  }
+
   switch(c_start.m_gsn){
   case GSN_CM_REGREQ:
     jam();
     sendCmRegReq(signal, nodeId);
+
+    /**
+     * We're waiting for CM_REGCONF c_start.m_nodes contains all configured
+     *   nodes
+     */
+    ndbrequire(nodePtr.p->phase == ZSTARTING);
+    ndbrequire(c_start.m_nodes.isWaitingFor(nodeId));
     return;
   case GSN_CM_NODEINFOREQ:
     jam();
-    sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
+
+    if (c_start.m_nodes.isWaitingFor(nodeId))
+    {
+      jam();
+      ndbrequire(getOwnNodeId() != cpresident);
+      ndbrequire(nodePtr.p->phase == ZSTARTING);
+      sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
+      return;
+    }
     return;
-  case GSN_CM_ADD:{
+  case GSN_CM_NODEINFOCONF:{
     jam();
-
-    ndbrequire(getOwnNodeId() != cpresident);
-    c_start.m_nodes.clearWaitingFor(nodeId);
-    c_start.m_gsn = RNIL;
     
-    NodeRecPtr addNodePtr;
-    addNodePtr.i = nodeId;
-    ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
-    cmAddPrepare(signal, addNodePtr, nodePtr.p);
-    return;
+    ndbrequire(getOwnNodeId() != cpresident);
+    ndbrequire(nodePtr.p->phase == ZRUNNING);
+    if (c_start.m_nodes.isWaitingFor(nodeId))
+    {
+      jam();
+      c_start.m_nodes.clearWaitingFor(nodeId);
+      c_start.m_gsn = RNIL;
+      
+      NodeRecPtr addNodePtr;
+      addNodePtr.i = nodeId;
+      ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
+      cmAddPrepare(signal, addNodePtr, nodePtr.p);
+      return;
+    }
   }
   default:
-    return;
+    (void)1;
   }
+  
+  ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId));
+  ndbrequire(!c_cmregreq_nodes.get(nodeId));
+  c_cmregreq_nodes.set(nodeId);
+  sendCmRegReq(signal, nodeId);  
+  c_regReqReqSent--;
   return;
 }//Qmgr::execCONNECT_REP()
 
@@ -601,22 +666,39 @@
   jamEntry();
 
   const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
+  Uint32 presidentNodeId = cmRegConf->presidentNodeId;
+
+  if (check_cmregreq_reply(signal, presidentNodeId, GSN_CM_REGCONF))
+  {
+    jam();
+    return;
+  }
 
   if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
     jam();
     char buf[128];
-    BaseString::snprintf(buf,sizeof(buf),"incompatible version own=0x%x other=0x%x, shutting down", NDB_VERSION, cmRegConf->presidentVersion);
+    BaseString::snprintf(buf,sizeof(buf), 
+			 "incompatible version own=0x%x other=0x%x, "
+			 " shutting down", 
+			 NDB_VERSION, cmRegConf->presidentVersion);
     systemErrorLab(signal, __LINE__, buf);
     return;
   }
 
-
+  myNodePtr.i = getOwnNodeId();
+  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
+  
+  ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
+  ndbrequire(myNodePtr.p->phase == ZSTARTING);
+  
   cpdistref    = cmRegConf->presidentBlockRef;
   cpresident   = cmRegConf->presidentNodeId;
   UintR TdynamicId   = cmRegConf->dynamicId;
   c_maxDynamicId = TdynamicId;
   c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
 
+  myNodePtr.p->ndynamicId = TdynamicId;
+  
 /*--------------------------------------------------------------*/
 // Send this as an EVENT REPORT to inform about hearing about
 // other NDB node proclaiming to be president.
@@ -627,10 +709,6 @@
   signal->theData[3] = TdynamicId;
   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
 
-  myNodePtr.i = getOwnNodeId();
-  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
-  myNodePtr.p->ndynamicId = TdynamicId;
-
   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
     jam();
     if (c_clusterNodes.get(nodePtr.i)){
@@ -653,6 +731,134 @@
   return;
 }//Qmgr::execCM_REGCONF()
 
+/**
+ * Check a CM_REGCONF/CM_REGREF reply from nodeId for a partitioned cluster.
+ * The payload of signal is read only when gsn == GSN_CM_REGREF.
+ *
+ * @return false when the caller should go on processing the reply; a
+ *         detected partition ends in progError() instead
+ */
+bool
+Qmgr::check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
+{
+  NodeRecPtr myNodePtr;
+  myNodePtr.i = getOwnNodeId();
+  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
+  NodeRecPtr nodePtr;
+  nodePtr.i = nodeId;
+  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
+
+  /**
+   * Try to decide if replying node
+   *   knows who is president
+   */
+  Uint32 president_reply = RNIL;
+  switch(gsn){
+  case GSN_CM_REGREF:{
+    jam();
+    CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
+    switch(ref->errorCode){
+    case CmRegRef::ZBUSY:
+    case CmRegRef::ZBUSY_PRESIDENT:
+    case CmRegRef::ZBUSY_TO_PRES:
+      jam();
+      // Only president replies this
+      ndbrequire(nodeId == ref->presidentCandidate);
+      president_reply = nodeId;
+      break;
+    case CmRegRef::ZNOT_PRESIDENT:
+      jam();
+      president_reply = ref->presidentCandidate;
+      break;
+    case CmRegRef::ZNOT_IN_CFG:
+    case CmRegRef::ZNOT_DEAD:
+    case CmRegRef::ZELECTION:
+      // Neither of these replies give certain president knowledge
+      jam();
+    }
+    break;
+  }
+  case GSN_CM_REGCONF:
+    jam();
+    president_reply = nodeId;
+    break;
+  }
+
+  char buf[256];
+  switch(c_start.m_gsn){
+  case GSN_CM_REGREQ:
+    jam();
+    ndbrequire(c_start.m_nodes.isWaitingFor(nodeId));
+    ndbrequire(c_cmregreq_nodes.isclear());
+    ndbrequire(myNodePtr.p->phase == ZSTARTING);
+    return false;
+  case GSN_CM_NODEINFOREQ:
+    jam();
+    ndbrequire(myNodePtr.p->phase == ZSTARTING);
+    if (c_start.m_nodes.isWaitingFor(nodeId))
+    {
+      jam();
+      // We're waiting for CM_NODEINFO; a CM_REGREF here is not an error
+      if (gsn == GSN_CM_REGREF)
+      {
+        jam();
+        return false;
+      }
+      jam();
+      BaseString::snprintf(buf, sizeof(buf),
+                           "Partitioned cluster! check StartPartialTimeout, "
+                           " received CM_REGCONF from %d"
+                           " while waiting for GSN_CM_NODEINFOCONF."
+                           " president=%d",
+                           nodeId, cpresident);
+      goto die_direct;
+    }
+
+    goto check_reply;
+  default:
+  case GSN_CM_NODEINFOCONF:
+    jam();
+    ndbrequire(myNodePtr.p->phase == ZRUNNING);
+    goto check_reply;
+  }
+
+check_reply:
+  jam();
+  c_cmregreq_nodes.clear(nodeId);
+
+  if (gsn == GSN_CM_REGCONF)
+  {
+    jam();
+    BaseString::snprintf(buf, sizeof(buf),
+                         "Partitioned cluster! check StartPartialTimeout, "
+                         " received CM_REGCONF"
+                         " from %d I think president: %d",
+                         nodeId, cpresident);
+    goto die_direct;
+  }
+
+  if (president_reply != RNIL && president_reply != cpresident)
+  {
+    jam();
+    BaseString::snprintf(buf, sizeof(buf),
+                         "Partitioned cluster! check StartPartialTimeout, "
+                         " received CM_REGREF from %d specifying president as"
+                         " %d, president: %d",
+                         nodeId, president_reply, cpresident);
+    goto die_direct;
+  }
+
+  return false;
+
+die_direct:
+  ndbout_c("%s", buf); // the message is data, never the format string
+  progError(__LINE__,
+            ERR_ARBIT_SHUTDOWN,
+            buf);
+  ndbrequire(false);
+  return true; // not expected to be reached; gives every path a return value
+}
+
 void
 Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
   CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
@@ -685,13 +891,21 @@
 void Qmgr::execCM_REGREF(Signal* signal) 
 {
   jamEntry();
-  c_regReqReqRecv++;
 
-  // Ignore block reference in data[0]
   UintR TaddNodeno = signal->theData[1];
   UintR TrefuseReason = signal->theData[2];
   Uint32 candidate = signal->theData[3];
   DEBUG_START3(signal, TrefuseReason);
+
+  if (check_cmregreq_reply(signal, TaddNodeno, GSN_CM_REGREF))
+  {
+    jam();
+    return;
+  }
+
+  c_regReqReqRecv++;
+
+  // Ignore block reference in data[0]
   
   if(candidate != cpresidentCandidate){
     jam();
@@ -779,7 +993,7 @@
   Uint64 now = NdbTick_CurrentMillisecond();
   if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){
     jam();
-    electionWon();
+    electionWon(signal);
     sendSttorryLab(signal);
     
     /**
@@ -793,7 +1007,7 @@
 }//Qmgr::execCM_REGREF()
 
 void
-Qmgr::electionWon(){
+Qmgr::electionWon(Signal* signal){
   NodeRecPtr myNodePtr;
   cpresident = getOwnNodeId(); /* This node becomes president. */
   myNodePtr.i = getOwnNodeId();
@@ -812,6 +1026,12 @@
   cpresidentAlive = ZTRUE;
   c_stopElectionTime = ~0;
   c_start.reset();
+
+  signal->theData[0] = EventReport::CM_REGCONF;
+  signal->theData[1] = getOwnNodeId();
+  signal->theData[2] = cpresident;
+  signal->theData[3] = 1;
+  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
 }
 
 /*
@@ -946,7 +1166,7 @@
     ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
     c_start.m_nodes.clearWaitingFor();
     c_start.m_nodes.setWaitingFor(nodePtr.i);
-    c_start.m_gsn = GSN_CM_ADD;
+    c_start.m_gsn = GSN_CM_NODEINFOCONF;
 #else
     warningEvent("Enabling communication to CM_ADD node %u state=%d", 
 		 nodePtr.i,
@@ -1847,7 +2067,8 @@
   const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
   const Uint32 nodeId = rep->nodeId;
   c_connectedNodes.clear(nodeId);
-
+  c_cmregreq_nodes.clear(nodeId);
+  
   NodeRecPtr nodePtr;
   nodePtr.i = getOwnNodeId();
   ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
Thread
bk commit into 4.1 tree (jonas:1.2483) BUG#18612jonas30 Mar