List: Commits  « Previous Message | Next Message »
From: jonas  Date: May 30 2007 10:01pm
Subject: bk commit into 5.1 tree (jonas:1.2140)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push, these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository,
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-05-30 22:01:43+02:00, jonas@stripped +2 -0
  Merge perch.ndb.mysql.com:/home/jonas/src/drop5
  into  perch.ndb.mysql.com:/home/jonas/src/drop6
  MERGE: 1.2071.1.79

  storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp@stripped, 2007-05-30 22:01:41+02:00, jonas@stripped +0 -0
    Auto merged
    MERGE: 1.12.1.6

  storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp@stripped, 2007-05-30 22:01:41+02:00, jonas@stripped +0 -0
    Auto merged
    MERGE: 1.30.1.11

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/drop6/RESYNC

--- 1.19/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2007-05-30 22:01:46 +02:00
+++ 1.20/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2007-05-30 22:01:46 +02:00
@@ -266,6 +266,8 @@
   void execALLOC_NODEID_CONF(Signal *);
   void execALLOC_NODEID_REF(Signal *);
   void completeAllocNodeIdReq(Signal *);
+  
+  void execSTART_ORD(Signal*);
 
   // Arbitration signals
   void execARBIT_CFG(Signal* signal);
@@ -282,6 +284,7 @@
   void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
   Uint32 check_startup(Signal* signal);
 
+  void api_failed(Signal* signal, Uint32 aFailedNode);
   void node_failed(Signal* signal, Uint16 aFailedNode);
   void checkStartInterface(Signal* signal);
   void failReport(Signal* signal,

--- 1.39/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2007-05-30 22:01:46 +02:00
+++ 1.40/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2007-05-30 22:01:46 +02:00
@@ -237,6 +237,16 @@
 	     ReadConfigConf::SignalLength, JBB);
 }
 
+void
+Qmgr::execSTART_ORD(Signal* signal)
+{
+  /**
+   * Start timer handling 
+   */
+  signal->theData[0] = ZTIMER_HANDLING;
+  sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 1, JBB);
+}
+
 /*
 4.2  ADD NODE MODULE*/
 /*##########################################################################*/
@@ -1179,12 +1189,6 @@
   {
     jam();
     electionWon(signal);
-    
-    /**
-     * Start timer handling 
-     */
-    signal->theData[0] = ZTIMER_HANDLING;
-    sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 10, JBB);
   }
   
   return;
@@ -1823,12 +1827,6 @@
   
   sendSttorryLab(signal);
   
-  /**
-   * Start timer handling 
-   */
-  signal->theData[0] = ZTIMER_HANDLING;
-  sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 10, JBB);
-  
   sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
 }
 
@@ -2217,20 +2215,22 @@
       hb_check_timer.reset();
     }
   }
-
+  
   if (interface_check_timer.check(TcurrentTime)) {
     jam();
     interface_check_timer.reset();
     checkStartInterface(signal);
   }
 
+  if (hb_api_timer.check(TcurrentTime)) 
+  {
+    jam();
+    hb_api_timer.reset();
+    apiHbHandlingLab(signal);
+  }
+
   if (cactivateApiCheck != 0) {
     jam();
-    if (hb_api_timer.check(TcurrentTime)) {
-      jam();
-      hb_api_timer.reset();
-      apiHbHandlingLab(signal);
-    }//if
     if (clatestTransactionCheck == 0) {
       //-------------------------------------------------------------
       // Initialise the Transaction check timer.
@@ -2347,18 +2347,21 @@
     if(type == NodeInfo::INVALID)
       continue;
 
-    if (TnodePtr.p->phase == ZAPI_ACTIVE){
+    if (c_connectedNodes.get(nodeId))
+    {
       jam();
       setNodeInfo(TnodePtr.i).m_heartbeat_cnt++;
       
-      if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2){
+      if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2)
+      {
 	signal->theData[0] = NDB_LE_MissedHeartbeat;
 	signal->theData[1] = nodeId;
 	signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1;
 	sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
       }
       
-      if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4) {
+      if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4) 
+      {
         jam();
 	/*------------------------------------------------------------------*/
 	/* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS. 
@@ -2370,8 +2373,8 @@
 	signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
 	signal->theData[1] = nodeId;
 	sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
-
-        node_failed(signal, nodeId);
+        
+        api_failed(signal, nodeId);
       }//if
     }//if
   }//for
@@ -2461,26 +2464,6 @@
   sendSignal(DBTC_REF, GSN_API_FAILREQ, signal, 2, JBA);
   sendSignal(DBDICT_REF, GSN_API_FAILREQ, signal, 2, JBA);
   sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
-
-  /**-------------------------------------------------------------------------
-   * THE OTHER NODE WAS AN API NODE. THE COMMUNICATION LINK IS ALREADY 
-   * BROKEN AND THUS NO ACTION IS NEEDED TO BREAK THE CONNECTION. 
-   * WE ONLY NEED TO SET PARAMETERS TO ENABLE A NEW CONNECTION IN A FEW 
-   * SECONDS. 
-   *-------------------------------------------------------------------------*/
-  setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
-  setNodeInfo(failedNodePtr.i).m_version = 0;
-  recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
-  
-  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
-
-  closeCom->xxxBlockRef = reference();
-  closeCom->failNo      = 0;
-  closeCom->noOfNodes   = 1;
-  NodeBitmask::clear(closeCom->theNodes);
-  NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
-  sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal, 
-	     CloseComReqConf::SignalLength, JBA);
 }//Qmgr::sendApiFailReq()
 
 void Qmgr::execAPI_FAILREQ(Signal* signal)
@@ -2493,20 +2476,7 @@
   
   ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
 
-  // ignore if api not active
-  if (failedNodePtr.p->phase != ZAPI_ACTIVE)
-  {
-    jam();
-    // But send to SUMA anyway...
-    sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
-    return;
-  }
-
-  signal->theData[0] = NDB_LE_Disconnected;
-  signal->theData[1] = failedNodePtr.i;
-  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
-
-  node_failed(signal, failedNodePtr.i);
+  api_failed(signal, signal->theData[0]);
 }
 
 void Qmgr::execAPI_FAILCONF(Signal* signal) 
@@ -2635,6 +2605,13 @@
     ndbrequire(false);
   }
   
+  if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
+  {
+    jam();
+    api_failed(signal, nodeId);
+    return;
+  }
+
   switch(nodePtr.p->phase){
   case ZRUNNING:
     jam();
@@ -2671,66 +2648,109 @@
   failedNodePtr.i = aFailedNode;
   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
 
-  if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB){
+  ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
+  
+  /**---------------------------------------------------------------------
+   *   THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT 
+   *   FAILURE WAS DISCOVERED.
+   *---------------------------------------------------------------------*/
+  switch(failedNodePtr.p->phase){
+  case ZRUNNING:
     jam();
-    /**---------------------------------------------------------------------
-     *   THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT 
-     *   FAILURE WAS DISCOVERED.
-     *---------------------------------------------------------------------*/
-    switch(failedNodePtr.p->phase){
-    case ZRUNNING:
-      jam();
-      failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE);
-      return;
-    case ZFAIL_CLOSING:
-      jam();
-      return;
-    case ZSTARTING:
-      c_start.reset();
-      // Fall-through
-    default:
-      jam();
-      /*---------------------------------------------------------------------*/
-      // The other node is still not in the cluster but disconnected. 
-      // We must restart communication in three seconds.
-      /*---------------------------------------------------------------------*/
-      failedNodePtr.p->failState = NORMAL;
-      failedNodePtr.p->phase = ZFAIL_CLOSING;
-      setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
-
-      CloseComReqConf * const closeCom = 
-	(CloseComReqConf *)&signal->theData[0];
-
-      closeCom->xxxBlockRef = reference();
-      closeCom->failNo      = 0;
-      closeCom->noOfNodes   = 1;
-      NodeBitmask::clear(closeCom->theNodes);
-      NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
-      sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal, 
-		 CloseComReqConf::SignalLength, JBA);
-    }//if
+    failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE);
     return;
-  }
-
-  /**
-   * API code
-   */
-  jam();
-  if (failedNodePtr.p->phase != ZFAIL_CLOSING){
+  case ZFAIL_CLOSING:
+    jam();
+    return;
+  case ZSTARTING:
+    c_start.reset();
+    // Fall-through
+  default:
     jam();
-    //-------------------------------------------------------------------------
-    // The API was active and has now failed. We need to initiate API failure
-    // handling. If the API had already failed then we can ignore this
-    // discovery.
-    //-------------------------------------------------------------------------
+    /*---------------------------------------------------------------------*/
+    // The other node is still not in the cluster but disconnected. 
+    // We must restart communication in three seconds.
+    /*---------------------------------------------------------------------*/
+    failedNodePtr.p->failState = NORMAL;
     failedNodePtr.p->phase = ZFAIL_CLOSING;
-    
-    sendApiFailReq(signal, aFailedNode);
-    arbitRec.code = ArbitCode::ApiFail;
-    handleArbitApiFail(signal, aFailedNode);
+    setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
+
+    CloseComReqConf * const closeCom = 
+      (CloseComReqConf *)&signal->theData[0];
+
+    closeCom->xxxBlockRef = reference();
+    closeCom->failNo      = 0;
+    closeCom->noOfNodes   = 1;
+    NodeBitmask::clear(closeCom->theNodes);
+    NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
+    sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal, 
+               CloseComReqConf::SignalLength, JBA);
   }//if
   return;
-}//Qmgr::node_failed()
+}
+
+void
+Qmgr::api_failed(Signal* signal, Uint32 nodeId)
+{
+  NodeRecPtr failedNodePtr;
+  /**------------------------------------------------------------------------
+   *   A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
+   *   DUE TO THIS.
+   *-----------------------------------------------------------------------*/
+  failedNodePtr.i = nodeId;
+  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
+  
+  if (failedNodePtr.p->phase == ZFAIL_CLOSING)
+  {
+    /**
+     * Failure handling already in progress
+     */
+    jam();
+    return;
+  }
+
+  if (failedNodePtr.p->phase == ZAPI_ACTIVE)
+  {
+    jam();
+    sendApiFailReq(signal, nodeId);
+    arbitRec.code = ArbitCode::ApiFail;
+    handleArbitApiFail(signal, nodeId);
+  }
+  else
+  {
+    /**
+     * Always inform SUMA
+     */
+    jam();
+    signal->theData[0] = nodeId;
+    signal->theData[1] = QMGR_REF;
+    sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA);
+    failedNodePtr.p->failState = NORMAL;
+  }
+
+  failedNodePtr.p->phase = ZFAIL_CLOSING;
+  setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
+  setNodeInfo(failedNodePtr.i).m_version = 0;
+  recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
+  
+  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
+  closeCom->xxxBlockRef = reference();
+  closeCom->failNo      = 0;
+  closeCom->noOfNodes   = 1;
+  NodeBitmask::clear(closeCom->theNodes);
+  NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
+  sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal, 
+             CloseComReqConf::SignalLength, JBA);
+
+  if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
+  {
+    /**
+     * Allow MGM do reconnect "directly"
+     */
+    jam();
+    setNodeInfo(failedNodePtr.i).m_heartbeat_cnt = 3;
+  }
+}
 
 /**--------------------------------------------------------------------------
  * AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE 
Thread
bk commit into 5.1 tree (jonas:1.2140) - jonas, 30 May