List: Commits « Previous Message | Next Message »
From: jonas  Date: April 25 2008 6:30am
Subject: bk commit into 5.1 tree (jonas:1.2203) BUG#36245
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2008-04-25 08:30:39+02:00, jonas@stripped +4 -0
  ndb - bug#36245
    NF_COMPLETEREP can get lost on cascading master failure
    causing *big* pain and misery

  storage/ndb/src/kernel/blocks/ERROR_codes.txt@stripped, 2008-04-25 08:30:37+02:00, jonas@stripped +2 -2
    add new error code

  storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp@stripped, 2008-04-25 08:30:37+02:00, jonas@stripped +3 -3
    new error insert

  storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp@stripped, 2008-04-25 08:30:37+02:00, jonas@stripped +39 -22
    fix bug by sending NF_COMPLETEREP from all nodes

  storage/ndb/src/ndbapi/ClusterMgr.cpp@stripped, 2008-04-25 08:30:38+02:00, jonas@stripped +5 -2
    only signal NF_COMPLETEREP once to TransporterFacade

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/drop6

--- 1.32/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2008-04-25 08:30:41 +02:00
+++ 1.33/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2008-04-25 08:30:41 +02:00
@@ -1,4 +1,4 @@
-Next QMGR 1
+Next QMGR 937
 Next NDBCNTR 1002
 Next NDBFS 2000
 Next DBACC 3002
@@ -6,7 +6,7 @@
 Next DBLQH 5051
 Next DBDICT 6007
 Next DBDIH 7211
-Next DBTC 8063
+Next DBTC 8064
 Next CMVMI 9000
 Next BACKUP 10022
 Next DBUTIL 11002

--- 1.112/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2008-04-25 08:30:41 +02:00
+++ 1.113/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2008-04-25 08:30:41 +02:00
@@ -2713,6 +2713,8 @@
   if (seizeCacheRecord(signal) != 0) {
     return;
   }//if
+
+  CRASH_INSERTION(8063);
   
   TcConnectRecord * const regTcPtr = tcConnectptr.p;
   CacheRecord * const regCachePtr = cachePtr.p;
@@ -4571,9 +4573,7 @@
     CLEAR_ERROR_INSERT_VALUE;
     return;
   }//if
-  if (ERROR_INSERTED(8030)) {
-    systemErrorLab(signal, __LINE__);
-  }//if
+  CRASH_INSERTION(8030);
   if (ERROR_INSERTED(8025)) {
     SET_ERROR_INSERT_VALUE(8026);
     return;

--- 1.43/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2008-04-25 08:30:41 +02:00
+++ 1.44/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2008-04-25 08:30:41 +02:00
@@ -2605,28 +2605,37 @@
     progError(__LINE__, 0, buf);
     systemErrorLab(signal, __LINE__);
   }//if
-  if (cpresident == getOwnNodeId()) {
+
+  if (cpresident == getOwnNodeId()) 
+  {
     jam();
-    /** 
-     * Prepare a NFCompleteRep and send to all connected API's
-     * They can then abort all transaction waiting for response from 
-     * the failed node
-     */
-    NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
-    nfComp->blockNo = QMGR_REF;
-    nfComp->nodeId = getOwnNodeId();
-    nfComp->failedNodeId = failedNodePtr.i;
+    
+    CRASH_INSERTION(936);
+  }
 
-    for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
+  /** 
+   * Prepare a NFCompleteRep and send to all connected API's
+   * They can then abort all transaction waiting for response from 
+   * the failed node
+   *
+   * NOTE: This is sent from all nodes, as otherwise we would need
+   *       take-over if cpresident dies before sending this
+   */
+  NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
+  nfComp->blockNo = QMGR_REF;
+  nfComp->nodeId = getOwnNodeId();
+  nfComp->failedNodeId = failedNodePtr.i;
+  
+  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) 
+  {
+    jam();
+    ptrAss(nodePtr, nodeRec);
+    if (nodePtr.p->phase == ZAPI_ACTIVE){
       jam();
-      ptrAss(nodePtr, nodeRec);
-      if (nodePtr.p->phase == ZAPI_ACTIVE){
-        jam();
-        sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal, 
-                   NFCompleteRep::SignalLength, JBA);
-      }//if
-    }//for
-  }
+      sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal, 
+                 NFCompleteRep::SignalLength, JBA);
+    }//if
+  }//for
   return;
 }//Qmgr::execNDB_FAILCONF()
 
@@ -3651,9 +3660,17 @@
       jam();
       NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
     }//if	
-    sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal, 
-	       NodeFailRep::SignalLength, JBB);
-
+    
+    if (ERROR_INSERTED(936))
+    {
+      sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal, 
+                          200, NodeFailRep::SignalLength);
+    }
+    else
+    {
+      sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal, 
+                 NodeFailRep::SignalLength, JBB);
+    }
     guard0 = cnoCommitFailedNodes - 1;
     arrGuard(guard0, MAX_NDB_NODES);
     /**--------------------------------------------------------------------

--- 1.32/storage/ndb/src/ndbapi/ClusterMgr.cpp	2008-04-25 08:30:41 +02:00
+++ 1.33/storage/ndb/src/ndbapi/ClusterMgr.cpp	2008-04-25 08:30:41 +02:00
@@ -417,8 +417,11 @@
   const NodeId nodeId = nfComp->failedNodeId;
   assert(nodeId > 0 && nodeId < MAX_NDB_NODES);
   
-  theFacade.ReportNodeFailureComplete(nodeId);
-  theNodes[nodeId].nfCompleteRep = true;
+  if (theNodes[nodeId].nfCompleteRep == false)
+  {
+    theFacade.ReportNodeFailureComplete(nodeId);
+    theNodes[nodeId].nfCompleteRep = true;
+  }
 }
 
 void
Thread
bk commit into 5.1 tree (jonas:1.2203) BUG#36245 | jonas | 25 Apr