List: Commits « Previous Message | Next Message »
From:Jonas Oreland Date:January 30 2009 1:07pm
Subject:bzr commit into mysql-5.1-telco-6.4 branch (jonas:3240) Bug#42450
View as plain text  
#At file:///home/jonas/src/telco-6.4/

 3240 Jonas Oreland	2009-01-30
      ndb - bug#42450 - fix incorrect assumptions about NODE_FAILREP/INCL_NODEREQ and rewrite NF_COMPLETE handling in LocalProxy. Note: more work is needed because testNodeRestart -n MNF fails consistently in mt-lqh.
modified:
  storage/ndb/src/kernel/blocks/LocalProxy.cpp
  storage/ndb/src/kernel/blocks/LocalProxy.hpp

=== modified file 'storage/ndb/src/kernel/blocks/LocalProxy.cpp'
--- a/storage/ndb/src/kernel/blocks/LocalProxy.cpp	2008-12-03 19:49:40 +0000
+++ b/storage/ndb/src/kernel/blocks/LocalProxy.cpp	2009-01-30 13:07:48 +0000
@@ -543,34 +543,39 @@ LocalProxy::execREAD_NODESREF(Signal* si
 void
 LocalProxy::execNODE_FAILREP(Signal* signal)
 {
-  Ss_NODE_FAILREP& ss = ssSeize<Ss_NODE_FAILREP>(1);
+  Ss_NODE_FAILREP& ss = ssFindSeize<Ss_NODE_FAILREP>(1, 0);
   const NodeFailRep* req = (const NodeFailRep*)signal->getDataPtr();
   ss.m_req = *req;
   ndbrequire(signal->getLength() == NodeFailRep::SignalLength);
 
+  NdbNodeBitmask mask;
+  mask.assign(NdbNodeBitmask::Size, req->theNodes);
+
   // proxy itself
   NodePtr nodePtr;
   c_nodeList.first(nodePtr);
   ndbrequire(nodePtr.i != RNIL);
-  while (nodePtr.i != RNIL) {
-    if (NdbNodeBitmask::get(req->theNodes, nodePtr.p->m_nodeId)) {
+  while (nodePtr.i != RNIL)
+  {
+    if (NdbNodeBitmask::get(req->theNodes, nodePtr.p->m_nodeId))
+    {
       jam();
-      ndbrequire(nodePtr.p->m_alive);
       nodePtr.p->m_alive = false;
     }
     c_nodeList.next(nodePtr);
   }
 
   // from each worker wait for ack for each failed node
-  Uint32 i;
-  for (i = 0; i < c_workers; i++) {
+  for (Uint32 i = 0; i < c_workers; i++)
+  {
     jam();
     NdbNodeBitmask& waitFor = ss.m_waitFor[i];
-    waitFor.assign(NdbNodeBitmask::Size, req->theNodes);
+    waitFor.bitOR(mask);
   }
 
   sendREQ(signal, ss);
-  if (ss.noReply(number())) {
+  if (ss.noReply(number()))
+  {
     jam();
     ssRelease<Ss_NODE_FAILREP>(ss);
   }
@@ -600,46 +605,40 @@ LocalProxy::sendNF_COMPLETEREP(Signal* s
 {
   Ss_NODE_FAILREP& ss = ssFind<Ss_NODE_FAILREP>(ssId);
 
-  {
-    const NFCompleteRep* conf = (const NFCompleteRep*)signal->getDataPtr();
+  const NFCompleteRep* conf = (const NFCompleteRep*)signal->getDataPtr();
+  Uint32 node = conf->failedNodeId;
 
+  {
     NdbNodeBitmask& waitFor = ss.m_waitFor[ss.m_worker];
-    ndbrequire(waitFor.get(conf->failedNodeId));
-    waitFor.clear(conf->failedNodeId);
-    
-    if (!waitFor.isclear()) {
-      // worker has not replied for all failed nodes
-      skipConf(ss);
-    }
+    ndbrequire(waitFor.get(node));
+    waitFor.clear(node);
   }
 
-  if (!lastReply(ss))
-    return;
-
-  NdbNodeBitmask theNodes;
-  theNodes.assign(NdbNodeBitmask::Size, ss.m_req.theNodes);
-
-  NodePtr nodePtr;
-  c_nodeList.first(nodePtr);
-  ndbrequire(nodePtr.i != RNIL);
-  while (nodePtr.i != RNIL) {
-    if (theNodes.get(nodePtr.p->m_nodeId)) {
+  for (Uint32 i = 0; i < c_workers; i++)
+  {
+    jam();
+    NdbNodeBitmask& waitFor = ss.m_waitFor[i];
+    if (waitFor.get(node))
+    {
       jam();
-      NFCompleteRep* conf = (NFCompleteRep*)signal->getDataPtrSend();
-      conf->blockNo = number();
-      conf->nodeId = getOwnNodeId();
-      conf->failedNodeId = nodePtr.p->m_nodeId;
-      conf->unused = 0;
-      conf->from = __LINE__;
-
-      sendSignal(DBDIH_REF, GSN_NF_COMPLETEREP,
-                 signal, NFCompleteRep::SignalLength, JBB);
+      /**
+       * Not all threads are done with this failed node
+       */
+      return;
     }
-
-    c_nodeList.next(nodePtr);
   }
 
-  ssRelease<Ss_NODE_FAILREP>(ssId);
+  {
+    NFCompleteRep* conf = (NFCompleteRep*)signal->getDataPtrSend();
+    conf->blockNo = number();
+    conf->nodeId = getOwnNodeId();
+    conf->failedNodeId = node;
+    conf->unused = 0;
+    conf->from = __LINE__;
+
+    sendSignal(DBDIH_REF, GSN_NF_COMPLETEREP,
+               signal, NFCompleteRep::SignalLength, JBB);
+  }
 }
 
 // GSN_INCL_NODEREQ

=== modified file 'storage/ndb/src/kernel/blocks/LocalProxy.hpp'
--- a/storage/ndb/src/kernel/blocks/LocalProxy.hpp	2008-12-03 19:49:40 +0000
+++ b/storage/ndb/src/kernel/blocks/LocalProxy.hpp	2009-01-30 13:07:48 +0000
@@ -240,7 +240,6 @@ protected:
 
   template <class Ss>
   Ss& ssFind(Uint32 ssId) {
-    SsPool<Ss>& sp = Ss::pool(this);
     ndbrequire(ssId != 0);
     Ss* ssptr = ssSearch<Ss>(ssId);
     ndbrequire(ssptr != 0);
@@ -254,14 +253,15 @@ protected:
    */
   template <class Ss>
   Ss& ssFindSeize(Uint32 ssId, bool* found) {
-    SsPool<Ss>& sp = Ss::pool(this);
     ndbrequire(ssId != 0);
     Ss* ssptr = ssSearch<Ss>(ssId);
     if (ssptr != 0) {
-      *found = true;
+      if (found)
+        *found = true;
       return *ssptr;
     }
-    *found = false;
+    if (found)
+      *found = false;
     return ssSeize<Ss>(ssId);
   }
 

Thread
bzr commit into mysql-5.1-telco-6.4 branch (jonas:3240) Bug#42450 - Jonas Oreland - 30 Jan