#At file:///home/jonas/src/telco-6.4/
3240 Jonas Oreland 2009-01-30
ndb - bug#42450 - fix incorrect assumptions about NODE_FAILREP/INCL_NODEREQ and rewrite NF_COMPLETE handling in LocalProxy. Note: more work is needed because testNodeRestart -n MNF fails consistently in mt-lqh
modified:
storage/ndb/src/kernel/blocks/LocalProxy.cpp
storage/ndb/src/kernel/blocks/LocalProxy.hpp
=== modified file 'storage/ndb/src/kernel/blocks/LocalProxy.cpp'
--- a/storage/ndb/src/kernel/blocks/LocalProxy.cpp 2008-12-03 19:49:40 +0000
+++ b/storage/ndb/src/kernel/blocks/LocalProxy.cpp 2009-01-30 13:07:48 +0000
@@ -543,34 +543,39 @@ LocalProxy::execREAD_NODESREF(Signal* si
void
LocalProxy::execNODE_FAILREP(Signal* signal)
{
- Ss_NODE_FAILREP& ss = ssSeize<Ss_NODE_FAILREP>(1);
+ Ss_NODE_FAILREP& ss = ssFindSeize<Ss_NODE_FAILREP>(1, 0);
const NodeFailRep* req = (const NodeFailRep*)signal->getDataPtr();
ss.m_req = *req;
ndbrequire(signal->getLength() == NodeFailRep::SignalLength);
+ NdbNodeBitmask mask;
+ mask.assign(NdbNodeBitmask::Size, req->theNodes);
+
// proxy itself
NodePtr nodePtr;
c_nodeList.first(nodePtr);
ndbrequire(nodePtr.i != RNIL);
- while (nodePtr.i != RNIL) {
- if (NdbNodeBitmask::get(req->theNodes, nodePtr.p->m_nodeId)) {
+ while (nodePtr.i != RNIL)
+ {
+ if (NdbNodeBitmask::get(req->theNodes, nodePtr.p->m_nodeId))
+ {
jam();
- ndbrequire(nodePtr.p->m_alive);
nodePtr.p->m_alive = false;
}
c_nodeList.next(nodePtr);
}
// from each worker wait for ack for each failed node
- Uint32 i;
- for (i = 0; i < c_workers; i++) {
+ for (Uint32 i = 0; i < c_workers; i++)
+ {
jam();
NdbNodeBitmask& waitFor = ss.m_waitFor[i];
- waitFor.assign(NdbNodeBitmask::Size, req->theNodes);
+ waitFor.bitOR(mask);
}
sendREQ(signal, ss);
- if (ss.noReply(number())) {
+ if (ss.noReply(number()))
+ {
jam();
ssRelease<Ss_NODE_FAILREP>(ss);
}
@@ -600,46 +605,40 @@ LocalProxy::sendNF_COMPLETEREP(Signal* s
{
Ss_NODE_FAILREP& ss = ssFind<Ss_NODE_FAILREP>(ssId);
- {
- const NFCompleteRep* conf = (const NFCompleteRep*)signal->getDataPtr();
+ const NFCompleteRep* conf = (const NFCompleteRep*)signal->getDataPtr();
+ Uint32 node = conf->failedNodeId;
+ {
NdbNodeBitmask& waitFor = ss.m_waitFor[ss.m_worker];
- ndbrequire(waitFor.get(conf->failedNodeId));
- waitFor.clear(conf->failedNodeId);
-
- if (!waitFor.isclear()) {
- // worker has not replied for all failed nodes
- skipConf(ss);
- }
+ ndbrequire(waitFor.get(node));
+ waitFor.clear(node);
}
- if (!lastReply(ss))
- return;
-
- NdbNodeBitmask theNodes;
- theNodes.assign(NdbNodeBitmask::Size, ss.m_req.theNodes);
-
- NodePtr nodePtr;
- c_nodeList.first(nodePtr);
- ndbrequire(nodePtr.i != RNIL);
- while (nodePtr.i != RNIL) {
- if (theNodes.get(nodePtr.p->m_nodeId)) {
+ for (Uint32 i = 0; i < c_workers; i++)
+ {
+ jam();
+ NdbNodeBitmask& waitFor = ss.m_waitFor[i];
+ if (waitFor.get(node))
+ {
jam();
- NFCompleteRep* conf = (NFCompleteRep*)signal->getDataPtrSend();
- conf->blockNo = number();
- conf->nodeId = getOwnNodeId();
- conf->failedNodeId = nodePtr.p->m_nodeId;
- conf->unused = 0;
- conf->from = __LINE__;
-
- sendSignal(DBDIH_REF, GSN_NF_COMPLETEREP,
- signal, NFCompleteRep::SignalLength, JBB);
+ /**
+ * Not all threads are done with this failed node
+ */
+ return;
}
-
- c_nodeList.next(nodePtr);
}
- ssRelease<Ss_NODE_FAILREP>(ssId);
+ {
+ NFCompleteRep* conf = (NFCompleteRep*)signal->getDataPtrSend();
+ conf->blockNo = number();
+ conf->nodeId = getOwnNodeId();
+ conf->failedNodeId = node;
+ conf->unused = 0;
+ conf->from = __LINE__;
+
+ sendSignal(DBDIH_REF, GSN_NF_COMPLETEREP,
+ signal, NFCompleteRep::SignalLength, JBB);
+ }
}
// GSN_INCL_NODEREQ
=== modified file 'storage/ndb/src/kernel/blocks/LocalProxy.hpp'
--- a/storage/ndb/src/kernel/blocks/LocalProxy.hpp 2008-12-03 19:49:40 +0000
+++ b/storage/ndb/src/kernel/blocks/LocalProxy.hpp 2009-01-30 13:07:48 +0000
@@ -240,7 +240,6 @@ protected:
template <class Ss>
Ss& ssFind(Uint32 ssId) {
- SsPool<Ss>& sp = Ss::pool(this);
ndbrequire(ssId != 0);
Ss* ssptr = ssSearch<Ss>(ssId);
ndbrequire(ssptr != 0);
@@ -254,14 +253,15 @@ protected:
*/
template <class Ss>
Ss& ssFindSeize(Uint32 ssId, bool* found) {
- SsPool<Ss>& sp = Ss::pool(this);
ndbrequire(ssId != 0);
Ss* ssptr = ssSearch<Ss>(ssId);
if (ssptr != 0) {
- *found = true;
+ if (found)
+ *found = true;
return *ssptr;
}
- *found = false;
+ if (found)
+ *found = false;
return ssSeize<Ss>(ssId);
}
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-6.4 branch (jonas:3240) Bug#42450 | Jonas Oreland | 30 Jan |