#At file:///home/jonas/src/telco-6.3/ based on revid:jonas@stripped
3055 Jonas Oreland 2009-09-21
ndb - bug#37688 - fix race condition between TAKE_OVERTCCONF and NODE_FAILREP by introducing a generic route-facility
added:
storage/ndb/include/kernel/signaldata/LocalRouteOrd.hpp
storage/ndb/src/common/debugger/signaldata/LocalRouteOrd.cpp
modified:
storage/ndb/include/kernel/GlobalSignalNumbers.h
storage/ndb/include/kernel/signaldata/SignalData.hpp
storage/ndb/src/common/debugger/signaldata/Makefile.am
storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp
storage/ndb/src/common/debugger/signaldata/SignalNames.cpp
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
storage/ndb/src/kernel/vm/SimulatedBlock.cpp
storage/ndb/src/kernel/vm/SimulatedBlock.hpp
=== modified file 'storage/ndb/include/kernel/GlobalSignalNumbers.h'
--- a/storage/ndb/include/kernel/GlobalSignalNumbers.h 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/include/kernel/GlobalSignalNumbers.h 2009-09-21 08:26:49 +0000
@@ -388,7 +388,8 @@ extern const GlobalSignalNumber NO_OF_SI
#define GSN_UPGRADE_PROTOCOL_ORD 285
/* 286 not unused */
-/* 287 unused */
+#define GSN_LOCAL_ROUTE_ORD 287 /* local */
+
#define GSN_GETGCICONF 288
#define GSN_GETGCIREQ 289
#define GSN_HOT_SPAREREP 290
=== added file 'storage/ndb/include/kernel/signaldata/LocalRouteOrd.hpp'
--- a/storage/ndb/include/kernel/signaldata/LocalRouteOrd.hpp 1970-01-01 00:00:00 +0000
+++ b/storage/ndb/include/kernel/signaldata/LocalRouteOrd.hpp 2009-09-21 08:26:49 +0000
@@ -0,0 +1,34 @@
+/*
+ Copyright (C) 2003 MySQL AB
+ All rights reserved. Use is subject to license terms.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef LOCAL_ROUTE_ORD_HPP
+#define LOCAL_ROUTE_ORD_HPP
+
+#include "SignalData.hpp"
+
+/**
+ * Signal data for GSN_LOCAL_ROUTE_ORD.
+ *
+ * Three fixed words (StaticLen) followed by a variable part starting at
+ * path[]: one (block reference, prio) word pair per remaining hop, then
+ * one block reference per final destination.
+ */
+struct LocalRouteOrd
+{
+ STATIC_CONST( StaticLen = 3 );
+
+ Uint32 cnt; // upper 16 bits: #remaining hops, lower 16 bits: #destinations
+ Uint32 gsn; // Final gsn
+ Uint32 prio;// Final prio
+ Uint32 path[1]; // first word of the variable part
+};
+
+#endif
=== modified file 'storage/ndb/include/kernel/signaldata/SignalData.hpp'
--- a/storage/ndb/include/kernel/signaldata/SignalData.hpp 2009-05-26 18:53:34 +0000
+++ b/storage/ndb/include/kernel/signaldata/SignalData.hpp 2009-09-21 08:26:49 +0000
@@ -226,4 +226,6 @@ GSN_PRINT_SIGNATURE(printCONTINUEB_NDBFS
GSN_PRINT_SIGNATURE(printCONTINUEB_DBDIH);
GSN_PRINT_SIGNATURE(printSTART_FRAG_REQ);
+GSN_PRINT_SIGNATURE(printLOCAL_ROUTE_ORD);
+
#endif
=== added file 'storage/ndb/src/common/debugger/signaldata/LocalRouteOrd.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/LocalRouteOrd.cpp 1970-01-01 00:00:00 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/LocalRouteOrd.cpp 2009-09-21 08:26:49 +0000
@@ -0,0 +1,63 @@
+/*
+ Copyright (C) 2003 MySQL AB
+ All rights reserved. Use is subject to license terms.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <signaldata/LocalRouteOrd.hpp>
+#include <DebuggerNames.hpp>
+#include <RefConvert.hpp>
+
+/**
+ * Print the contents of a LOCAL_ROUTE_ORD signal in readable form.
+ *
+ * theData is read as a LocalRouteOrd header (cnt, gsn, prio), followed by
+ * pathcnt (reference, prio) word pairs, followed by dstcnt destination
+ * references. Any words left between that point and theData + len are
+ * dumped as raw hex data.
+ *
+ * @param output   stream to print to
+ * @param theData  signal data words
+ * @param len      number of words in theData
+ * @param rbn      not used by this printer
+ * @return always true
+ */
+bool
+printLOCAL_ROUTE_ORD(FILE* output,
+                     const Uint32* theData, Uint32 len,
+                     Uint16 rbn)
+{
+  const LocalRouteOrd * sig = (const LocalRouteOrd*)theData;
+  Uint32 pathcnt = sig->cnt >> 16;
+  Uint32 dstcnt = sig->cnt & 0xFFFF;
+
+  fprintf(output, " pathcnt: %u dstcnt: %u\n", pathcnt, dstcnt);
+  fprintf(output, " gsn: %u(%s) prio: %u\n",
+          sig->gsn, getSignalName(sig->gsn), sig->prio);
+
+  const Uint32 * ptr = sig->path;
+  fprintf(output, " path:");
+  for (Uint32 i = 0; i<pathcnt; i++)
+  {
+    fprintf(output, " [ hop: 0x%x(%s) prio: %u ]",
+            ptr[0], getBlockName(refToMain(ptr[0])), ptr[1]);
+    ptr += 2;
+  }
+
+  fprintf(output, "\n dst:");
+  for (Uint32 i = 0; i<dstcnt; i++)
+  {
+    fprintf(output, " [ 0x%x(%s) ]",
+            ptr[0], getBlockName(refToMain(ptr[0])));
+    /**
+     * One word per destination. Without stepping past it the same
+     * destination is printed dstcnt times and the destination words
+     * are then repeated in the data dump below.
+     */
+    ptr++;
+  }
+  fprintf(output, "\n");
+
+  if (ptr < (theData + len))
+  {
+    fprintf(output, " data:");
+    while (ptr < (theData + len))
+    {
+      fprintf(output, " %.8x", * ptr++);
+    }
+    fprintf(output, "\n");
+  }
+  return true;
+}
=== modified file 'storage/ndb/src/common/debugger/signaldata/Makefile.am'
--- a/storage/ndb/src/common/debugger/signaldata/Makefile.am 2009-05-26 18:53:34 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/Makefile.am 2009-09-21 08:26:49 +0000
@@ -39,7 +39,8 @@ libsignaldataprint_la_SOURCES = \
SumaImpl.cpp NdbSttor.cpp CreateFragmentation.cpp \
UtilLock.cpp TuxMaint.cpp AccLock.cpp \
LqhTrans.cpp ReadNodesConf.cpp CntrStart.cpp \
- ScanFrag.cpp
+ ScanFrag.cpp \
+ LocalRouteOrd.cpp
include $(top_srcdir)/storage/ndb/config/common.mk.am
include $(top_srcdir)/storage/ndb/config/type_ndbapi.mk.am
=== modified file 'storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp 2009-05-26 18:53:34 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/SignalDataPrint.cpp 2009-09-21 08:26:49 +0000
@@ -201,6 +201,8 @@ SignalDataPrintFunctions[] = {
,{ GSN_LQH_TRANSCONF, printLQH_TRANSCONF }
,{ GSN_SCAN_FRAGREQ, printSCAN_FRAGREQ }
,{ GSN_START_FRAGREQ, printSTART_FRAG_REQ }
+
+ ,{ GSN_LOCAL_ROUTE_ORD, printLOCAL_ROUTE_ORD }
,{ 0, 0 }
};
=== modified file 'storage/ndb/src/common/debugger/signaldata/SignalNames.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/SignalNames.cpp 2009-09-21 08:26:49 +0000
@@ -655,5 +655,7 @@ const GsnName SignalNames [] = {
,{ GSN_START_TOREF, "START_TOREF" }
,{ GSN_END_TOREF, "END_TOREF" }
,{ GSN_START_PERMREP, "START_PERMREP" }
+
+ ,{ GSN_LOCAL_ROUTE_ORD, "LOCAL_ROUTE_ORD" }
};
const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);
=== modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2009-09-14 12:51:36 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2009-09-21 08:26:49 +0000
@@ -7533,11 +7533,25 @@ Dbtc::checkScanFragList(Signal* signal,
void Dbtc::execTAKE_OVERTCCONF(Signal* signal)
{
jamEntry();
+
+ if (!checkNodeFailSequence(signal))
+ {
+ jam();
+ return;
+ }
+
tfailedNodeId = signal->theData[0];
hostptr.i = tfailedNodeId;
ptrCheckGuard(hostptr, chostFilesize, hostRecord);
- if (signal->getSendersBlockRef() != reference())
+ Uint32 senderRef = signal->theData[1];
+ if (signal->getLength() < 2)
+ {
+ jam();
+ senderRef = 0; // currently only used to see if it's from self
+ }
+
+ if (senderRef != reference())
{
jam();
@@ -7854,7 +7868,8 @@ void Dbtc::completeTransAtTakeOverDoLast
/*------------------------------------------------------------*/
NodeReceiverGroup rg(DBTC, c_alive_nodes);
signal->theData[0] = tcNodeFailptr.p->takeOverNode;
- sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 1, JBB);
+ signal->theData[1] = reference();
+ sendSignal(rg, GSN_TAKE_OVERTCCONF, signal, 2, JBB);
if (tcNodeFailptr.p->queueIndex > 0) {
jam();
=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2009-09-01 11:07:11 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp 2009-09-21 08:26:49 +0000
@@ -3801,7 +3801,7 @@ Qmgr::sendCommitFailReq(Signal* signal)
ptrAss(nodePtr, nodeRec);
#ifdef ERROR_INSERT
- if (ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
+ if (false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
{
ndbout_c("skipping node %d", c_error_insert_extra);
CLEAR_ERROR_INSERT_VALUE;
@@ -3918,6 +3918,9 @@ void Qmgr::execCOMMIT_FAILREQ(Signal* si
{
NodeRecPtr nodePtr;
jamEntry();
+
+ CRASH_INSERTION(935);
+
BlockReference Tblockref = signal->theData[0];
UintR TfailureNr = signal->theData[1];
if (Tblockref != cpdistref) {
=== modified file 'storage/ndb/src/kernel/vm/SimulatedBlock.cpp'
--- a/storage/ndb/src/kernel/vm/SimulatedBlock.cpp 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/kernel/vm/SimulatedBlock.cpp 2009-09-21 08:26:49 +0000
@@ -34,6 +34,7 @@
#include <signaldata/NodeStateSignalData.hpp>
#include <signaldata/FsRef.hpp>
#include <signaldata/SignalDroppedRep.hpp>
+#include <signaldata/LocalRouteOrd.hpp>
#include <DebuggerNames.hpp>
#include "LongSignal.hpp"
@@ -151,6 +152,7 @@ SimulatedBlock::installSimulatedBlockFun
a[GSN_FSAPPENDREF] = &SimulatedBlock::execFSAPPENDREF;
a[GSN_NODE_START_REP] = &SimulatedBlock::execNODE_START_REP;
a[GSN_API_START_REP] = &SimulatedBlock::execAPI_START_REP;
+ a[GSN_LOCAL_ROUTE_ORD] = &SimulatedBlock::execLOCAL_ROUTE_ORD;
}
void
@@ -2377,3 +2379,250 @@ SimulatedBlock::create_distr_key(Uint32
}
CArray<KeyDescriptor> g_key_descriptor_pool;
+
+/**
+ * Send a signal to local destination block(s) via a chain of local hops.
+ *
+ * The signal is wrapped in a GSN_LOCAL_ROUTE_ORD and sent to path[0].
+ * The remaining hops and the destinations are stored in the
+ * LocalRouteOrd header. The original signal data is placed right after
+ * the header when everything fits in 25 words, otherwise it is imported
+ * into a new section that is put first in the section handle.
+ *
+ * @param path       hops to pass, at least one
+ * @param pathcnt    number of entries in path
+ * @param dst        final destination block reference(s)
+ * @param dstcnt     number of entries in dst
+ * @param gsn        signal number used for the final delivery
+ * @param signal     signal whose theData holds the payload
+ * @param sigLen     payload length in words
+ * @param prio       priority used for the final delivery
+ * @param userhandle optional sections to send along; emptied on return
+ */
+void
+SimulatedBlock::sendRoutedSignal(RoutePath path[], Uint32 pathcnt,
+                                 Uint32 dst[],
+                                 Uint32 dstcnt,
+                                 Uint32 gsn,
+                                 Signal * signal,
+                                 Uint32 sigLen,
+                                 JobBufferLevel prio,
+                                 SectionHandle * userhandle)
+{
+  ndbrequire(pathcnt > 0); // don't support (now) directly multi-cast
+
+  // The first hop is the sendSignal() at the end, so it is not stored
+  const Uint32 hopsLeft = pathcnt - 1;
+
+  Uint32 ordLen = LocalRouteOrd::StaticLen + (2 * hopsLeft) + dstcnt;
+  ndbrequire(ordLen <= 25);
+
+  SectionHandle handle(this, signal);
+  if (userhandle != 0)
+  {
+    ljam();
+    // Take over the caller's sections
+    const Uint32 cnt = userhandle->m_cnt;
+    handle.m_cnt = cnt;
+    for (Uint32 s = 0; s < cnt; s++)
+      handle.m_ptr[s] = userhandle->m_ptr[s];
+    userhandle->m_cnt = 0;
+  }
+
+  if (ordLen + sigLen <= 25)
+  {
+    ljam();
+    // Payload fits: slide it up to make room for the routing header
+    memmove(signal->theData + ordLen, signal->theData, 4 * sigLen);
+    ordLen += sigLen;
+  }
+  else
+  {
+    ljam();
+
+    /**
+     * Payload does not fit: store theData in a section of its own,
+     * placed first, with the existing sections moved one slot up
+     */
+    ndbrequire(handle.m_cnt < 3);
+    for (Uint32 s = 2; s > 0; s--)
+      handle.m_ptr[s] = handle.m_ptr[s - 1];
+
+    Ptr<SectionSegment> payload;
+    ndbrequire(import(payload, signal->theData, sigLen));
+    handle.m_ptr[0].p = payload.p;
+    handle.m_ptr[0].i = payload.i;
+    handle.m_ptr[0].sz = sigLen;
+    handle.m_cnt++;
+  }
+
+  LocalRouteOrd * ord = (LocalRouteOrd*)signal->getDataPtrSend();
+  ord->cnt = (hopsLeft << 16) | (dstcnt);
+  ord->gsn = gsn;
+  ord->prio = Uint32(prio);
+
+  Uint32 * out = ord->path;
+
+  // Remaining hops, path[1] and onwards, as (ref, prio) pairs
+  for (Uint32 h = 0; h < hopsLeft; h++)
+  {
+    const RoutePath & hop = path[h + 1];
+    const Uint32 hopNode = refToNode(hop.ref);
+    ndbrequire(hopNode == 0 || hopNode == getOwnNodeId());
+
+    out[0] = hop.ref;
+    out[1] = Uint32(hop.prio);
+    out += 2;
+  }
+
+  // Final destination(s), one word each
+  for (Uint32 d = 0; d < dstcnt; d++)
+  {
+    const Uint32 dstNode = refToNode(dst[d]);
+    ndbrequire(dstNode == 0 || dstNode == getOwnNodeId());
+
+    *out++ = dst[d];
+  }
+
+  sendSignal(path[0].ref, GSN_LOCAL_ROUTE_ORD, signal, ordLen,
+             path[0].prio, &handle);
+}
+
+/**
+ * Handle GSN_LOCAL_ROUTE_ORD: pass a routed signal on to the next hop, or,
+ * when no hops remain, unwrap it and deliver it to the final destination(s).
+ *
+ * Signal data is a LocalRouteOrd header followed by the remaining
+ * (ref, prio) hop pairs and the destination references. The wrapped
+ * payload is either the words that follow the destinations, or, when no
+ * such words are present, the contents of section 0.
+ *
+ * With error insert 1001 set, the signal is instead sent back to this
+ * block using sendSignalWithDelay.
+ */
+void
+SimulatedBlock::execLOCAL_ROUTE_ORD(Signal* signal)
+{
+  ljamEntry();
+
+  if (!assembleFragments(signal))
+  {
+    ljam();
+    return;
+  }
+
+  if (ERROR_INSERTED(1001))
+  {
+    /**
+     * This NDBCNTR error code 1001
+     */
+    ljam();
+    SectionHandle handle(this, signal);
+    sendSignalWithDelay(reference(), GSN_LOCAL_ROUTE_ORD, signal, 200,
+                        signal->getLength(), &handle);
+    return;
+  }
+
+  LocalRouteOrd* ord = (LocalRouteOrd*)signal->getDataPtr();
+  Uint32 pathcnt = ord->cnt >> 16;
+  Uint32 dstcnt = ord->cnt & 0xFFFF;
+  Uint32 sigLen = signal->getLength();
+
+  if (pathcnt == 0)
+  {
+    /**
+     * Send to final destination(s);
+     */
+    ljam();
+    Uint32 gsn = ord->gsn;
+    Uint32 prio = ord->prio;
+    /**
+     * Save the destinations above word 25 before theData is
+     * overwritten with the payload
+     */
+    memcpy(signal->theData+25, ord->path, 4*dstcnt);
+    SectionHandle handle(this, signal);
+    if (sigLen > LocalRouteOrd::StaticLen + dstcnt)
+    {
+      ljam();
+      /**
+       * Data is at end of this...
+       */
+      memmove(signal->theData,
+              signal->theData + LocalRouteOrd::StaticLen + dstcnt,
+              4 * (sigLen - (LocalRouteOrd::StaticLen + dstcnt)));
+      sigLen = sigLen - (LocalRouteOrd::StaticLen + dstcnt);
+    }
+    else
+    {
+      ljam();
+      /**
+       * Put section 0 in signal->theData
+       *
+       * NOTE(review): a routed signal with a zero-length payload also
+       * ends up here, since it has no words after the destinations
+       * either - confirm that such signals are never routed
+       */
+      sigLen = handle.m_ptr[0].sz;
+      ndbrequire(sigLen <= 25);
+      copy(signal->theData, handle.m_ptr[0]);
+      release(handle.m_ptr[0]);
+
+      // Drop section 0 from the handle, moving the others down
+      for (Uint32 i = 0; i < handle.m_cnt - 1; i++)
+        handle.m_ptr[i] = handle.m_ptr[i+1];
+      handle.m_cnt--;
+    }
+
+    /*
+     * The extra if-statement is there because sendSignalNoRelease copies
+     * sections, which is not necessary when sending to only one destination
+     */
+    if (dstcnt > 1)
+    {
+      ljam();
+      /** 6.3 has not yet impl. sendSignalNoRelease */
+#if NDB_VERSION_D >= NDB_MAKE_VERSION(6,4,0)
+      for (Uint32 i = 0; i<dstcnt; i++)
+      {
+        ljam();
+        sendSignalNoRelease(signal->theData[25+i], gsn, signal, sigLen,
+                            JobBufferLevel(prio), &handle);
+      }
+      releaseSections(handle);
+#else
+      if (handle.m_cnt == 0)
+      {
+        ljam();
+        for (Uint32 i = 0; i<dstcnt; i++)
+        {
+          ljam();
+          sendSignal(signal->theData[25+i], gsn, signal, sigLen,
+                     JobBufferLevel(prio));
+        }
+      }
+      else
+      {
+        /**
+         * This path is (out of laziness?) not implemented in 6.3
+         */
+        ndbrequire(false);
+      }
+#endif
+    }
+    else
+    {
+      ljam();
+      sendSignal(signal->theData[25+0], gsn, signal, sigLen,
+                 JobBufferLevel(prio), &handle);
+    }
+  }
+  else
+  {
+    /**
+     * Reroute: take the first (ref, prio) pair off the path, close the
+     * gap it leaves, and send the rest on to that hop
+     */
+    ljam();
+    SectionHandle handle(this, signal);
+    Uint32 ref = ord->path[0];
+    Uint32 prio = ord->path[1];
+    Uint32 len = sigLen - 2;
+    ord->cnt = ((pathcnt - 1) << 16) | dstcnt;
+    memmove(ord->path, ord->path+2, 4 * (len - LocalRouteOrd::StaticLen));
+    sendSignal(ref, GSN_LOCAL_ROUTE_ORD, signal, len,
+               JobBufferLevel(prio), &handle);
+  }
+}
+
+
+/**
+ * Make sure that a signal that is part of node-failure handling on a
+ * remote node is not processed before this node has got NODE_FAILREP
+ * (this to avoid tricky state handling).
+ *
+ * Signals sent by this block itself, or by NDBCNTR on the own node, are
+ * accepted as they are. Any other signal is sent back to this block via
+ * QMGR (GSN_COMMIT_FAILREQ) and NDBCNTR (which sends NODE_FAILREP), so
+ * that it arrives again later, then with local NDBCNTR as the sender.
+ * The extra time should be negligible.
+ *
+ * @return true if the caller can process the signal now,
+ *         false if the signal has been rerouted and must not be processed
+ */
+bool
+SimulatedBlock::checkNodeFailSequence(Signal* signal)
+{
+  const Uint32 senderRef = signal->getSendersBlockRef();
+
+  /**
+   * Signals sent by ourself are only sent as a consequence of NODE_FAILREP
+   */
+  if (senderRef == reference())
+  {
+    ljam();
+    return true;
+  }
+
+  /**
+   * Sent by NDBCNTR on the own node, which is the last hop of the
+   * route set up below
+   */
+  if (refToNode(senderRef) == getOwnNodeId() &&
+      refToMain(senderRef) == NDBCNTR)
+  {
+    ljam();
+    return true;
+  }
+
+  RoutePath route[2] = {
+    { QMGR_REF, JBB },
+    { NDBCNTR_REF, JBB }
+  };
+  Uint32 self[1] = { reference() };
+
+  SectionHandle sections(this, signal);
+  const Uint32 origGsn = signal->header.theVerId_signalNumber;
+  const Uint32 origLen = signal->getLength();
+
+  sendRoutedSignal(route, 2, self, 1, origGsn, signal, origLen, JBB,
+                   &sections);
+  return false;
+}
=== modified file 'storage/ndb/src/kernel/vm/SimulatedBlock.hpp'
--- a/storage/ndb/src/kernel/vm/SimulatedBlock.hpp 2009-05-27 12:11:46 +0000
+++ b/storage/ndb/src/kernel/vm/SimulatedBlock.hpp 2009-09-21 08:26:49 +0000
@@ -218,6 +218,33 @@ protected:
void handle_lingering_sections_after_execute(Signal*) const;
void handle_lingering_sections_after_execute(SectionHandle*) const;
+ /**
+ * Send routed signals (ONLY LOCALLY)
+ *
+ * The signal is passed through each hop in path[], in order, and is
+ * then delivered as signal number gsn with priority prio to every
+ * block reference in dst[].
+ *
+ * NOTE: Only localhost is allowed! All hops after the first one and
+ * all destinations must have node 0 or the own node id.
+ */
+ struct RoutePath
+ {
+ Uint32 ref; // block reference of the hop
+ JobBufferLevel prio; // priority used when sending to the hop
+ };
+ void sendRoutedSignal(RoutePath path[],
+ Uint32 pathcnt, // #hops, must be > 0
+ Uint32 dst[], // Final destination(s)
+ Uint32 dstcnt, // #final destination(s)
+ Uint32 gsn, // Final GSN
+ Signal*,
+ Uint32 len, // #words of signal data to deliver
+ JobBufferLevel prio, // Final prio
+ SectionHandle * handle = 0); // optional sections, emptied by the call
+
+
+ /**
+ * Check that signal sent from remote node
+ * is guaranteed to be correctly serialized with respect to NODE_FAILREP
+ */
+ bool checkNodeFailSequence(Signal*);
+
/**********************************************************
* Fragmented signals
*/
@@ -485,6 +512,8 @@ protected:
void execCONTINUE_FRAGMENTED(Signal* signal);
void execAPI_START_REP(Signal* signal);
void execNODE_START_REP(Signal* signal);
+
+ void execLOCAL_ROUTE_ORD(Signal*);
private:
/**
* Node state
Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20090921082649-rgbc1jftgr8r114l.bundle
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-6.3 branch (jonas:3055) Bug#37688 | Jonas Oreland | 21 Sep |