3523 Frazer Clement 2013-01-23
Bug#14193623 CLUSTER CRASH - SURVIVING NODE FAILURE IN HANDLING LQHKEYCONF FROM RECOVERING NODE
This patch adds some self-checking to packed signals, which hopefully
helps pinpoint the source of corrupt Packed Signals observed in this bug.
Note that this is *not* a fix for this bug, but effectively a
new assertion.
Both pre-send and post-receive checks are coded, but only the pre-send
checks are activated in non-debug builds, to save CPU.
modified:
storage/ndb/include/kernel/signaldata/PackedSignal.hpp
storage/ndb/src/common/debugger/signaldata/PackedSignal.cpp
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
3522 Martin Skold 2012-12-18
ndb - bump version to 6.3.52
modified:
configure.in
=== modified file 'storage/ndb/include/kernel/signaldata/PackedSignal.hpp'
--- a/storage/ndb/include/kernel/signaldata/PackedSignal.hpp 2011-06-30 15:55:35 +0000
+++ b/storage/ndb/include/kernel/signaldata/PackedSignal.hpp 2013-01-23 16:58:38 +0000
@@ -29,8 +29,21 @@
#define ZLQHKEYCONF 4
#define ZREMOVE_MARKER 5
+// Switches and type masks used when verifying packed signals.
+// Send-side verification is always compiled in.
+static const int VERIFY_PACKED_SEND = 1;
+// Receive-side verification is enabled only when VM_TRACE is defined.
+#ifndef VM_TRACE
+static const int VERIFY_PACKED_RECEIVE = 0;
+#else
+static const int VERIFY_PACKED_RECEIVE = 1;
+#endif
+// One bit per packed signal type (bit index == type value).
+static const int LQH_RECEIVE_TYPES = ((1 << ZCOMMIT) |
+                                      (1 << ZCOMPLETE) |
+                                      (1 << ZLQHKEYCONF) |
+                                      (1 << ZREMOVE_MARKER));
+static const int TC_RECEIVE_TYPES = ((1 << ZCOMMITTED) |
+                                     (1 << ZCOMPLETED) |
+                                     (1 << ZLQHKEYCONF));
+
class PackedSignal {
+public:
+  /* Returns true if the packed signal in data[0..len) is well formed. */
+  static bool verify(const Uint32* data, Uint32 len, Uint32 receiverBlockNo, Uint32 typesExpected, Uint32 commitLen);
+private:
static Uint32 getSignalType(Uint32 data);
/**
=== modified file 'storage/ndb/src/common/debugger/signaldata/PackedSignal.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/PackedSignal.cpp 2011-06-30 15:55:35 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/PackedSignal.cpp 2013-01-23 16:58:38 +0000
@@ -106,3 +106,74 @@ printPACKED_SIGNAL(FILE * output, const
fprintf(output, "--------- End Packed Signals ----------\n");
return true;
}
+
+/**
+ * Sanity-check the layout of a packed signal.
+ *
+ * Walks the sub-signals in data[0..len), taking each sub-signal's type from
+ * the top 4 bits of its first word and stepping over its length, and checks
+ * that every type is expected and that the lengths add up to exactly len.
+ *
+ * @param data            Packed signal words
+ * @param len             Number of words in data; more than 25 is rejected
+ * @param receiverBlockNo Passed to printPACKED_SIGNAL when dumping a bad signal
+ * @param typesExpected   Bitmask (1 << type) of acceptable sub-signal types
+ * @param commitLen       Number of words in a ZCOMMIT sub-signal; must be
+ *                        non-zero when ZCOMMIT is an expected type
+ * @return true if the signal looks well formed; false otherwise, after
+ *         writing a diagnostic and a dump of the signal to stderr
+ */
+bool
+PackedSignal::verify(const Uint32* data, Uint32 len, Uint32 receiverBlockNo,
+                     Uint32 typesExpected, Uint32 commitLen)
+{
+  Uint32 pos = 0;
+  bool bad = false;
+
+  if (unlikely(len > 25))
+  {
+    fprintf(stderr, "Bad PackedSignal length : %u\n", len);
+    bad = true;
+  }
+  else
+  {
+    while ((pos < len) && ! bad)
+    {
+      const Uint32 sigType = data[pos] >> 28;
+      if (unlikely(((1U << sigType) & typesExpected) == 0))
+      {
+        fprintf(stderr, "Unexpected sigtype in packed signal : %u at pos %u.  Expected : %u\n",
+                sigType, pos, typesExpected);
+        bad = true;
+        break;
+      }
+      switch (sigType)
+      {
+      case ZCOMMIT:
+        if (unlikely(commitLen == 0))
+        {
+          /*
+           * A zero step would never advance pos, so the loop would spin
+           * forever in builds where assert() is compiled out.  Treat it
+           * as a verification failure instead.
+           */
+          fprintf(stderr, "Bad commit length %u for ZCOMMIT at pos %u\n",
+                  commitLen, pos);
+          bad = true;
+          break;
+        }
+        pos += commitLen;
+        break;
+      case ZCOMPLETE:
+      case ZCOMMITTED:
+      case ZCOMPLETED:
+      case ZREMOVE_MARKER:
+        /* These sub-signals are all 3 words long */
+        pos += 3;
+        break;
+      case ZLQHKEYCONF:
+        pos += LqhKeyConf::SignalLength;
+        break;
+      default :
+        fprintf(stderr, "Unrecognised signal type %u at pos %u\n",
+                sigType, pos);
+        bad = true;
+        break;
+      }
+    }
+
+    if (likely(!bad && (pos == len)))
+    {
+      /* Looks ok */
+      return true;
+    }
+
+    if (!bad)
+    {
+      /* Every type was acceptable, but the last sub-signal overran len */
+      fprintf(stderr, "Packed signal component length (%u) != total length (%u)\n",
+              pos, len);
+    }
+  }
+
+  /* Dump the offending signal to help locate the source of the corruption */
+  printPACKED_SIGNAL(stderr, data, len, receiverBlockNo);
+
+  return false;
+}
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2012-09-12 13:24:49 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2013-01-23 16:58:38 +0000
@@ -2787,6 +2787,16 @@ void Dblqh::execPACKED_SIGNAL(Signal* si
ndbrequire(Tlength <= 25);
MEMCOPY_NO_WORDS(&TpackedData[0], &signal->theData[0], Tlength);
+
+ if (VERIFY_PACKED_RECEIVE)
+ {
+ ndbrequire(PackedSignal::verify(&TpackedData[0],
+ Tlength,
+ cownref,
+ LQH_RECEIVE_TYPES,
+ TcommitLen));
+ }
+
while (Tlength > Tstep) {
switch (TpackedData[Tstep] >> 28) {
case ZCOMMIT:
@@ -3115,6 +3125,14 @@ void Dblqh::sendPackedSignalLqh(Signal*
MEMCOPY_NO_WORDS(&signal->theData[0],
&ahostptr->packedWordsLqh[0],
noOfWords);
+ if (VERIFY_PACKED_SEND)
+ {
+ ndbrequire(PackedSignal::verify(&signal->theData[0],
+ noOfWords,
+ hostRef,
+ LQH_RECEIVE_TYPES,
+ 5)); /* Commit signal length */
+ }
sendSignal(hostRef, GSN_PACKED_SIGNAL, signal, noOfWords, JBB);
ahostptr->noOfPackedWordsLqh = 0;
}//Dblqh::sendPackedSignalLqh()
@@ -3126,6 +3144,14 @@ void Dblqh::sendPackedSignalTc(Signal* s
MEMCOPY_NO_WORDS(&signal->theData[0],
&ahostptr->packedWordsTc[0],
noOfWords);
+ if (VERIFY_PACKED_SEND)
+ {
+ ndbrequire(PackedSignal::verify(&signal->theData[0],
+ noOfWords,
+ hostRef,
+ TC_RECEIVE_TYPES,
+ 0)); /* Irrelevant for TC */
+ }
sendSignal(hostRef, GSN_PACKED_SIGNAL, signal, noOfWords, JBB);
ahostptr->noOfPackedWordsTc = 0;
}//Dblqh::sendPackedSignalTc()
=== modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2012-11-20 14:29:14 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2013-01-23 16:58:38 +0000
@@ -3748,6 +3748,15 @@ void Dbtc::execPACKED_SIGNAL(Signal* sig
TpackDataPtr[2] = Tdata3;
TpackDataPtr[3] = Tdata4;
}//for
+
+ if (VERIFY_PACKED_RECEIVE)
+ {
+ ndbrequire(PackedSignal::verify(&TpackedData[0],
+ Tlength,
+ cownref,
+ TC_RECEIVE_TYPES,
+ 0)); /* Irrelevant */
+ }
while (Tlength > Tstep) {
TpackDataPtr = &TpackedData[Tstep];
@@ -4325,6 +4334,15 @@ void Dbtc::sendPackedSignalLqh(Signal* s
signal->theData[Tj + 3] = sig3;
}//for
ahostptr->noOfPackedWordsLqh = 0;
+ if (VERIFY_PACKED_SEND)
+ {
+ ndbrequire(Tj >= TnoOfWords - 1);
+ ndbrequire(PackedSignal::verify(&signal->theData[0],
+ TnoOfWords,
+ ahostptr->hostLqhBlockRef,
+ LQH_RECEIVE_TYPES,
+ 5)); /* Commit signal length */
+ }
sendSignal(ahostptr->hostLqhBlockRef,
GSN_PACKED_SIGNAL,
signal,
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.1-telco-6.3 branch (frazer.clement:3522 to 3523) Bug#14193623 | Frazer Clement | 11 Mar 2013 |