From: Frazer Clement Date: January 23 2013 5:59pm Subject: bzr push into mysql-5.1-telco-6.3 branch (frazer.clement:3522 to 3523) Bug#14193623 List-Archive: http://lists.mysql.com/commits/145632 X-Bug: 14193623 Message-Id: <201301231759.r0NHxric016648@acsmt356.oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3523 Frazer Clement 2013-01-23 Bug#14193623 CLUSTER CRASH - SURVIVING NODE FAILURE IN HANDLING LQHKEYCONF FROM RECOVERING NODE This patch adds some self-checking to packed signals, which hopefully helps pinpoint the source of corrupt Packed Signals observed in this bug. Note that this is *not* a fix for this bug, but effectively a new assertion. Both pre-send and post-receive checks are coded, but only the pre-send checks are activated in non-debug builds, to save CPU. modified: storage/ndb/include/kernel/signaldata/PackedSignal.hpp storage/ndb/src/common/debugger/signaldata/PackedSignal.cpp storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 3522 Martin Skold 2012-12-18 ndb - bump version to 6.3.52 modified: configure.in === modified file 'storage/ndb/include/kernel/signaldata/PackedSignal.hpp' --- a/storage/ndb/include/kernel/signaldata/PackedSignal.hpp 2011-06-30 15:55:35 +0000 +++ b/storage/ndb/include/kernel/signaldata/PackedSignal.hpp 2013-01-23 16:58:38 +0000 @@ -29,8 +29,21 @@ #define ZLQHKEYCONF 4 #define ZREMOVE_MARKER 5 +// Definitions for verification of packed signals +static const int VERIFY_PACKED_SEND = 1; +#ifdef VM_TRACE +static const int VERIFY_PACKED_RECEIVE = 1; +#else +static const int VERIFY_PACKED_RECEIVE = 0; +#endif +static const int LQH_RECEIVE_TYPES = ((1 << ZCOMMIT) + (1 << ZCOMPLETE) + (1 << ZLQHKEYCONF) + (1 << ZREMOVE_MARKER)); +static const int TC_RECEIVE_TYPES = ((1 << ZCOMMITTED) + (1 << ZCOMPLETED) + (1 << ZLQHKEYCONF)); + class PackedSignal { +public: + static bool verify(const Uint32* data, Uint32 len, Uint32 typesExpected, 
Uint32 commitLen, Uint32 receiverBlockNo); +private: static Uint32 getSignalType(Uint32 data); /** === modified file 'storage/ndb/src/common/debugger/signaldata/PackedSignal.cpp' --- a/storage/ndb/src/common/debugger/signaldata/PackedSignal.cpp 2011-06-30 15:55:35 +0000 +++ b/storage/ndb/src/common/debugger/signaldata/PackedSignal.cpp 2013-01-23 16:58:38 +0000 @@ -106,3 +106,74 @@ printPACKED_SIGNAL(FILE * output, const fprintf(output, "--------- End Packed Signals ----------\n"); return true; } + +bool +PackedSignal::verify(const Uint32* data, Uint32 len, Uint32 receiverBlockNo, + Uint32 typesExpected, Uint32 commitLen) +{ + Uint32 pos = 0; + bool bad = false; + + if (unlikely(len > 25)) + { + fprintf(stderr, "Bad PackedSignal length : %u\n", len); + bad = true; + } + else + { + while ((pos < len) && ! bad) + { + Uint32 sigType = data[pos] >> 28; + if (unlikely(((1 << sigType) & typesExpected) == 0)) + { + fprintf(stderr, "Unexpected sigtype in packed signal : %u at pos %u. Expected : %u\n", + sigType, pos, typesExpected); + bad = true; + break; + } + switch (sigType) + { + case ZCOMMIT: + assert(commitLen > 0); + pos += commitLen; + break; + case ZCOMPLETE: + pos+= 3; + break; + case ZCOMMITTED: + pos+= 3; + break; + case ZCOMPLETED: + pos+= 3; + break; + case ZLQHKEYCONF: + pos+= LqhKeyConf::SignalLength; + break; + case ZREMOVE_MARKER: + pos+= 3; + break; + default : + fprintf(stderr, "Unrecognised signal type %u at pos %u\n", + sigType, pos); + bad = true; + break; + } + } + + if (likely(pos == len)) + { + /* Looks ok */ + return true; + } + + if (!bad) + { + fprintf(stderr, "Packed signal component length (%u) != total length (%u)\n", + pos, len); + } + } + + printPACKED_SIGNAL(stderr, data, len, receiverBlockNo); + + return false; +} === modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp' --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2012-09-12 13:24:49 +0000 +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2013-01-23 16:58:38 
+0000 @@ -2787,6 +2787,16 @@ void Dblqh::execPACKED_SIGNAL(Signal* si ndbrequire(Tlength <= 25); MEMCOPY_NO_WORDS(&TpackedData[0], &signal->theData[0], Tlength); + + if (VERIFY_PACKED_RECEIVE) + { + ndbrequire(PackedSignal::verify(&TpackedData[0], + Tlength, + cownref, + LQH_RECEIVE_TYPES, + TcommitLen)); + } + while (Tlength > Tstep) { switch (TpackedData[Tstep] >> 28) { case ZCOMMIT: @@ -3115,6 +3125,14 @@ void Dblqh::sendPackedSignalLqh(Signal* MEMCOPY_NO_WORDS(&signal->theData[0], &ahostptr->packedWordsLqh[0], noOfWords); + if (VERIFY_PACKED_SEND) + { + ndbrequire(PackedSignal::verify(&signal->theData[0], + noOfWords, + hostRef, + LQH_RECEIVE_TYPES, + 5)); /* Commit signal length */ + } sendSignal(hostRef, GSN_PACKED_SIGNAL, signal, noOfWords, JBB); ahostptr->noOfPackedWordsLqh = 0; }//Dblqh::sendPackedSignalLqh() @@ -3126,6 +3144,14 @@ void Dblqh::sendPackedSignalTc(Signal* s MEMCOPY_NO_WORDS(&signal->theData[0], &ahostptr->packedWordsTc[0], noOfWords); + if (VERIFY_PACKED_SEND) + { + ndbrequire(PackedSignal::verify(&signal->theData[0], + noOfWords, + hostRef, + TC_RECEIVE_TYPES, + 0)); /* Irrelevant for TC */ + } sendSignal(hostRef, GSN_PACKED_SIGNAL, signal, noOfWords, JBB); ahostptr->noOfPackedWordsTc = 0; }//Dblqh::sendPackedSignalTc() === modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp' --- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2012-11-20 14:29:14 +0000 +++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2013-01-23 16:58:38 +0000 @@ -3748,6 +3748,15 @@ void Dbtc::execPACKED_SIGNAL(Signal* sig TpackDataPtr[2] = Tdata3; TpackDataPtr[3] = Tdata4; }//for + + if (VERIFY_PACKED_RECEIVE) + { + ndbrequire(PackedSignal::verify(&TpackedData[0], + Tlength, + cownref, + TC_RECEIVE_TYPES, + 0)); /* Irrelevant */ + } while (Tlength > Tstep) { TpackDataPtr = &TpackedData[Tstep]; @@ -4325,6 +4334,15 @@ void Dbtc::sendPackedSignalLqh(Signal* s signal->theData[Tj + 3] = sig3; }//for ahostptr->noOfPackedWordsLqh = 0; + if (VERIFY_PACKED_SEND) + 
{ + ndbrequire(Tj >= TnoOfWords - 1); + ndbrequire(PackedSignal::verify(&signal->theData[0], + TnoOfWords, + ahostptr->hostLqhBlockRef, + LQH_RECEIVE_TYPES, + 5)); /* Commit signal length */ + } sendSignal(ahostptr->hostLqhBlockRef, GSN_PACKED_SIGNAL, signal, No bundle (reason: useless for push emails).