5058 Ole John Aske 2012-12-03
Fix for Bug#15908684: Job threads may 'sleep' without pending messages being sent
This fix correct the calculation of the 'pending' argument to
update_sched_config() such that both previously failed 'pending_send',
and currently available unsent 'send_sum' together forms the
number of 'pending' signals.
modified:
storage/ndb/src/kernel/vm/mt.cpp
5057 Ole John Aske 2012-12-03
Avoid duplicated TcConnectionRec's in LQHs hashlist of such objects.
Internaly in each LQH we keep a hash structure of TcConnectionrec
which are identified by:
- Id of Transaction owning this TcConnectionrec.
- A 'tcOpRec' id which uniquely(*below) identify this TcConnectionRec
within this specific transaction.
- An optional 'hashHi' id used for SCANREQs in cases where 'tcOpRec'
on its own cant provide uniqueness.
This is required in cases where there are multiple (internal) clients
producing REQs where the uniqueness is only guaranteed within
each client. Currently the only such client is the SPJ block.
The internal clients of NDB does *not* guarantee hash uniqueness
for LQHKEYREQs as described above (SPJ, node restart ..). However,
these requests are all 'long', 'dirtyOp'-requests and thus never
required to be searched after in the hash table.
This fix exploits this property of such 'dirtyOp' and thus never
inserts its TcConnectionRec in the hash lists. Duplicates
in the hash list should then be avoided.
modified:
storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp
storage/ndb/test/ndbapi/testSpj.cpp
5056 Pekka Nousiainen 2012-12-01
bug#14702377 c01_fix.diff
LQHKEYREQ TC(6.3)->LQH(7.x) getNormalProtocolFlag
modified:
storage/ndb/include/kernel/signaldata/LqhKey.hpp
storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp'
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2012-12-01 18:05:58 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp 2012-12-03 09:00:33 +0000
@@ -2127,6 +2127,7 @@
UintR simpleTcConnect;
UintR tableref;
UintR tcOprec;
+ UintR hashIndex;
Uint32 tcHashKeyHi;
UintR tcScanInfo;
UintR tcScanRec;
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2012-12-01 18:05:58 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2012-12-03 09:00:33 +0000
@@ -4296,6 +4296,32 @@
/* ------------------------------------------------------------------------- */
/* ------ FIND TRANSACTION BY USING HASH TABLE ------- */
/* */
+/* We keep a hash structure of TcConnectionrec which are identified by: */
+/* - Id of Transaction owning this TcConnectionrec. */
+/* - A 'tcOpRec' id which uniquely(*below) identify this TcConnectionRec */
+/* within this specific transaction. */
+/* - An optional 'hashHi' id used for SCANREQs in cases where 'tcOpRec' */
+/* on its own cant provide uniqueness. */
+/* This is required in cases where there are multiple (internal) clients */
+/* producing REQs where the uniqueness is only guaranteed within */
+/* each client. Currently the only such client is the SPJ block. */
+/* */
+/* Hash lookup of TcConnectionrecPtr might be required for TcConnectionRecs */
+/* having a lifetime beyond the initial REQ. That is: */
+/* - Short requests awaiting for a later ATTR- or KEYINFO. */
+/* - SCANREQ which may need a later NEXTREQ to fetch more or close scan */
+/* - Transactional (non-DirtyOp) REQs which need a later abort, commit */
+/* or unlock request. */
+/* */
+/* TcConnectionrec's identified as not requiring hash lookup are not */
+/* inserted in the hash table! */
+/* */
+/* NOTE: */
+/* The internal clients of NDB does *not* guarantee hash uniqueness */
+/* for LQHKEYREQs as described above (SPJ, node restart ..). However, */
+/* these requests are all 'long', 'dirtyOp'-requests and thus neither */
+/* inserted nor searched after in the hash table. */
+/* */
/* ------------------------------------------------------------------------- */
int Dblqh::findTransaction(UintR Transid1, UintR Transid2, UintR TcOprec,
Uint32 hi)
@@ -4318,6 +4344,7 @@
jam();
tcConnectptr.i = locTcConnectptr.i;
tcConnectptr.p = locTcConnectptr.p;
+ ndbassert(tcConnectptr.p->hashIndex == ThashIndex);
return (int)ZOK;
}//if
jam();
@@ -4375,6 +4402,7 @@
locTcConnectptr.p->savePointId = 0;
locTcConnectptr.p->gci_hi = 0;
locTcConnectptr.p->gci_lo = 0;
+ locTcConnectptr.p->hashIndex = RNIL;
cfirstfreeTcConrec = nextTc;
tcConnectptr = locTcConnectptr;
locTcConnectptr.p->connectState = TcConnectionrec::CONNECTED;
@@ -4940,24 +4968,40 @@
return;
}//if
}//if
- /* Check that no equal element exists */
- ndbassert(findTransaction(regTcPtr->transid[0], regTcPtr->transid[1],
- regTcPtr->tcOprec, 0) == ZNOT_FOUND);
- TcConnectionrecPtr localNextTcConnectptr;
- Uint32 hashIndex = (regTcPtr->transid[0] ^ regTcPtr->tcOprec) & 1023;
- localNextTcConnectptr.i = ctransidHash[hashIndex];
- ctransidHash[hashIndex] = tcConnectptr.i;
- regTcPtr->prevHashRec = RNIL;
- regTcPtr->nextHashRec = localNextTcConnectptr.i;
- if (localNextTcConnectptr.i != RNIL) {
-/* -------------------------------------------------------------------------- */
-/* ENSURE THAT THE NEXT RECORD HAS SET PREVIOUS TO OUR RECORD IF IT EXISTS */
-/* -------------------------------------------------------------------------- */
- ptrCheckGuard(localNextTcConnectptr,
- ctcConnectrecFileSize, tcConnectionrec);
+
+ /**
+ * If this is a 'dirtyOp' we dont care about transaction semantics.
+ * There will then be no further abort, commit or unlock requests for
+ * this operation. Thus, we will never have to find this operation
+ * in the hashlist by calling findTransaction().
+ * If also all ATTR- and KEYINFOs has been received, there will be no
+ * ::execKEY- or ATTRINFO. (Long request, or all INFO fit in the REQ.)
+ *
+ * Thus we skip insertion in hashlist whenever not required.
+ */
+ if (regTcPtr->dirtyOp == ZFALSE || //Transactional operation
+ regTcPtr->primKeyLen > keyLenWithLQHReq || //Await more KEYINFO
+ regTcPtr->totReclenAi > regTcPtr->currReclenAi) //Await more ATTRINFO
+ {
jam();
- ndbassert(localNextTcConnectptr.p->prevHashRec == RNIL);
- localNextTcConnectptr.p->prevHashRec = tcConnectptr.i;
+ /* Check that no equal element exists */
+ ndbassert(findTransaction(regTcPtr->transid[0], regTcPtr->transid[1],
+ regTcPtr->tcOprec, regTcPtr->tcHashKeyHi) == ZNOT_FOUND);
+
+ TcConnectionrecPtr localNextTcConnectptr;
+ Uint32 hashIndex = (regTcPtr->transid[0] ^ regTcPtr->tcOprec) & 1023;
+ localNextTcConnectptr.i = ctransidHash[hashIndex];
+ ctransidHash[hashIndex] = tcConnectptr.i;
+ regTcPtr->prevHashRec = RNIL;
+ regTcPtr->nextHashRec = localNextTcConnectptr.i;
+ regTcPtr->hashIndex = hashIndex;
+ if (localNextTcConnectptr.i != RNIL) {
+ ptrCheckGuard(localNextTcConnectptr,
+ ctcConnectrecFileSize, tcConnectionrec);
+ jam();
+ ndbassert(localNextTcConnectptr.p->prevHashRec == RNIL);
+ localNextTcConnectptr.p->prevHashRec = tcConnectptr.i;
+ }//if
}//if
if (tabptr.i >= ctabrecFileSize) {
LQHKEY_error(signal, 5);
@@ -7401,6 +7445,19 @@
TcConnectionrec * const regTcPtr = tcConnectptr.p;
TcConnectionrecPtr prevHashptr;
TcConnectionrecPtr nextHashptr;
+ /**
+ * This operation has not been inserted in the hash list at all.
+ * (It is a non-transactional 'dirtyOp', or the request failed
+ * before it was ever inserted in the hash list.)
+ */
+ if (regTcPtr->hashIndex == RNIL)
+ {
+ jam();
+ /* Check that this, or an equal, TcConnectionrec isn't hashed */
+ ndbassert(findTransaction(regTcPtr->transid[0], regTcPtr->transid[1],
+ regTcPtr->tcOprec, regTcPtr->tcHashKeyHi) == ZNOT_FOUND);
+ return;
+ }
prevHashptr.i = regTcPtr->prevHashRec;
nextHashptr.i = regTcPtr->nextHashRec;
@@ -7417,7 +7474,8 @@
/* THE OPERATION WAS PLACED FIRST IN THE LIST OF THE HASH TABLE. NEED TO SET */
/* A NEW LEADER OF THE LIST. */
/* ------------------------------------------------------------------------- */
- Uint32 hashIndex = (regTcPtr->transid[0] ^ regTcPtr->tcOprec) & 1023;
+ Uint32 hashIndex = regTcPtr->hashIndex;
+ ndbassert(hashIndex == ((regTcPtr->transid[0] ^ regTcPtr->tcOprec) & 1023));
ndbassert(ctransidHash[hashIndex] == tcConnectptr.i);
ctransidHash[hashIndex] = nextHashptr.i;
}//if
@@ -7428,7 +7486,7 @@
nextHashptr.p->prevHashRec = prevHashptr.i;
}//if
- regTcPtr->prevHashRec = regTcPtr->nextHashRec = RNIL;
+ regTcPtr->hashIndex = regTcPtr->prevHashRec = regTcPtr->nextHashRec = RNIL;
}//Dblqh::deleteTransidHash()
/* -------------------------------------------------------------------------
@@ -8572,6 +8630,7 @@
void Dblqh::releaseTcrec(Signal* signal, TcConnectionrecPtr locTcConnectptr)
{
jam();
+ ndbassert(locTcConnectptr.p->hashIndex==RNIL);
Uint32 op = locTcConnectptr.p->operation;
locTcConnectptr.p->tcTimer = 0;
locTcConnectptr.p->transactionState = TcConnectionrec::TC_NOT_CONNECTED;
@@ -8605,6 +8664,7 @@
void Dblqh::releaseTcrecLog(Signal* signal, TcConnectionrecPtr locTcConnectptr)
{
jam();
+ ndbassert(locTcConnectptr.p->hashIndex==RNIL);
locTcConnectptr.p->tcTimer = 0;
locTcConnectptr.p->transactionState = TcConnectionrec::TC_NOT_CONNECTED;
locTcConnectptr.p->nextTcConnectrec = cfirstfreeTcConrec;
@@ -10629,6 +10689,7 @@
ctransidHash[hashIndex] = tcConnectptr.i;
tcConnectptr.p->prevHashRec = RNIL;
tcConnectptr.p->nextHashRec = nextHashptr.i;
+ tcConnectptr.p->hashIndex = hashIndex;
if (nextHashptr.i != RNIL) {
jam();
/* ---------------------------------------------------------------------
=== modified file 'storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp 2012-01-28 09:09:19 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp 2012-12-03 09:00:33 +0000
@@ -436,6 +436,13 @@
flags |= Page_cache_client::DELAY_REQ;
req.m_delay_until_time = NdbTick_CurrentMillisecond()+(Uint64)3000;
}
+ if (ERROR_INSERTED(4035) && (rand() % 13) == 0)
+ {
+ // Disk access have to randomly wait max 16ms for a diskpage
+ Uint64 delay = (Uint64)(rand() % 16) + 1;
+ flags |= Page_cache_client::DELAY_REQ;
+ req.m_delay_until_time = NdbTick_CurrentMillisecond()+delay;
+ }
#endif
Page_cache_client pgman(this, c_pgman);
=== modified file 'storage/ndb/src/kernel/vm/mt.cpp'
--- a/storage/ndb/src/kernel/vm/mt.cpp 2012-03-21 09:26:18 +0000
+++ b/storage/ndb/src/kernel/vm/mt.cpp 2012-12-03 09:01:26 +0000
@@ -3781,7 +3781,7 @@
else
{
/* No signals processed, prepare to sleep to wait for more */
- if (pending_send || send_sum > 0)
+ if ((pending_send + send_sum) > 0)
{
/* About to sleep, _must_ send now. */
pending_send = do_send(selfptr, TRUE);
@@ -3810,7 +3810,7 @@
*/
if (sum >= selfptr->m_max_exec_signals)
{
- if (update_sched_config(selfptr, pending_send))
+ if (update_sched_config(selfptr, pending_send + send_sum))
{
/* Update current time after sleeping */
now = NdbTick_CurrentMillisecond();
=== modified file 'storage/ndb/test/ndbapi/testSpj.cpp'
--- a/storage/ndb/test/ndbapi/testSpj.cpp 2012-11-27 10:16:19 +0000
+++ b/storage/ndb/test/ndbapi/testSpj.cpp 2012-12-03 09:00:33 +0000
@@ -125,6 +125,7 @@
int joinlevel = ctx->getProperty("JoinLevel", 3);
int records = ctx->getNumRecords();
int until_stopped = ctx->getProperty("UntilStopped", (Uint32)0);
+ int inject_err = ctx->getProperty("ErrorCode");
Uint32 stepNo = step->getStepNo();
int i = 0;
@@ -136,6 +137,16 @@
const NdbQueryDef * q2 = qb2.createQuery();
HugoQueries hugoTrans1(* q1);
HugoQueries hugoTrans2(* q2);
+ NdbRestarter restarter;
+
+ if (inject_err)
+ {
+ ndbout << "insertErrorInAllNodes("<<inject_err<<")"<<endl;
+ if (restarter.insertErrorInAllNodes(inject_err) != 0){
+ g_info << "Could not insert error in all nodes "<<endl;
+ return NDBT_FAILED;
+ }
+ }
while ((i<loops || until_stopped) && !ctx->isTestStopped())
{
g_info << i << ": ";
@@ -153,6 +164,7 @@
addMask(ctx, (1 << stepNo), "Running");
}
g_info << endl;
+ restarter.insertErrorInAllNodes(0);
return NDBT_OK;
}
@@ -1286,6 +1298,12 @@
STEPS(runJoin, 6);
FINALIZER(runClearTable);
}
+TESTCASE("MixedJoinDiskWait", "Simulate disk wait during pushed joins"){
+ INITIALIZER(runLoadTable);
+ TC_PROPERTY("ErrorCode", 4035);
+ STEPS(runJoin, 4);
+ FINALIZER(runClearTable);
+}
TESTCASE("NF_Join", ""){
TC_PROPERTY("UntilStopped", 1);
TC_PROPERTY("WaitProgress", 20);
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.1-telco-7.0 branch (ole.john.aske:5056 to 5058)Bug#15908684 | Ole John Aske | 3 Dec |