3483 Frazer Clement 2012-09-12
Bug #14386849 SCAN RESOURCE LEAK WHEN TC KEYINFO DATA BUFFERS EXHAUSTED
This patch fixes the scan resources leak when TC KeyInfo data buffers are exhausted.
A testcase is added - one which causes a real leak, another which simulates it.
Two new dump commands are added, to give visibility of the pool levels in TC and LQH.
modified:
storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp
storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
storage/ndb/src/ndbapi/ndberror.c
storage/ndb/test/ndbapi/testScan.cpp
storage/ndb/test/run-test/daily-basic-tests.txt
3482 magnus.blaudd@stripped 2012-09-12
Fix test case failure which depends on the binlogs content by adjusting
the used binlog position
modified:
mysql-test/r/ctype_cp932_binlog_stm.result
mysql-test/t/ctype_cp932_binlog_stm.test
=== modified file 'storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp'
--- a/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2012-04-24 10:55:03 +0000
+++ b/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2012-09-12 13:24:49 +0000
@@ -98,6 +98,7 @@ public:
LqhDumpAllActiveScanRec = 2302,
LqhDumpLcpState = 2303,
LqhErrorInsert5042 = 2315,
+ LqhDumpPoolLevels = 2353,
AccDumpOneScanRec = 2400,
AccDumpAllScanRec = 2401,
@@ -121,6 +122,7 @@ public:
StartPeriodicTcTimer = 2511,
TcStartDumpIndexOpCount = 2512,
TcDumpIndexOpCount = 2513,
+ TcDumpPoolLevels = 2555,
CmvmiDumpConnections = 2600,
CmvmiDumpLongSignalMemory = 2601,
CmvmiSetRestartOnErrorInsert = 2602,
=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2012-08-13 15:00:28 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp 2012-09-12 13:24:49 +0000
@@ -21604,6 +21604,104 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal
cmaxLogFilesInPageZero_DUMP = dumpState->args[1];
#endif
+ if (arg == DumpStateOrd::LqhDumpPoolLevels)
+ {
+ /* Dump some state info for internal buffers */
+ if (signal->getLength() == 1)
+ {
+ signal->theData[1] = 1;
+ signal->theData[2] = 0;
+ signal->theData[3] = 0;
+ sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 4, JBB);
+ return;
+ }
+ if (signal->getLength() != 4)
+ {
+ ndbout_c("DUMP LqhDumpPoolLevels : Bad signal length : %u", signal->getLength());
+ return;
+ }
+
+ Uint32 resource = signal->theData[1];
+ Uint32 position = signal->theData[2];
+ Uint32 sum = signal->theData[3];
+ const Uint32 MAX_ITER = 200;
+
+ switch(resource)
+ {
+ case 1:
+ {
+ /* Must get all in one loop, as we're traversing a dynamic list */
+ sum = 0;
+ TcConnectionrecPtr tcp;
+ tcp.i = cfirstfreeTcConrec;
+ while (tcp.i != RNIL)
+ {
+ sum++;
+ ptrCheckGuard(tcp, ctcConnectrecFileSize, tcConnectionrec);
+ tcp.i = tcp.p->nextTcConnectrec;
+ }
+ infoEvent("LQH : TcConnection (operation) records in use/total %u/%u (%u bytes each)",
+ ctcConnectrecFileSize - sum, ctcConnectrecFileSize, (Uint32) sizeof(TcConnectionrec));
+ resource++;
+ position = 0;
+ sum = 0;
+ break;
+ }
+ case 2:
+ {
+ infoEvent("LQH : ScanRecord (Fragment) pool in use/total %u/%u (%u bytes each)",
+ c_scanRecordPool.getSize()-
+ c_scanRecordPool.getNoOfFree(),
+ c_scanRecordPool.getSize(),
+ (Uint32) sizeof(ScanRecord));
+ resource++;
+ position = 0;
+ sum = 0;
+ break;
+ }
+ case 3:
+ {
+ /* Must get all in one loop, as we're traversing a dynamic list */
+ sum = 0;
+ DatabufPtr dbp;
+ dbp.i = cfirstfreeDatabuf;
+ while (dbp.i != RNIL)
+ {
+ sum++;
+ ptrCheckGuard(dbp, cdatabufFileSize, databuf);
+ dbp.i= dbp.p->nextDatabuf;
+ }
+ infoEvent("LQH : KeyInfo bufs in use/total %u/%u (%u bytes each)",
+ cdatabufFileSize - sum,
+ cdatabufFileSize, (Uint32) sizeof(Databuf));
+ resource++;
+ position = 0;
+ sum = 0;
+ break;
+ }
+ case 4:
+ {
+ infoEvent("LQH : AttrInfo bufs in use/total %u/%u (%u bytes each)",
+ cattrinbufFileSize - c_no_attrinbuf_recs,
+ cattrinbufFileSize,
+ (Uint32) sizeof(Attrbuf));
+ resource++;
+ position = 0;
+ sum = 0;
+ break;
+ }
+ default:
+ return;
+ }
+
+ signal->theData[0] = DumpStateOrd::LqhDumpPoolLevels;
+ signal->theData[1] = resource;
+ signal->theData[2] = position;
+ signal->theData[3] = sum;
+ sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 4, JBB);
+ return;
+ }
+
}//Dblqh::execDUMP_STATE_ORD()
/* **************************************************************** */
=== modified file 'storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp 2012-04-24 10:55:03 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp 2012-09-12 13:24:49 +0000
@@ -1716,6 +1716,7 @@ private:
UintR cwriteCount;
UintR cabortCount;
UintR cconcurrentOp;
+ UintR cconcurrentScans;
Uint32 c_scan_count;
Uint32 c_range_scan_count;
void reset () {
=== modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2012-06-21 10:31:29 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp 2012-09-12 13:24:49 +0000
@@ -1977,10 +1977,30 @@ void Dbtc::execKEYINFO(Signal* signal)
UintR TkeyLen = regCachePtr->keylen;
UintR Tlen = regCachePtr->save1;
+ ndbassert(tcConnectptr.p->tcConnectstate == OS_WAIT_SCAN);
+
do {
- if (cfirstfreeDatabuf == RNIL) {
+ if ((cfirstfreeDatabuf == RNIL) ||
+ ERROR_INSERTED(8094))
+ {
jam();
- seizeDatabuferrorLab(signal);
+ Uint32 transid0 = apiConnectptr.p->transid[0];
+ Uint32 transid1 = apiConnectptr.p->transid[1];
+ ndbrequire(apiConnectptr.p->apiScanRec != RNIL);
+ ScanRecordPtr scanPtr;
+ scanPtr.i = apiConnectptr.p->apiScanRec;
+ ptrCheckGuard(scanPtr, cscanrecFileSize, scanRecord);
+ abortScanLab(signal,
+ scanPtr,
+ ZGET_DATAREC_ERROR,
+ true /* Not started */);
+
+ /* Prepare for up coming ATTRINFO/KEYINFO */
+ apiConnectptr.p->apiConnectstate = CS_ABORTING;
+ apiConnectptr.p->abortState = AS_IDLE;
+ apiConnectptr.p->transid[0] = transid0;
+ apiConnectptr.p->transid[1] = transid1;
+
return;
}//if
linkKeybuf(signal);
@@ -9900,6 +9920,7 @@ void Dbtc::releaseScanResources(ScanReco
scanPtr.p->scanTcrec = RNIL;
scanPtr.p->scanApiRec = RNIL;
cfirstfreeScanrec = scanPtr.i;
+ ndbassert(cConcScanCount > 0);
cConcScanCount--;
apiConnectptr.p->apiScanRec = RNIL;
@@ -11978,6 +11999,111 @@ Dbtc::execDUMP_STATE_ORD(Signal* signal)
ndbrequire(rss_cconcurrentOp == c_counters.cconcurrentOp);
#endif;
}
+
+ if (arg == DumpStateOrd::TcDumpPoolLevels)
+ {
+ if (signal->getLength() == 1)
+ {
+ signal->theData[1] = 1;
+ signal->theData[2] = 0;
+ signal->theData[3] = 0;
+ sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 4, JBB);
+ return;
+ }
+ if (signal->getLength() != 4)
+ {
+ ndbout_c("DUMP TcDumpPoolLevels : Bad signal length : %u", signal->getLength());
+ return;
+ }
+
+ Uint32 resource = signal->theData[1];
+ Uint32 position = signal->theData[2];
+ Uint32 sum = signal->theData[3];
+ const Uint32 MAX_ITER = 200;
+
+ switch(resource)
+ {
+ case 1:
+ infoEvent("TC : Concurrent operations in use/total : %u/%u (%u bytes each)",
+ c_counters.cconcurrentOp,
+ ctcConnectFilesize,
+ (Uint32) sizeof(TcConnectRecord));
+ resource++;
+ position = 0;
+ sum = 0;
+ break;
+ case 2:
+ infoEvent("TC : Concurrent scans in use/total : %u/%u (%u bytes each)",
+ cConcScanCount,
+ cscanrecFileSize,
+ (Uint32) sizeof(ScanRecord));
+ resource++;
+ position = 0;
+ sum = 0;
+ break;
+ case 3:
+ infoEvent("TC : Scan Frag records in use/total : %u/%u (%u bytes each)",
+ c_scan_frag_pool.getSize() -
+ c_scan_frag_pool.getNoOfFree(),
+ c_scan_frag_pool.getSize(),
+ (Uint32) sizeof(ScanFragRec));
+ resource++;
+ position = 0;
+ sum = 0;
+ break;
+ case 4:
+ {
+ /* KeyInfo data buffers - traverse free list in one timeslice */
+ sum = 0;
+ DatabufRecordPtr bufPtr;
+ bufPtr.i = cfirstfreeDatabuf;
+ while (bufPtr.i != RNIL)
+ {
+ sum++;
+ ptrCheckGuard(bufPtr, cdatabufFilesize, databufRecord);
+ bufPtr.i = bufPtr.p->nextDatabuf;
+ }
+ infoEvent("TC : KeyInfo data buffers in use/total : %u/%u (%u bytes each)",
+ cdatabufFilesize - sum,
+ cdatabufFilesize,
+ (Uint32) sizeof(DatabufRecord));
+ resource++;
+ position = 0;
+ sum = 0;
+ break;
+ }
+ case 5:
+ {
+ /* AttrInfo data buffers - traverse free list in one timeslice */
+ sum = 0;
+ AttrbufRecordPtr attrBufPtr;
+ attrBufPtr.i = cfirstfreeAttrbuf;
+ while(attrBufPtr.i != RNIL)
+ {
+ sum++;
+ ptrCheckGuard(attrBufPtr, cattrbufFilesize, attrbufRecord);
+ attrBufPtr.i = attrBufPtr.p->attrbuf[ZINBUF_NEXT];
+ }
+ infoEvent("TC : AttrInfo buffers in use/total : %u/%u (%u bytes each)",
+ cattrbufFilesize - sum,
+ cattrbufFilesize,
+ (Uint32) sizeof(AttrbufRecord));
+ resource++;
+ position = 0;
+ sum = 0;
+ break;
+ }
+ default:
+ return;
+ }
+
+ signal->theData[0] = DumpStateOrd::TcDumpPoolLevels;
+ signal->theData[1] = resource;
+ signal->theData[2] = position;
+ signal->theData[3] = sum;
+ sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 4, JBB);
+ return;
+ }
}//Dbtc::execDUMP_STATE_ORD()
bool
=== modified file 'storage/ndb/src/ndbapi/ndberror.c'
--- a/storage/ndb/src/ndbapi/ndberror.c 2012-06-26 09:00:13 +0000
+++ b/storage/ndb/src/ndbapi/ndberror.c 2012-09-12 13:24:49 +0000
@@ -167,7 +167,7 @@ ErrorBundle ErrorCodes[] = {
* TemporaryResourceError
*/
{ 217, DMEC, TR, "217" },
- { 218, DMEC, TR, "218" },
+ { 218, DMEC, TR, "Out of KeyInfo data buffers in transaction coordinator." },
{ 219, DMEC, TR, "219" },
{ 233, DMEC, TR,
"Out of operation records in transaction coordinator (increase MaxNoOfConcurrentOperations)" },
=== modified file 'storage/ndb/test/ndbapi/testScan.cpp'
--- a/storage/ndb/test/ndbapi/testScan.cpp 2012-06-26 12:26:28 +0000
+++ b/storage/ndb/test/ndbapi/testScan.cpp 2012-09-12 13:24:49 +0000
@@ -1376,6 +1376,220 @@ finalizeBug42559(NDBT_Context* ctx, NDBT
return NDBT_OK;
}
+int takeResourceSnapshot(NDBT_Context* ctx, NDBT_Step* step)
+{
+ NdbRestarter restarter;
+
+ int checksnapshot = DumpStateOrd::TcResourceSnapshot;
+ restarter.dumpStateAllNodes(&checksnapshot, 1);
+
+ /* TODO : Check other block's resources? */
+ return NDBT_OK;
+}
+
+int runScanReadIndexWithBounds(NDBT_Context* ctx, NDBT_Step* step){
+ int loops = ctx->getNumLoops();
+ int records = ctx->getNumRecords();
+ int numRanges = ctx->getProperty("NumRanges", 1);
+ int maxRunSecs = ctx->getProperty("MaxRunSecs", 60);
+ int maxRetries = ctx->getProperty("MaxRetries", 1000000);
+
+ const NdbDictionary::Index * pIdx =
+ GETNDB(step)->getDictionary()->getIndex(orderedPkIdxName,
+ ctx->getTab()->getName());
+
+ int i = 0;
+ HugoCalculator calc(*ctx->getTab());
+ NDBT_ResultRow row (*ctx->getTab());
+ Ndb* ndb = GETNDB(step);
+
+ Uint64 start = NdbTick_CurrentMillisecond();
+ Uint64 end = start + (1000*maxRunSecs);
+ int retries = 0;
+
+ /* Here we run an ordered index scan, with a bound.
+ * There are numRanges sub-scans with the same bound
+ * This is done to use up some KeyInfo, and expose bugs in that area
+ * If we run many of these in parallel we may exhaust the available KeyInfo storage,
+ * which may expose some bugs.
+ */
+ while (pIdx &&
+ i<loops &&
+ !ctx->isTestStopped() &&
+ NdbTick_CurrentMillisecond() < end) {
+ g_info << "Step " << step->getStepNo()
+ << "Loop : " << i << ": ";
+
+ /* Use specific-partition variant of startTransaction to ensure a single
+ * TC node is used
+ */
+ NdbTransaction* trans = ndb->startTransaction(ctx->getTab(), Uint32(0));
+ if (trans == NULL)
+ {
+ g_err << "Transaction start failed " << ndb->getNdbError() << endl;
+ return NDBT_FAILED;
+ }
+
+ NdbIndexScanOperation* iso = trans->getNdbIndexScanOperation(pIdx->getName(),
+ ctx->getTab()->getName());
+ if (iso == NULL)
+ {
+ g_err << "Error obtaining IndexScanOperation : " << trans->getNdbError() << endl;
+ trans->close();
+ return NDBT_FAILED;
+ }
+
+ if (iso->readTuples(NdbOperation::LM_CommittedRead,
+ (NdbScanOperation::SF_OrderBy |
+ NdbScanOperation::SF_ReadRangeNo |
+ NdbScanOperation::SF_MultiRange),
+ 0) != 0)
+ {
+ g_err << "Error calling readTuples : " << iso->getNdbError() << endl;
+ trans->close();
+ return NDBT_FAILED;
+ }
+
+ for (int range = 0; range < numRanges; range++)
+ {
+ /* Now define a bound... */
+ for (Uint32 k=0; k<pIdx->getNoOfColumns(); k++)
+ {
+ const NdbDictionary::Column* idxCol = pIdx->getColumn(k);
+ const char* colName = idxCol->getName();
+ /* Lower bound of <= NULL should return all rows */
+ if (iso->setBound(colName, NdbIndexScanOperation::BoundLE, NULL) != 0)
+ {
+ g_err << "Error setting bound for column %s. "
+ << iso->getNdbError() << endl;
+ trans->close();
+ return NDBT_FAILED;
+ }
+ }
+
+ if (iso->end_of_bound(range) != 0)
+ {
+ g_err << "Error closing range " << range << endl;
+ g_err << iso->getNdbError() << endl;
+ return NDBT_FAILED;
+ }
+ }
+
+ const NdbDictionary::Table& tab= *ctx->getTab();
+
+ /* Now request all columns in result projection */
+ for (int a=0; a<tab.getNoOfColumns(); a++){
+ if((row.attributeStore(a) =
+ iso->getValue(tab.getColumn(a)->getName())) == 0) {
+ g_err << "Error defining read value " << trans->getNdbError() << endl;
+ trans->close();
+ return NDBT_FAILED;
+ }
+ }
+
+ /* Ready to go... */
+ trans->execute(NoCommit, AbortOnError);
+
+ if (trans->getNdbError().code != 0)
+ {
+ if (trans->getNdbError().code == 218)
+ {
+ /* Out of KeyInfo buffers in TC - that's ok, let's try again */
+ trans->close();
+ if (retries++ < maxRetries)
+ {
+ g_err << "Step " << step->getStepNo()
+ << " TC out of Keyinfo buffers (218) - retrying" << endl;
+ continue;
+ }
+ }
+
+ g_err << "Error on execution : " << trans->getNdbError() << endl;
+ trans->close();
+ return NDBT_FAILED;
+ }
+
+ int eof;
+ int rows = 0;
+ while ((eof = iso->nextResult(true)) == 0)
+ {
+ rows++;
+ if (calc.verifyRowValues(&row) != 0)
+ {
+ g_err << "Verification failed." << endl;
+ trans->close();
+ return NDBT_FAILED;
+ }
+
+#ifdef BUG_14388257_FIXED
+ int rangeNum = (rows -1) / records;
+ if (iso->get_range_no() != rangeNum)
+ {
+ g_err << "Expected row " << rows
+ << " to be in range " << rangeNum
+ << " but it reports range num " << iso->get_range_no()
+ << " : " << row
+ << endl;
+ return NDBT_FAILED;
+ }
+#endif
+
+ //g_err << row << endl;
+ }
+
+ if (eof != 1)
+ {
+ g_err << "nextResult() returned " << eof << endl;
+ g_err << "Scan error : " << iso->getNdbError() << endl;
+
+ if (iso->getNdbError().status == NdbError::TemporaryError)
+ {
+ if (retries++ < maxRetries)
+ {
+ g_err << "Step "
+ << step->getStepNo()
+ << " Temporary, retrying on iteration "
+ << i << " rows so far : " << rows << endl;
+ trans->close();
+ NdbSleep_MilliSleep(2500);
+ continue;
+ }
+ }
+
+ trans->close();
+ return NDBT_FAILED;
+ }
+
+ g_err << "Read " << rows << " rows." << endl;
+
+ if (records != 0 && rows != (numRanges * records))
+ {
+ g_err << "Expected " << records << " rows"
+ << ", read " << rows << endl;
+#ifdef BUG_14388257_FIXED
+ trans->close();
+ assert(false);
+ return NDBT_FAILED;
+#endif
+ }
+
+ trans->close();
+ i++;
+ }
+ return NDBT_OK;
+}
+
+int checkResourceSnapshot(NDBT_Context* ctx, NDBT_Step* step)
+{
+ NdbRestarter restarter;
+
+ int checksnapshot = DumpStateOrd::TcResourceCheckLeak;
+ restarter.dumpStateAllNodes(&checksnapshot, 1);
+
+ /* TODO : Check other block's resources? */
+ return NDBT_OK;
+}
+
int
runBug54945(NDBT_Context* ctx, NDBT_Step* step)
@@ -2184,6 +2398,47 @@ TESTCASE("extraNextResultBug11748194",
{
INITIALIZER(runExtraNextResult);
}
+TESTCASE("ScanRealKeyInfoExhaust",
+ "Test behaviour when TC keyinfo buffers exhausted 4real")
+{
+ /* 55 threads, each setting 200 ranges in their keyinfo
+ * For the lightest single column PK case, each range should
+ * use 2 words, 200 ranges = 400 words per scan thread =
+ * 400/4 = 100 Databuffers used.
+ * 55 threads = 55*100 = 5500 Databuffers which is >
+ * the 4000 statically allocated in 6.3
+ */
+ TC_PROPERTY("NumRanges", 200);
+ TC_PROPERTY("MaxRunSecs", 120);
+ INITIALIZER(createOrderedPkIndex);
+ INITIALIZER(runLoadTable);
+ INITIALIZER(takeResourceSnapshot);
+ STEPS(runScanReadIndexWithBounds,55);
+ FINALIZER(checkResourceSnapshot);
+ FINALIZER(createOrderedPkIndex_Drop);
+ FINALIZER(runClearTable);
+}
+TESTCASE("ScanKeyInfoExhaust",
+ "Test behaviour when TC keyinfo buffers exhausted with error insert")
+{
+ /* Use error insert 8094 to cause keyinfo exhaustion, then run a single scan
+ * with keyinfo to hit the error path
+ */
+ TC_PROPERTY("MaxRunSecs", 10);
+ INITIALIZER(createOrderedPkIndex);
+ INITIALIZER(runLoadTable);
+ INITIALIZER(takeResourceSnapshot);
+ TC_PROPERTY("ErrorCode", 8094);
+ INITIALIZER(runInsertError);
+ STEP(runScanReadIndexWithBounds);
+ FINALIZER(checkResourceSnapshot);
+ FINALIZER(runInsertError);
+ FINALIZER(createOrderedPkIndex_Drop);
+ FINALIZER(runClearTable);
+}
+
+
+
NDBT_TESTSUITE_END(testScan);
int main(int argc, const char** argv){
=== modified file 'storage/ndb/test/run-test/daily-basic-tests.txt'
--- a/storage/ndb/test/run-test/daily-basic-tests.txt 2012-08-13 11:03:25 +0000
+++ b/storage/ndb/test/run-test/daily-basic-tests.txt 2012-09-12 13:24:49 +0000
@@ -1549,3 +1549,7 @@ max-time: 600
cmd: testRedo
args: -n RestartFD -l 2 T1
+max-time : 300
+cmd: testScan
+args: -n ScanKeyInfoExhaust T1
+
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.1-telco-6.3 branch (frazer.clement:3482 to 3483)Bug#14386849 | Frazer Clement | 13 Sep |