4562 Jan Wedvik 2011-09-29
This patch will make the SPJ block fetch all rows for some non-root index scans
in one batch rather than two.
This will happen if the first batch reads from a subset of the fragments and
receives few rows. The SPJ block will then
try to read from the remaining fragments before finishing the batch.
This is especially useful when doing bushy scans. If there are more branches in
the bushy scan, then these will have to be
repeated for each batch of the current scan.
modified:
storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp
storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp
4561 Jan Wedvik 2011-09-29
This patch fixes a bug that causes an assert error in NdbQueryOperationDefImpl::printTree() for query trees that are more
than 31 operations deep.
modified:
storage/ndb/src/ndbapi/NdbQueryBuilder.cpp
storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp
4560 Jan Wedvik 2011-09-29
This patch fixes an assert error that happens in printSCANTABCONF() when enabling api signal logging and running
the SQL script below:
CREATE TABLE t1 (
pk1 int NOT NULL,
pk2 int NOT NULL,
PRIMARY KEY (`pk1`,`pk2`)
) ENGINE=ndb partition by key(pk1) partitions 8;
select count(*) from t1 where t1.pk1=0;
The resulting ScanTabConf signal carries OpData for eight fragments in a separate segment, which printSCANTABCONF()
does not handle.
modified:
storage/ndb/src/common/debugger/signaldata/ScanTab.cpp
4559 Jonas Oreland 2011-09-29
ndb - fix incorrect NdbRestarter usage
modified:
storage/ndb/test/ndbapi/testNdbApi.cpp
storage/ndb/test/ndbapi/testRestartGci.cpp
=== modified file 'storage/ndb/src/common/debugger/signaldata/ScanTab.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/ScanTab.cpp 2011-06-30 15:59:25 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/ScanTab.cpp 2011-09-29 11:31:28 +0000
@@ -78,9 +78,9 @@ printSCANTABCONF(FILE * output, const Ui
size_t op_count= requestInfo & (~ScanTabConf::EndOfData);
if (op_count)
{
- fprintf(output, " Operation(s) [api tc rows len]:\n");
if (len == ScanTabConf::SignalLength + 4 * op_count)
{
+ fprintf(output, " Operation(s) [api tc rows len]:\n");
ScanTabConf::OpData * op = (ScanTabConf::OpData*)
(theData + ScanTabConf::SignalLength);
for(size_t i = 0; i<op_count; i++)
@@ -91,9 +91,9 @@ printSCANTABCONF(FILE * output, const Ui
op++;
}
}
- else
+ else if (len == ScanTabConf::SignalLength + 3 * op_count)
{
- assert(len == ScanTabConf::SignalLength + 3 * op_count);
+ fprintf(output, " Operation(s) [api tc rows len]:\n");
for(size_t i = 0; i<op_count; i++)
{
ScanTabConf::OpData * op = (ScanTabConf::OpData*)
@@ -104,6 +104,12 @@ printSCANTABCONF(FILE * output, const Ui
ScanTabConf::getLength(op->rows));
}
}
+ else
+ {
+ // ScanTabConf::OpData stored in section 0 of signal.
+ assert(len == ScanTabConf::SignalLength);
+ fprintf(output, " Long signal. Cannot print operations.");
+ }
fprintf(output, "\n");
}
return false;
=== modified file 'storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2011-08-22 08:35:35 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2011-09-29 11:43:27 +0000
@@ -580,6 +580,8 @@ public:
Uint32 m_fragCount;
// The number of fragments that we scan in parallel.
Uint32 m_parallelism;
+ // True if we are still receiving the first batch for this operation.
+ bool m_firstBatch;
/**
* True if this is the first instantiation of this operation. A child
* operation will be instantiated once for each batch of its parent.
@@ -1229,7 +1231,6 @@ private:
void scanIndex_execSCAN_FRAGCONF(Signal*, Ptr<Request>, Ptr<TreeNode>, Ptr<ScanFragHandle>);
void scanIndex_parent_row(Signal*,Ptr<Request>,Ptr<TreeNode>, const RowPtr&);
void scanIndex_fixupBound(Ptr<ScanFragHandle> fragPtr, Uint32 ptrI, Uint32);
- void scanIndex_send(Signal*,Ptr<Request>,Ptr<TreeNode>);
void scanIndex_send(Signal* signal,
Ptr<Request> requestPtr,
Ptr<TreeNode> treeNodePtr,
=== modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2011-09-23 07:43:25 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2011-09-29 11:43:27 +0000
@@ -5023,6 +5023,7 @@ Dbspj::scanIndex_parent_batch_complete(S
const ScanFragReq * org = (const ScanFragReq*)data.m_scanFragReq;
ndbrequire(org->batch_size_rows > 0);
+ data.m_firstBatch = true;
if (treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL)
{
jam();
@@ -5171,6 +5172,9 @@ Dbspj::scanIndex_send(Signal* signal,
Uint32 bs_rows,
Uint32& batchRange)
{
+ jam();
+ ndbassert(bs_bytes > 0);
+ ndbassert(bs_rows > 0);
/**
* if (m_bits & prunemask):
* - Range keys sliced out to each ScanFragHandle
@@ -5451,6 +5455,9 @@ Dbspj::scanIndex_execSCAN_FRAGCONF(Signa
if (data.m_frags_outstanding == 0)
{
+ const bool isFirstBatch = data.m_firstBatch;
+ data.m_firstBatch = false;
+
const ScanFragReq * const org
= reinterpret_cast<const ScanFragReq*>(data.m_scanFragReq);
@@ -5486,24 +5493,78 @@ Dbspj::scanIndex_execSCAN_FRAGCONF(Signa
{
jam();
ndbrequire((requestPtr.p->m_state & Request::RS_ABORTING) != 0);
- }
- else if (! (data.m_rows_received == data.m_rows_expecting))
- {
- jam();
+ checkBatchComplete(signal, requestPtr, 1);
return;
}
- else
+
+ if (isFirstBatch && data.m_frags_not_started > 0)
{
- if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
+ /**
+ * Check if we can expect to be able to fetch the entire result set by
+ * asking for more fragments within the same batch. This may improve
+ * performance for bushy scans, as subsequent bushy branches must be
+ * re-executed for each batch of this scan.
+ */
+
+ /**
+ * Find the maximal correlation value that we may have seen so far.
+ * Correlation value must be unique within batch and smaller than
+ * org->batch_size_rows.
+ */
+ const Uint32 maxCorrVal = (data.m_totalRows) == 0 ? 0 :
+ org->batch_size_rows / data.m_parallelism * (data.m_parallelism - 1)
+ + data.m_totalRows;
+
+ // Number of rows that we can still fetch in this batch.
+ const Int32 remainingRows
+ = static_cast<Int32>(org->batch_size_rows - maxCorrVal);
+
+ if (remainingRows >= data.m_frags_not_started &&
+ /**
+ * Check that (remaning row capacity)/(remaining fragments) is
+ * greater or equal to (rows read so far)/(finished fragments).
+ */
+ remainingRows * static_cast<Int32>(data.m_parallelism) >=
+ static_cast<Int32>(data.m_totalRows * data.m_frags_not_started) &&
+ (org->batch_size_bytes - data.m_totalBytes) * data.m_parallelism >=
+ data.m_totalBytes * data.m_frags_not_started)
{
jam();
- reportBatchComplete(signal, requestPtr, treeNodePtr);
+ Uint32 batchRange = maxCorrVal;
+ DEBUG("::scanIndex_execSCAN_FRAGCONF() first batch was not full."
+ " Asking for new batches from " << data.m_frags_not_started <<
+ " fragments with " <<
+ remainingRows / data.m_frags_not_started
+ <<" rows and " <<
+ (org->batch_size_bytes - data.m_totalBytes)
+ / data.m_frags_not_started
+ << " bytes.");
+ scanIndex_send(signal,
+ requestPtr,
+ treeNodePtr,
+ data.m_frags_not_started,
+ (org->batch_size_bytes - data.m_totalBytes)
+ / data.m_frags_not_started,
+ remainingRows / data.m_frags_not_started,
+ batchRange);
+ return;
}
}
+
+ if (data.m_rows_received != data.m_rows_expecting)
+ {
+ jam();
+ return;
+ }
+
+ if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
+ {
+ jam();
+ reportBatchComplete(signal, requestPtr, treeNodePtr);
+ }
checkBatchComplete(signal, requestPtr, 1);
- return;
- }
+ } // if (data.m_frags_outstanding == 0)
}
void
=== modified file 'storage/ndb/src/ndbapi/NdbQueryBuilder.cpp'
--- a/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2011-09-14 10:30:08 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2011-09-29 11:35:02 +0000
@@ -343,7 +343,8 @@ NdbQueryDef::destroy() const
void
NdbQueryDef::print() const
{
- m_impl.getQueryOperation(0U).printTree(0, Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32>());
+ m_impl.getQueryOperation(0U)
+ .printTree(0, NdbQueryOperationDefImpl::SiblingMask());
}
/*************************************************************************
@@ -1188,7 +1189,8 @@ NdbQueryBuilderImpl::prepare()
if (doPrintQueryTree)
{
ndbout << "Query tree:" << endl;
- def->getQueryOperation(0U).printTree(0, Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32>());
+ def->getQueryOperation(0U)
+ .printTree(0, NdbQueryOperationDefImpl::SiblingMask());
}
return def;
@@ -2159,7 +2161,8 @@ NdbQueryOperationDefImpl::appendChildPro
* that connect the tree nodes.
*/
static void printMargin(Uint32 depth,
- Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> hasMoreSiblingsMask,
+ NdbQueryOperationDefImpl::SiblingMask
+ hasMoreSiblingsMask,
bool header)
{
if (depth > 0)
@@ -2193,11 +2196,10 @@ static void printMargin(Uint32 depth,
void
NdbQueryOperationDefImpl::printTree(Uint32 depth,
- Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32>
- hasMoreSiblingsMask) const
+ SiblingMask hasMoreSiblingsMask) const
{
// Print vertical line leading down to this node.
- Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> firstLineMask = hasMoreSiblingsMask;
+ SiblingMask firstLineMask = hasMoreSiblingsMask;
firstLineMask.set(depth);
printMargin(depth, firstLineMask, false);
ndbout << endl;
@@ -2214,22 +2216,24 @@ NdbQueryOperationDefImpl::printTree(Uint
printMargin(depth, hasMoreSiblingsMask, false);
ndbout << " index: " << getIndex()->getName() << endl;
}
- /* For each child but the last one, use a mask with an extra bit set to
- * indicate that there are more siblings.
- */
- hasMoreSiblingsMask.set(depth+1);
+
for (int childNo = 0;
- childNo < static_cast<int>(getNoOfChildOperations()) - 1;
+ childNo < static_cast<int>(getNoOfChildOperations());
childNo++)
{
- getChildOperation(childNo).printTree(depth+1, hasMoreSiblingsMask);
- }
- if (getNoOfChildOperations() > 0)
- {
- // The last child has no more siblings.
- hasMoreSiblingsMask.clear(depth+1);
- getChildOperation(getNoOfChildOperations() - 1)
- .printTree(depth+1, hasMoreSiblingsMask);
+ if (childNo == 0)
+ {
+ /* For each child but the last one, use a mask with an extra bit set to
+ * indicate that there are more siblings.
+ */
+ hasMoreSiblingsMask.set(depth+1);
+ }
+ if (childNo == static_cast<int>(getNoOfChildOperations()) - 1)
+ {
+ // The last child has no more siblings.
+ hasMoreSiblingsMask.clear(depth+1);
+ }
+ getChildOperation(childNo).printTree(depth+1, hasMoreSiblingsMask);
}
} // NdbQueryOperationDefImpl::printTree()
=== modified file 'storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp'
--- a/storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp 2011-09-14 10:30:08 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp 2011-09-29 11:35:02 +0000
@@ -429,6 +429,12 @@ public:
// Get type of query operation
virtual NdbQueryOperationDef::Type getType() const = 0;
+ /**
+ * Used for telling if parent at depth n has more siblings. (In that case
+ * we need to draw a horisontal line leading to that sibling.)
+ */
+ typedef Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> SiblingMask;
+
/** Print query tree graph to trace file (using recursion).
* @param depth Number of ancestor nodes that this node has.
* @param hasMoreSiblingsMask The n'th bit should be set if the n'th ancestor
@@ -436,7 +442,7 @@ public:
*/
void printTree(
Uint32 depth,
- Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> hasMoreSiblingsMask) const;
+ SiblingMask hasMoreSiblingsMask) const;
protected:
// QueryTree building:
No bundle (reason: useless for push emails).
| Thread | Author | Date |
|---|---|---|
| • bzr push into mysql-5.1-telco-7.0 branch (jan.wedvik:4559 to 4562) | Jan Wedvik | 2 Oct |