3915 Ole John Aske 2012-05-08
Fix for Bug#14010406 LARGE PUSHED JOIN HIT ASSERT IN SPJ BLOCK
Buffered rows (caused by PARENT refs) were prematurely
released when there were no TN_ACTIVE child treeNodes.
However, in bushy scans, a scan branch can be 'repeated'
even if its previous execution was not 'ACTIVE'.
Thus, the logic deciding when buffered rows are released has to be
less eager. We now keep any buffered rows until we prepare
a treeNode to retrieve new rows for that specific treeNode.
This also made it possible to simplify this logic such that the
recursive method Dbspj::cleanupChildBranch() became obsolete.
As part of debugging / fixing this problem, the SPJ-DEBUG trace was
enhanced such that the execution of each SPJ signal is related
to the 'node_no' of its treeNode and the ident of the request it belongs to.
modified:
mysql-test/suite/ndb/r/ndb_join_pushdown_default.result
mysql-test/suite/ndb/t/ndb_join_pushdown.inc
storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp
storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp
storage/ndb/src/ndbapi/NdbQueryBuilder.cpp
3914 Ole John Aske 2012-05-08
Fix for MTR test breaks caused by revno 3913.
Let MTR test ignore possible Error 1193 caused by setting the
'debug' variable if running a non-debug version of the code.
modified:
mysql-test/suite/ndb/t/ndb_join_pushdown.inc
=== modified file 'mysql-test/suite/ndb/r/ndb_join_pushdown_default.result'
--- a/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result 2012-05-07 11:07:32 +0000
+++ b/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result 2012-05-08 08:03:29 +0000
@@ -5671,6 +5671,44 @@ k1 i name
Warnings:
Warning 4294 Scan filter is too large, discarded
drop table t1;
+create table t(
+pk int primary key auto_increment,
+i int,
+j int,
+k int,
+index(i,j),
+index(i),
+index(j),
+index(k)
+) engine = ndb;
+insert into t(i,j,k) values
+(1,1,1), (1,1,1), (1,1,1),
+(2,2,2), (2,2,2), (2,2,2);
+set global debug='+d,max_4rows_in_spj_batches';
+explain
+select straight_join count(*) from
+t as t1
+join t as t2 on t2.i = t1.i
+join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j
+join t as t6 on t6.k = t1.k
+where t1.i < 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range i,i_2,j,k i 5 NULL 3 Parent of 6 pushed join@1; Using where with pushed condition
+1 SIMPLE t2 ref i,i_2 i 5 test.t1.i 1 Child of 't1' in pushed join@1; Using where
+1 SIMPLE t3 eq_ref PRIMARY,j,k PRIMARY 4 test.t1.j 1 Child of 't1' in pushed join@1
+1 SIMPLE t4 ref i,i_2,k k 5 test.t3.k 2 Child of 't3' in pushed join@1; Using where
+1 SIMPLE t5 ref i,i_2,j i 10 test.t4.i,test.t3.j 2 Child of 't4' in pushed join@1; Using where
+1 SIMPLE t6 ref k k 5 test.t1.k 2 Child of 't1' in pushed join@1; Using where
+select straight_join count(*) from
+t as t1
+join t as t2 on t2.i = t1.i
+join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j
+join t as t6 on t6.k = t1.k
+where t1.i < 2;
+count(*)
+243
+set global debug='-d,max_4rows_in_spj_batches';
+drop table t;
create temporary table spj_counts_at_end
select counter_name, sum(val) as val
from ndbinfo.counters
@@ -5691,7 +5729,7 @@ counter_name spj_counts_at_end.val - spj
CONST_PRUNED_RANGE_SCANS_RECEIVED 8
LOCAL_TABLE_SCANS_SENT 254
PRUNED_RANGE_SCANS_RECEIVED 27
-RANGE_SCANS_RECEIVED 736
+RANGE_SCANS_RECEIVED 738
READS_RECEIVED 47
TABLE_SCANS_RECEIVED 254
drop table spj_counts_at_startup;
=== modified file 'mysql-test/suite/ndb/t/ndb_join_pushdown.inc'
--- a/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 2012-05-08 06:55:23 +0000
+++ b/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 2012-05-08 08:03:29 +0000
@@ -4133,6 +4133,57 @@ eval $query;
drop table t1;
+############################################################
+# Bug#14010406 LARGE PUSHED JOIN HIT ASSERT IN SPJ BLOCK
+#
+# Buffered rows (caused by PARENT refs) were prematurely
+# released when there were no TN_ACTIVE child treeNodes.
+# However, in bushy scans, a scan branch can be 'repeated'
+# even if its previous execution was 'complete'.
+#
+# Thus we have to use a less eager release strategy where
+# we don't release any buffered rows until we prepare for
+# a NEXTREQ which will fetch more rows into the treeNode.
+############################################################
+
+create table t(
+ pk int primary key auto_increment,
+ i int,
+ j int,
+ k int,
+ index(i,j),
+ index(i),
+ index(j),
+ index(k)
+) engine = ndb;
+
+insert into t(i,j,k) values
+ (1,1,1), (1,1,1), (1,1,1),
+ (2,2,2), (2,2,2), (2,2,2);
+
+--error 0,1193
+set global debug='+d,max_4rows_in_spj_batches';
+
+explain
+select straight_join count(*) from
+ t as t1
+ join t as t2 on t2.i = t1.i
+ join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j
+ join t as t6 on t6.k = t1.k
+ where t1.i < 2;
+
+select straight_join count(*) from
+ t as t1
+ join t as t2 on t2.i = t1.i
+ join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j
+ join t as t6 on t6.k = t1.k
+ where t1.i < 2;
+
+--error 0,1193
+set global debug='-d,max_4rows_in_spj_batches';
+
+drop table t;
+
########################################
# Verify DBSPJ counters for entire test:
# Note: These tables are 'temporary' withing 'connection spj'
=== modified file 'storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2012-04-25 06:24:54 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2012-05-08 08:03:29 +0000
@@ -1087,8 +1087,6 @@ private:
void releaseRow(Ptr<Request>, RowRef ref);
void registerActiveCursor(Ptr<Request>, Ptr<TreeNode>);
void nodeFail_checkRequests(Signal*);
-
- void cleanupChildBranch(Ptr<Request>, Ptr<TreeNode>);
void cleanup_common(Ptr<Request>, Ptr<TreeNode>);
/**
=== modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2012-04-25 09:22:21 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2012-05-08 08:03:29 +0000
@@ -1160,7 +1160,7 @@ Dbspj::batchComplete(Signal* signal, Ptr
{
jam();
/**
- * release unneeded buffers and position cursor for SCAN_NEXTREQ
+ * release unneeded buffers as preparation for later SCAN_NEXTREQ
*/
releaseScanBuffers(requestPtr);
}
@@ -1193,6 +1193,8 @@ Dbspj::prepareNextBatch(Signal* signal,
return;
}
+ DEBUG("prepareNextBatch, request: " << requestPtr.i);
+
if (requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT)
{
/**
@@ -1427,43 +1429,36 @@ Dbspj::releaseScanBuffers(Ptr<Request> r
{
Ptr<TreeNode> treeNodePtr;
Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
- TreeNodeBitMask ancestors_of_active;
- for (list.last(treeNodePtr); !treeNodePtr.isNull(); list.prev(treeNodePtr))
+ for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
{
/**
- * If there are no active children,
- * then we can cleanup in our sub-branch
+ * Release buffered rows for all treeNodes getting more rows
+ * in the following NEXTREQ, including all its childs.
*/
- if (!ancestors_of_active.get(treeNodePtr.p->m_node_no))
+ if (requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no) ||
+ requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
{
if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
{
jam();
releaseNodeRows(requestPtr, treeNodePtr);
}
-
- /**
- * Cleanup ACTIVE nodes fetching more rows in a NEXTREQ,
- * or nodes being in 'm_active_nodes' as they will 'repeat'.
- * (and then become active)
- */
- if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE ||
- requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no))
- {
- jam();
- cleanupChildBranch(requestPtr, treeNodePtr);
- }
}
/**
- * Collect ancestors of all nodes which are, or will
- * become active in NEXTREQ (possibly repeated)
- */
- if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE ||
- requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no))
+ * Do further cleanup in treeNodes having ancestor getting more rows.
+ * (Which excludes the restarted treeNode itself)
+ */
+ if (requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
{
- ancestors_of_active.bitOR(treeNodePtr.p->m_ancestors);
+ jam();
+ if (treeNodePtr.p->m_info->m_parent_batch_cleanup != 0)
+ {
+ jam();
+ (this->*(treeNodePtr.p->m_info->m_parent_batch_cleanup))(requestPtr,
+ treeNodePtr);
+ }
}
}
/**
@@ -1494,32 +1489,15 @@ Dbspj::registerActiveCursor(Ptr<Request>
}
void
-Dbspj::cleanupChildBranch(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
-{
- LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
- Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
- Dependency_map::ConstDataBufferIterator it;
- for (list.first(it); !it.isNull(); list.next(it))
- {
- jam();
- Ptr<TreeNode> childPtr;
- m_treenode_pool.getPtr(childPtr, *it.data);
- if (childPtr.p->m_info->m_parent_batch_cleanup != 0)
- {
- jam();
- (this->*(childPtr.p->m_info->m_parent_batch_cleanup))(requestPtr,
- childPtr);
- }
- cleanupChildBranch(requestPtr,childPtr);
- }
-}
-
-void
Dbspj::releaseNodeRows(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
{
/**
* Release all rows associated with tree node
*/
+ DEBUG("releaseNodeRows"
+ << ", node: " << treeNodePtr.p->m_node_no
+ << ", request: " << requestPtr.i
+ );
// only when var-alloc, or else stack will be popped wo/ consideration
// to individual rows
@@ -1630,6 +1608,9 @@ Dbspj::releaseRow(Ptr<Request> requestPt
void
Dbspj::releaseRequestBuffers(Ptr<Request> requestPtr, bool reset)
{
+ DEBUG("releaseRequestBuffers"
+ << ", request: " << requestPtr.i
+ );
/**
* Release all pages for request
*/
@@ -1952,13 +1933,18 @@ Dbspj::execLQHKEYREF(Signal* signal)
const LqhKeyRef* ref = reinterpret_cast<const LqhKeyRef*>(signal->getDataPtr());
- DEBUG("execLQHKEYREF, errorCode:" << ref->errorCode);
Ptr<TreeNode> treeNodePtr;
m_treenode_pool.getPtr(treeNodePtr, ref->connectPtr);
Ptr<Request> requestPtr;
m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+ DEBUG("execLQHKEYREF"
+ << ", node: " << treeNodePtr.p->m_node_no
+ << ", request: " << requestPtr.i
+ << ", errorCode: " << ref->errorCode
+ );
+
ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYREF);
(this->*(treeNodePtr.p->m_info->m_execLQHKEYREF))(signal,
requestPtr,
@@ -1970,8 +1956,6 @@ Dbspj::execLQHKEYCONF(Signal* signal)
{
jamEntry();
- DEBUG("execLQHKEYCONF");
-
const LqhKeyConf* conf = reinterpret_cast<const LqhKeyConf*>(signal->getDataPtr());
Ptr<TreeNode> treeNodePtr;
m_treenode_pool.getPtr(treeNodePtr, conf->opPtr);
@@ -1979,6 +1963,11 @@ Dbspj::execLQHKEYCONF(Signal* signal)
Ptr<Request> requestPtr;
m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+ DEBUG("execLQHKEYCONF"
+ << ", node: " << treeNodePtr.p->m_node_no
+ << ", request: " << requestPtr.i
+ );
+
ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYCONF);
(this->*(treeNodePtr.p->m_info->m_execLQHKEYCONF))(signal,
requestPtr,
@@ -1991,8 +1980,6 @@ Dbspj::execSCAN_FRAGREF(Signal* signal)
jamEntry();
const ScanFragRef* ref = reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());
- DEBUG("execSCAN_FRAGREF, errorCode:" << ref->errorCode);
-
Ptr<ScanFragHandle> scanFragHandlePtr;
m_scanfraghandle_pool.getPtr(scanFragHandlePtr, ref->senderData);
Ptr<TreeNode> treeNodePtr;
@@ -2000,6 +1987,12 @@ Dbspj::execSCAN_FRAGREF(Signal* signal)
Ptr<Request> requestPtr;
m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+ DEBUG("execSCAN_FRAGCONF"
+ << ", node: " << treeNodePtr.p->m_node_no
+ << ", request: " << requestPtr.i
+ << ", errorCode: " << ref->errorCode
+ );
+
ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGREF);
(this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGREF))(signal,
requestPtr,
@@ -2021,6 +2014,10 @@ Dbspj::execSCAN_HBREP(Signal* signal)
m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
Ptr<Request> requestPtr;
m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+ DEBUG("execSCAN_HBREP"
+ << ", node: " << treeNodePtr.p->m_node_no
+ << ", request: " << requestPtr.i
+ );
Uint32 ref = requestPtr.p->m_senderRef;
signal->theData[0] = requestPtr.p->m_senderData;
@@ -2031,7 +2028,6 @@ void
Dbspj::execSCAN_FRAGCONF(Signal* signal)
{
jamEntry();
- DEBUG("execSCAN_FRAGCONF");
const ScanFragConf* conf = reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());
@@ -2048,6 +2044,10 @@ Dbspj::execSCAN_FRAGCONF(Signal* signal)
m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
Ptr<Request> requestPtr;
m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+ DEBUG("execSCAN_FRAGCONF"
+ << ", node: " << treeNodePtr.p->m_node_no
+ << ", request: " << requestPtr.i
+ );
ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGCONF);
(this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGCONF))(signal,
@@ -2062,8 +2062,8 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal)
jamEntry();
const ScanFragNextReq * req = (ScanFragNextReq*)&signal->theData[0];
- DEBUG("Incomming SCAN_NEXTREQ");
#ifdef DEBUG_SCAN_FRAGREQ
+ DEBUG("Incomming SCAN_NEXTREQ";
printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
ScanFragNextReq::SignalLength, DBLQH);
#endif
@@ -2080,6 +2080,7 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal)
ndbrequire(req->requestInfo == ScanFragNextReq::ZCLOSE);
return;
}
+ DEBUG("execSCAN_NEXTREQ, request: " << requestPtr.i);
#ifdef SPJ_TRACE_TIME
Uint64 now = spj_now();
@@ -2132,7 +2133,7 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal)
if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE)
{
jam();
- DEBUG("SCAN_NEXTREQ on TreeNode: " << treeNodePtr.i
+ DEBUG("SCAN_NEXTREQ on TreeNode: "
<< ", m_node_no: " << treeNodePtr.p->m_node_no
<< ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI);
@@ -2151,7 +2152,7 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal)
*/
jam();
ndbrequire(requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT);
- DEBUG(" Restart TreeNode: " << treeNodePtr.i
+ DEBUG("Restart TreeNode "
<< ", m_node_no: " << treeNodePtr.p->m_node_no
<< ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI);
@@ -2172,7 +2173,6 @@ void
Dbspj::execTRANSID_AI(Signal* signal)
{
jamEntry();
- DEBUG("execTRANSID_AI");
TransIdAI * req = (TransIdAI *)signal->getDataPtr();
Uint32 ptrI = req->connectPtr;
//Uint32 transId[2] = { req->transId[0], req->transId[1] };
@@ -2182,6 +2182,11 @@ Dbspj::execTRANSID_AI(Signal* signal)
Ptr<Request> requestPtr;
m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+ DEBUG("execTRANSID_AI"
+ << ", node: " << treeNodePtr.p->m_node_no
+ << ", request: " << requestPtr.i
+ );
+
ndbrequire(signal->getNoOfSections() != 0);
SegmentedSectionPtr dataPtr;
@@ -2249,6 +2254,11 @@ Dbspj::storeRow(Ptr<Request> requestPtr,
Uint32 * headptr = (Uint32*)row.m_row_data.m_section.m_header;
Uint32 headlen = 1 + row.m_row_data.m_section.m_header->m_len;
+ DEBUG("storeRow"
+ << ", node: " << treeNodePtr.p->m_node_no
+ << ", request: " << requestPtr.i
+ );
+
/**
* If rows are not in map, then they are kept in linked list
*/
@@ -3478,7 +3488,8 @@ Dbspj::lookup_parent_row(Signal* signal,
const Uint32 tableId = LqhKeyReq::getTableId(src->tableSchemaVersion);
const Uint32 corrVal = rowRef.m_src_correlation;
- DEBUG("::lookup_parent_row");
+ DEBUG("::lookup_parent_row"
+ << ", node: " << treeNodePtr.p->m_node_no);
do
{
@@ -5117,6 +5128,8 @@ Dbspj::scanIndex_parent_row(Signal* sign
const RowPtr & rowRef)
{
jam();
+ DEBUG("::scanIndex_parent_row"
+ << ", node: " << treeNodePtr.p->m_node_no);
Uint32 err;
ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
@@ -6885,6 +6898,8 @@ Dbspj::appendFromParent(Uint32 & dst, Lo
m_treenode_pool.getPtr(treeNodePtr, rowptr.m_src_node_ptrI);
Uint32 corrVal = rowptr.m_src_correlation;
RowPtr targetRow;
+ DEBUG("appendFromParent-of"
+ << " node: " << treeNodePtr.p->m_node_no);
while (levels--)
{
jam();
@@ -6894,6 +6909,8 @@ Dbspj::appendFromParent(Uint32 & dst, Lo
return DbspjErr::InvalidPattern;
}
m_treenode_pool.getPtr(treeNodePtr, treeNodePtr.p->m_parentPtrI);
+ DEBUG("appendFromParent"
+ << ", node: " << treeNodePtr.p->m_node_no);
if (unlikely((treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP) == 0))
{
DEBUG_CRASH();
=== modified file 'storage/ndb/src/ndbapi/NdbQueryBuilder.cpp'
--- a/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2012-04-11 10:34:58 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2012-05-08 08:03:29 +0000
@@ -2181,7 +2181,8 @@ NdbQueryOperationDefImpl::printTree(Uint
ndbout << NdbQueryOperationDef::getTypeName(getType()) << endl;
printMargin(depth, hasMoreSiblingsMask, false);
// Print attributes.
- ndbout << " opNo: " << getOpNo() << endl;
+ ndbout << " opNo: " << getOpNo()
+ << " (internal: " << getInternalOpNo() << ")" << endl;
printMargin(depth, hasMoreSiblingsMask, false);
ndbout << " table: " << getTable().getName() << endl;
if (getIndex() != NULL)
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.5-cluster-7.2 branch (ole.john.aske:3914 to 3915)Bug#14010406 | Ole John Aske | 9 May |