From: Ole John Aske Date: May 8 2012 8:03am Subject: bzr push into mysql-5.5-cluster-7.2 branch (ole.john.aske:3914 to 3915) Bug#14010406 List-Archive: http://lists.mysql.com/commits/143772 X-Bug: 14010406 Message-Id: <20120508080359.B0AD6251@fimafeng09.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3915 Ole John Aske 2012-05-08 Fix for Bug#14010406 LARGE PUSHED JOIN HIT ASSERT IN SPJ BLOCK Buffered rows (caused by PARENT refs) were prematurely released when there were no TN_ACTIVE treeNode children. However, in bushy scans, a scan branch can be 'repeated' even if its previous execution was not 'ACTIVE'. Thus, the logic for when buffered rows should be released should be less eager to release rows. Now we keep any buffered rows until we prepare the treeNodes to retrieve new rows for the specific treeNode. This also made it possible to simplify this logic such that the recursive method Dbspj::cleanupChildBranch() became obsolete. As part of debugging / fixing this problem, the SPJ-DEBUG trace was enhanced such that the execution of SPJ signals is related to the 'node_no' of the treeNode and the ident of the request it belongs to. modified: mysql-test/suite/ndb/r/ndb_join_pushdown_default.result mysql-test/suite/ndb/t/ndb_join_pushdown.inc storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 3914 Ole John Aske 2012-05-08 Fix for MTR test breaks caused by revno 3913. Let MTR test ignore possible Error 1193 caused by setting the 'debug' variable if running a non-debug version of the code. 
modified: mysql-test/suite/ndb/t/ndb_join_pushdown.inc === modified file 'mysql-test/suite/ndb/r/ndb_join_pushdown_default.result' --- a/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result 2012-05-07 11:07:32 +0000 +++ b/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result 2012-05-08 08:03:29 +0000 @@ -5671,6 +5671,44 @@ k1 i name Warnings: Warning 4294 Scan filter is too large, discarded drop table t1; +create table t( +pk int primary key auto_increment, +i int, +j int, +k int, +index(i,j), +index(i), +index(j), +index(k) +) engine = ndb; +insert into t(i,j,k) values +(1,1,1), (1,1,1), (1,1,1), +(2,2,2), (2,2,2), (2,2,2); +set global debug='+d,max_4rows_in_spj_batches'; +explain +select straight_join count(*) from +t as t1 +join t as t2 on t2.i = t1.i +join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j +join t as t6 on t6.k = t1.k +where t1.i < 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range i,i_2,j,k i 5 NULL 3 Parent of 6 pushed join@1; Using where with pushed condition +1 SIMPLE t2 ref i,i_2 i 5 test.t1.i 1 Child of 't1' in pushed join@1; Using where +1 SIMPLE t3 eq_ref PRIMARY,j,k PRIMARY 4 test.t1.j 1 Child of 't1' in pushed join@1 +1 SIMPLE t4 ref i,i_2,k k 5 test.t3.k 2 Child of 't3' in pushed join@1; Using where +1 SIMPLE t5 ref i,i_2,j i 10 test.t4.i,test.t3.j 2 Child of 't4' in pushed join@1; Using where +1 SIMPLE t6 ref k k 5 test.t1.k 2 Child of 't1' in pushed join@1; Using where +select straight_join count(*) from +t as t1 +join t as t2 on t2.i = t1.i +join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j +join t as t6 on t6.k = t1.k +where t1.i < 2; +count(*) +243 +set global debug='-d,max_4rows_in_spj_batches'; +drop table t; create temporary table spj_counts_at_end select counter_name, sum(val) as val from ndbinfo.counters @@ -5691,7 +5729,7 @@ counter_name spj_counts_at_end.val - spj 
CONST_PRUNED_RANGE_SCANS_RECEIVED 8 LOCAL_TABLE_SCANS_SENT 254 PRUNED_RANGE_SCANS_RECEIVED 27 -RANGE_SCANS_RECEIVED 736 +RANGE_SCANS_RECEIVED 738 READS_RECEIVED 47 TABLE_SCANS_RECEIVED 254 drop table spj_counts_at_startup; === modified file 'mysql-test/suite/ndb/t/ndb_join_pushdown.inc' --- a/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 2012-05-08 06:55:23 +0000 +++ b/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 2012-05-08 08:03:29 +0000 @@ -4133,6 +4133,57 @@ eval $query; drop table t1; +############################################################ +# Bug#14010406 LARGE PUSHED JOIN HIT ASSERT IN SPJ BLOCK +# +# Buffered rows (caused by PARENT refs) was prematurely +# released when there was no TN_ACTIVE treeNodes childs. +# However, in bushy scans, a scan branch can be 'repeated' +# even if its previous execution was 'complete'. +# +# Thus we have to use a less eager release strategy where +# we don't release any buffered rows until we prepare for +# a NEXTREQ which will fetch more rows into the treeNode. 
+############################################################ + +create table t( + pk int primary key auto_increment, + i int, + j int, + k int, + index(i,j), + index(i), + index(j), + index(k) +) engine = ndb; + +insert into t(i,j,k) values + (1,1,1), (1,1,1), (1,1,1), + (2,2,2), (2,2,2), (2,2,2); + +--error 0,1193 +set global debug='+d,max_4rows_in_spj_batches'; + +explain +select straight_join count(*) from + t as t1 + join t as t2 on t2.i = t1.i + join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j + join t as t6 on t6.k = t1.k + where t1.i < 2; + +select straight_join count(*) from + t as t1 + join t as t2 on t2.i = t1.i + join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j + join t as t6 on t6.k = t1.k + where t1.i < 2; + +--error 0,1193 +set global debug='-d,max_4rows_in_spj_batches'; + +drop table t; + ######################################## # Verify DBSPJ counters for entire test: # Note: These tables are 'temporary' withing 'connection spj' === modified file 'storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp' --- a/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2012-04-25 06:24:54 +0000 +++ b/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2012-05-08 08:03:29 +0000 @@ -1087,8 +1087,6 @@ private: void releaseRow(Ptr, RowRef ref); void registerActiveCursor(Ptr, Ptr); void nodeFail_checkRequests(Signal*); - - void cleanupChildBranch(Ptr, Ptr); void cleanup_common(Ptr, Ptr); /** === modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp' --- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2012-04-25 09:22:21 +0000 +++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2012-05-08 08:03:29 +0000 @@ -1160,7 +1160,7 @@ Dbspj::batchComplete(Signal* signal, Ptr { jam(); /** - * release unneeded buffers and position cursor for SCAN_NEXTREQ + * release unneeded buffers as preparation for later SCAN_NEXTREQ */ releaseScanBuffers(requestPtr); } @@ -1193,6 +1193,8 @@ 
Dbspj::prepareNextBatch(Signal* signal, return; } + DEBUG("prepareNextBatch, request: " << requestPtr.i); + if (requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT) { /** @@ -1427,43 +1429,36 @@ Dbspj::releaseScanBuffers(Ptr r { Ptr treeNodePtr; Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes); - TreeNodeBitMask ancestors_of_active; - for (list.last(treeNodePtr); !treeNodePtr.isNull(); list.prev(treeNodePtr)) + for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr)) { /** - * If there are no active children, - * then we can cleanup in our sub-branch + * Release buffered rows for all treeNodes getting more rows + * in the following NEXTREQ, including all its childs. */ - if (!ancestors_of_active.get(treeNodePtr.p->m_node_no)) + if (requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no) || + requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors)) { if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER) { jam(); releaseNodeRows(requestPtr, treeNodePtr); } - - /** - * Cleanup ACTIVE nodes fetching more rows in a NEXTREQ, - * or nodes being in 'm_active_nodes' as they will 'repeat'. - * (and then become active) - */ - if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE || - requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no)) - { - jam(); - cleanupChildBranch(requestPtr, treeNodePtr); - } } /** - * Collect ancestors of all nodes which are, or will - * become active in NEXTREQ (possibly repeated) - */ - if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE || - requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no)) + * Do further cleanup in treeNodes having ancestor getting more rows. 
+ * (Which excludes the restarted treeNode itself) + */ + if (requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors)) { - ancestors_of_active.bitOR(treeNodePtr.p->m_ancestors); + jam(); + if (treeNodePtr.p->m_info->m_parent_batch_cleanup != 0) + { + jam(); + (this->*(treeNodePtr.p->m_info->m_parent_batch_cleanup))(requestPtr, + treeNodePtr); + } } } /** @@ -1494,32 +1489,15 @@ Dbspj::registerActiveCursor(Ptr } void -Dbspj::cleanupChildBranch(Ptr requestPtr, Ptr treeNodePtr) -{ - LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool); - Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes); - Dependency_map::ConstDataBufferIterator it; - for (list.first(it); !it.isNull(); list.next(it)) - { - jam(); - Ptr childPtr; - m_treenode_pool.getPtr(childPtr, *it.data); - if (childPtr.p->m_info->m_parent_batch_cleanup != 0) - { - jam(); - (this->*(childPtr.p->m_info->m_parent_batch_cleanup))(requestPtr, - childPtr); - } - cleanupChildBranch(requestPtr,childPtr); - } -} - -void Dbspj::releaseNodeRows(Ptr requestPtr, Ptr treeNodePtr) { /** * Release all rows associated with tree node */ + DEBUG("releaseNodeRows" + << ", node: " << treeNodePtr.p->m_node_no + << ", request: " << requestPtr.i + ); // only when var-alloc, or else stack will be popped wo/ consideration // to individual rows @@ -1630,6 +1608,9 @@ Dbspj::releaseRow(Ptr requestPt void Dbspj::releaseRequestBuffers(Ptr requestPtr, bool reset) { + DEBUG("releaseRequestBuffers" + << ", request: " << requestPtr.i + ); /** * Release all pages for request */ @@ -1952,13 +1933,18 @@ Dbspj::execLQHKEYREF(Signal* signal) const LqhKeyRef* ref = reinterpret_cast(signal->getDataPtr()); - DEBUG("execLQHKEYREF, errorCode:" << ref->errorCode); Ptr treeNodePtr; m_treenode_pool.getPtr(treeNodePtr, ref->connectPtr); Ptr requestPtr; m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI); + DEBUG("execLQHKEYREF" + << ", node: " << treeNodePtr.p->m_node_no + << ", request: " << requestPtr.i + 
<< ", errorCode: " << ref->errorCode + ); + ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYREF); (this->*(treeNodePtr.p->m_info->m_execLQHKEYREF))(signal, requestPtr, @@ -1970,8 +1956,6 @@ Dbspj::execLQHKEYCONF(Signal* signal) { jamEntry(); - DEBUG("execLQHKEYCONF"); - const LqhKeyConf* conf = reinterpret_cast(signal->getDataPtr()); Ptr treeNodePtr; m_treenode_pool.getPtr(treeNodePtr, conf->opPtr); @@ -1979,6 +1963,11 @@ Dbspj::execLQHKEYCONF(Signal* signal) Ptr requestPtr; m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI); + DEBUG("execLQHKEYCONF" + << ", node: " << treeNodePtr.p->m_node_no + << ", request: " << requestPtr.i + ); + ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYCONF); (this->*(treeNodePtr.p->m_info->m_execLQHKEYCONF))(signal, requestPtr, @@ -1991,8 +1980,6 @@ Dbspj::execSCAN_FRAGREF(Signal* signal) jamEntry(); const ScanFragRef* ref = reinterpret_cast(signal->getDataPtr()); - DEBUG("execSCAN_FRAGREF, errorCode:" << ref->errorCode); - Ptr scanFragHandlePtr; m_scanfraghandle_pool.getPtr(scanFragHandlePtr, ref->senderData); Ptr treeNodePtr; @@ -2000,6 +1987,12 @@ Dbspj::execSCAN_FRAGREF(Signal* signal) Ptr requestPtr; m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI); + DEBUG("execSCAN_FRAGCONF" + << ", node: " << treeNodePtr.p->m_node_no + << ", request: " << requestPtr.i + << ", errorCode: " << ref->errorCode + ); + ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGREF); (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGREF))(signal, requestPtr, @@ -2021,6 +2014,10 @@ Dbspj::execSCAN_HBREP(Signal* signal) m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI); Ptr requestPtr; m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI); + DEBUG("execSCAN_HBREP" + << ", node: " << treeNodePtr.p->m_node_no + << ", request: " << requestPtr.i + ); Uint32 ref = requestPtr.p->m_senderRef; signal->theData[0] = requestPtr.p->m_senderData; @@ 
-2031,7 +2028,6 @@ void Dbspj::execSCAN_FRAGCONF(Signal* signal) { jamEntry(); - DEBUG("execSCAN_FRAGCONF"); const ScanFragConf* conf = reinterpret_cast(signal->getDataPtr()); @@ -2048,6 +2044,10 @@ Dbspj::execSCAN_FRAGCONF(Signal* signal) m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI); Ptr requestPtr; m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI); + DEBUG("execSCAN_FRAGCONF" + << ", node: " << treeNodePtr.p->m_node_no + << ", request: " << requestPtr.i + ); ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGCONF); (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGCONF))(signal, @@ -2062,8 +2062,8 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal) jamEntry(); const ScanFragNextReq * req = (ScanFragNextReq*)&signal->theData[0]; - DEBUG("Incomming SCAN_NEXTREQ"); #ifdef DEBUG_SCAN_FRAGREQ + DEBUG("Incomming SCAN_NEXTREQ"; printSCANFRAGNEXTREQ(stdout, &signal->theData[0], ScanFragNextReq::SignalLength, DBLQH); #endif @@ -2080,6 +2080,7 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal) ndbrequire(req->requestInfo == ScanFragNextReq::ZCLOSE); return; } + DEBUG("execSCAN_NEXTREQ, request: " << requestPtr.i); #ifdef SPJ_TRACE_TIME Uint64 now = spj_now(); @@ -2132,7 +2133,7 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal) if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE) { jam(); - DEBUG("SCAN_NEXTREQ on TreeNode: " << treeNodePtr.i + DEBUG("SCAN_NEXTREQ on TreeNode: " << ", m_node_no: " << treeNodePtr.p->m_node_no << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI); @@ -2151,7 +2152,7 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal) */ jam(); ndbrequire(requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT); - DEBUG(" Restart TreeNode: " << treeNodePtr.i + DEBUG("Restart TreeNode " << ", m_node_no: " << treeNodePtr.p->m_node_no << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI); @@ -2172,7 +2173,6 @@ void Dbspj::execTRANSID_AI(Signal* signal) { jamEntry(); - DEBUG("execTRANSID_AI"); TransIdAI * req = (TransIdAI 
*)signal->getDataPtr(); Uint32 ptrI = req->connectPtr; //Uint32 transId[2] = { req->transId[0], req->transId[1] }; @@ -2182,6 +2182,11 @@ Dbspj::execTRANSID_AI(Signal* signal) Ptr requestPtr; m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI); + DEBUG("execTRANSID_AI" + << ", node: " << treeNodePtr.p->m_node_no + << ", request: " << requestPtr.i + ); + ndbrequire(signal->getNoOfSections() != 0); SegmentedSectionPtr dataPtr; @@ -2249,6 +2254,11 @@ Dbspj::storeRow(Ptr requestPtr, Uint32 * headptr = (Uint32*)row.m_row_data.m_section.m_header; Uint32 headlen = 1 + row.m_row_data.m_section.m_header->m_len; + DEBUG("storeRow" + << ", node: " << treeNodePtr.p->m_node_no + << ", request: " << requestPtr.i + ); + /** * If rows are not in map, then they are kept in linked list */ @@ -3478,7 +3488,8 @@ Dbspj::lookup_parent_row(Signal* signal, const Uint32 tableId = LqhKeyReq::getTableId(src->tableSchemaVersion); const Uint32 corrVal = rowRef.m_src_correlation; - DEBUG("::lookup_parent_row"); + DEBUG("::lookup_parent_row" + << ", node: " << treeNodePtr.p->m_node_no); do { @@ -5117,6 +5128,8 @@ Dbspj::scanIndex_parent_row(Signal* sign const RowPtr & rowRef) { jam(); + DEBUG("::scanIndex_parent_row" + << ", node: " << treeNodePtr.p->m_node_no); Uint32 err; ScanIndexData& data = treeNodePtr.p->m_scanindex_data; @@ -6885,6 +6898,8 @@ Dbspj::appendFromParent(Uint32 & dst, Lo m_treenode_pool.getPtr(treeNodePtr, rowptr.m_src_node_ptrI); Uint32 corrVal = rowptr.m_src_correlation; RowPtr targetRow; + DEBUG("appendFromParent-of" + << " node: " << treeNodePtr.p->m_node_no); while (levels--) { jam(); @@ -6894,6 +6909,8 @@ Dbspj::appendFromParent(Uint32 & dst, Lo return DbspjErr::InvalidPattern; } m_treenode_pool.getPtr(treeNodePtr, treeNodePtr.p->m_parentPtrI); + DEBUG("appendFromParent" + << ", node: " << treeNodePtr.p->m_node_no); if (unlikely((treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP) == 0)) { DEBUG_CRASH(); === modified file 
'storage/ndb/src/ndbapi/NdbQueryBuilder.cpp' --- a/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2012-04-11 10:34:58 +0000 +++ b/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2012-05-08 08:03:29 +0000 @@ -2181,7 +2181,8 @@ NdbQueryOperationDefImpl::printTree(Uint ndbout << NdbQueryOperationDef::getTypeName(getType()) << endl; printMargin(depth, hasMoreSiblingsMask, false); // Print attributes. - ndbout << " opNo: " << getOpNo() << endl; + ndbout << " opNo: " << getOpNo() + << " (internal: " << getInternalOpNo() << ")" << endl; printMargin(depth, hasMoreSiblingsMask, false); ndbout << " table: " << getTable().getName() << endl; if (getIndex() != NULL) No bundle (reason: useless for push emails).