List:Commits« Previous MessageNext Message »
From:Ole John Aske Date:May 8 2012 8:03am
Subject:bzr push into mysql-5.5-cluster-7.2 branch (ole.john.aske:3914 to 3915)
Bug#14010406
View as plain text  
 3915 Ole John Aske	2012-05-08
      Fix for Bug#14010406 LARGE PUSHED JOIN HIT ASSERT IN SPJ BLOCK
      
      Buffered rows (caused by PARENT refs) were prematurely
      released when there were no TN_ACTIVE child treeNodes.
      
      However, in bushy scans, a scan branch can be 'repeated'
      even if its previous execution was not 'ACTIVE'
      
      Thus, the logic deciding when buffered rows are released must be
      less eager. Now we keep any buffered rows until
      we prepare the treeNodes to retrieve new rows for the specific treeNode.
      
      This also made it possible to simplify this logic such that the
      recursive method Dbspj::cleanupChildBranch() became obsolete.
      
      As part of debugging / fixing this problem, the SPJ-DEBUG trace was
      enhanced such that the execution of each SPJ signal is related
      to the 'node_no' of the treeNode and the ident of the request it belongs to.

    modified:
      mysql-test/suite/ndb/r/ndb_join_pushdown_default.result
      mysql-test/suite/ndb/t/ndb_join_pushdown.inc
      storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp
      storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp
      storage/ndb/src/ndbapi/NdbQueryBuilder.cpp
 3914 Ole John Aske	2012-05-08
      Fix for MTR test breaks caused by revno 3913.
      
      Let MTR test ignore possible Error 1193 caused by setting the
      'debug' variable if running a non-debug version of the code.
       

    modified:
      mysql-test/suite/ndb/t/ndb_join_pushdown.inc
=== modified file 'mysql-test/suite/ndb/r/ndb_join_pushdown_default.result'
--- a/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result	2012-05-07 11:07:32 +0000
+++ b/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result	2012-05-08 08:03:29 +0000
@@ -5671,6 +5671,44 @@ k1	i	name
 Warnings:
 Warning	4294	Scan filter is too large, discarded
 drop table t1;
+create table t(
+pk int primary key auto_increment,
+i int, 
+j int,
+k int,
+index(i,j),
+index(i),
+index(j),
+index(k)
+) engine = ndb;
+insert into t(i,j,k) values
+(1,1,1), (1,1,1), (1,1,1),
+(2,2,2), (2,2,2), (2,2,2);
+set global debug='+d,max_4rows_in_spj_batches';
+explain
+select straight_join count(*) from 
+t as t1
+join t as t2 on t2.i = t1.i
+join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j
+join t as t6 on t6.k = t1.k
+where t1.i < 2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	i,i_2,j,k	i	5	NULL	3	Parent of 6 pushed join@1; Using where with pushed condition
+1	SIMPLE	t2	ref	i,i_2	i	5	test.t1.i	1	Child of 't1' in pushed join@1; Using where
+1	SIMPLE	t3	eq_ref	PRIMARY,j,k	PRIMARY	4	test.t1.j	1	Child of 't1' in pushed join@1
+1	SIMPLE	t4	ref	i,i_2,k	k	5	test.t3.k	2	Child of 't3' in pushed join@1; Using where
+1	SIMPLE	t5	ref	i,i_2,j	i	10	test.t4.i,test.t3.j	2	Child of 't4' in pushed join@1; Using where
+1	SIMPLE	t6	ref	k	k	5	test.t1.k	2	Child of 't1' in pushed join@1; Using where
+select straight_join count(*) from 
+t as t1
+join t as t2 on t2.i = t1.i
+join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j
+join t as t6 on t6.k = t1.k
+where t1.i < 2;
+count(*)
+243
+set global debug='-d,max_4rows_in_spj_batches';
+drop table t;
 create temporary table spj_counts_at_end
 select counter_name, sum(val) as val 
 from ndbinfo.counters 
@@ -5691,7 +5729,7 @@ counter_name	spj_counts_at_end.val - spj
 CONST_PRUNED_RANGE_SCANS_RECEIVED	8
 LOCAL_TABLE_SCANS_SENT	254
 PRUNED_RANGE_SCANS_RECEIVED	27
-RANGE_SCANS_RECEIVED	736
+RANGE_SCANS_RECEIVED	738
 READS_RECEIVED	47
 TABLE_SCANS_RECEIVED	254
 drop table spj_counts_at_startup;

=== modified file 'mysql-test/suite/ndb/t/ndb_join_pushdown.inc'
--- a/mysql-test/suite/ndb/t/ndb_join_pushdown.inc	2012-05-08 06:55:23 +0000
+++ b/mysql-test/suite/ndb/t/ndb_join_pushdown.inc	2012-05-08 08:03:29 +0000
@@ -4133,6 +4133,57 @@ eval $query;
 
 drop table t1;
 
+############################################################
+# Bug#14010406 LARGE PUSHED JOIN HIT ASSERT IN SPJ BLOCK
+#
+# Buffered rows (caused by PARENT refs) were prematurely
+# released when there were no TN_ACTIVE child treeNodes.
+# However, in bushy scans, a scan branch can be 'repeated'
+# even if its previous execution was 'complete'. 
+#
+# Thus we have to use a less eager release strategy where
+# we don't release any buffered rows until we prepare for
+# a NEXTREQ which will fetch more rows into the treeNode.
+############################################################
+
+create table t(
+  pk int primary key auto_increment,
+  i int, 
+  j int,
+  k int,
+  index(i,j),
+  index(i),
+  index(j),
+  index(k)
+) engine = ndb;
+
+insert into t(i,j,k) values
+   (1,1,1), (1,1,1), (1,1,1),
+   (2,2,2), (2,2,2), (2,2,2);
+
+--error 0,1193
+set global debug='+d,max_4rows_in_spj_batches';
+
+explain
+select straight_join count(*) from 
+  t as t1
+  join t as t2 on t2.i = t1.i
+  join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j
+  join t as t6 on t6.k = t1.k
+  where t1.i < 2;
+
+select straight_join count(*) from 
+  t as t1
+  join t as t2 on t2.i = t1.i
+  join (t as t3 join t as t4 on t4.k=t3.k join t as t5 on t5.i=t4.i and t5.j=t3.j) on t3.pk=t1.j
+  join t as t6 on t6.k = t1.k
+  where t1.i < 2;
+
+--error 0,1193
+set global debug='-d,max_4rows_in_spj_batches';
+
+drop table t;
+
 ########################################
 # Verify DBSPJ counters for entire test:
 # Note: These tables are 'temporary' withing 'connection spj'

=== modified file 'storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp	2012-04-25 06:24:54 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp	2012-05-08 08:03:29 +0000
@@ -1087,8 +1087,6 @@ private:
   void releaseRow(Ptr<Request>, RowRef ref);
   void registerActiveCursor(Ptr<Request>, Ptr<TreeNode>);
   void nodeFail_checkRequests(Signal*);
-
-  void cleanupChildBranch(Ptr<Request>, Ptr<TreeNode>);
   void cleanup_common(Ptr<Request>, Ptr<TreeNode>);
 
   /**

=== modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp	2012-04-25 09:22:21 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp	2012-05-08 08:03:29 +0000
@@ -1160,7 +1160,7 @@ Dbspj::batchComplete(Signal* signal, Ptr
   {
     jam();
     /**
-     * release unneeded buffers and position cursor for SCAN_NEXTREQ
+     * release unneeded buffers as preparation for later SCAN_NEXTREQ
      */
     releaseScanBuffers(requestPtr);
   }
@@ -1193,6 +1193,8 @@ Dbspj::prepareNextBatch(Signal* signal, 
     return;
   }
 
+  DEBUG("prepareNextBatch, request: " << requestPtr.i);
+
   if (requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT)
   {
     /**
@@ -1427,43 +1429,36 @@ Dbspj::releaseScanBuffers(Ptr<Request> r
 {
   Ptr<TreeNode> treeNodePtr;
   Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
-  TreeNodeBitMask ancestors_of_active;
 
-  for (list.last(treeNodePtr); !treeNodePtr.isNull(); list.prev(treeNodePtr))
+  for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
   {
     /**
-     * If there are no active children,
-     *   then we can cleanup in our sub-branch
+     * Release buffered rows for all treeNodes getting more rows
+     * in the following NEXTREQ, including all its childs.
      */
-    if (!ancestors_of_active.get(treeNodePtr.p->m_node_no))
+    if (requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no) ||
+        requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
     {
       if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
       {
         jam();
         releaseNodeRows(requestPtr, treeNodePtr);
       }
-      
-      /**
-       * Cleanup ACTIVE nodes fetching more rows in a NEXTREQ,
-       * or nodes being in 'm_active_nodes' as they will 'repeat'.
-       * (and then become active)
-       */
-      if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE ||
-          requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no))
-      {
-        jam();
-        cleanupChildBranch(requestPtr, treeNodePtr);
-      }
     }
 
     /**
-      * Collect ancestors of all nodes which are, or will
-      * become active in NEXTREQ (possibly repeated)
-      */
-    if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE ||
-        requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no))
+     * Do further cleanup in treeNodes having ancestor getting more rows.
+     * (Which excludes the restarted treeNode itself)
+     */
+    if (requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
     {
-      ancestors_of_active.bitOR(treeNodePtr.p->m_ancestors);
+      jam();
+      if (treeNodePtr.p->m_info->m_parent_batch_cleanup != 0)
+      {
+        jam();
+        (this->*(treeNodePtr.p->m_info->m_parent_batch_cleanup))(requestPtr,
+                                                                 treeNodePtr);
+      }
     }
   }
   /**
@@ -1494,32 +1489,15 @@ Dbspj::registerActiveCursor(Ptr<Request>
 }
 
 void
-Dbspj::cleanupChildBranch(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
-{
-  LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
-  Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
-  Dependency_map::ConstDataBufferIterator it;
-  for (list.first(it); !it.isNull(); list.next(it))
-  {
-    jam();
-    Ptr<TreeNode> childPtr;
-    m_treenode_pool.getPtr(childPtr, *it.data);
-    if (childPtr.p->m_info->m_parent_batch_cleanup != 0)
-    {
-      jam();
-      (this->*(childPtr.p->m_info->m_parent_batch_cleanup))(requestPtr,
-                                                            childPtr);
-    }
-    cleanupChildBranch(requestPtr,childPtr);
-  }
-}
-
-void
 Dbspj::releaseNodeRows(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
 {
   /**
    * Release all rows associated with tree node
    */
+  DEBUG("releaseNodeRows"
+     << ", node: " << treeNodePtr.p->m_node_no
+     << ", request: " << requestPtr.i
+  );
 
   // only when var-alloc, or else stack will be popped wo/ consideration
   // to individual rows
@@ -1630,6 +1608,9 @@ Dbspj::releaseRow(Ptr<Request> requestPt
 void
 Dbspj::releaseRequestBuffers(Ptr<Request> requestPtr, bool reset)
 {
+  DEBUG("releaseRequestBuffers"
+     << ", request: " << requestPtr.i
+  );
   /**
    * Release all pages for request
    */
@@ -1952,13 +1933,18 @@ Dbspj::execLQHKEYREF(Signal* signal)
 
   const LqhKeyRef* ref = reinterpret_cast<const LqhKeyRef*>(signal->getDataPtr());
 
-  DEBUG("execLQHKEYREF, errorCode:" << ref->errorCode);
   Ptr<TreeNode> treeNodePtr;
   m_treenode_pool.getPtr(treeNodePtr, ref->connectPtr);
 
   Ptr<Request> requestPtr;
   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
 
+  DEBUG("execLQHKEYREF"
+     << ", node: " << treeNodePtr.p->m_node_no
+     << ", request: " << requestPtr.i
+     << ", errorCode: " << ref->errorCode
+  );
+
   ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYREF);
   (this->*(treeNodePtr.p->m_info->m_execLQHKEYREF))(signal,
                                                     requestPtr,
@@ -1970,8 +1956,6 @@ Dbspj::execLQHKEYCONF(Signal* signal)
 {
   jamEntry();
 
-  DEBUG("execLQHKEYCONF");
-
   const LqhKeyConf* conf = reinterpret_cast<const LqhKeyConf*>(signal->getDataPtr());
   Ptr<TreeNode> treeNodePtr;
   m_treenode_pool.getPtr(treeNodePtr, conf->opPtr);
@@ -1979,6 +1963,11 @@ Dbspj::execLQHKEYCONF(Signal* signal)
   Ptr<Request> requestPtr;
   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
 
+  DEBUG("execLQHKEYCONF"
+     << ", node: " << treeNodePtr.p->m_node_no
+     << ", request: " << requestPtr.i
+  );
+
   ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYCONF);
   (this->*(treeNodePtr.p->m_info->m_execLQHKEYCONF))(signal,
                                                      requestPtr,
@@ -1991,8 +1980,6 @@ Dbspj::execSCAN_FRAGREF(Signal* signal)
   jamEntry();
   const ScanFragRef* ref = reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());
 
-  DEBUG("execSCAN_FRAGREF, errorCode:" << ref->errorCode);
-
   Ptr<ScanFragHandle> scanFragHandlePtr;
   m_scanfraghandle_pool.getPtr(scanFragHandlePtr, ref->senderData);
   Ptr<TreeNode> treeNodePtr;
@@ -2000,6 +1987,12 @@ Dbspj::execSCAN_FRAGREF(Signal* signal)
   Ptr<Request> requestPtr;
   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
 
+  DEBUG("execSCAN_FRAGCONF"
+     << ", node: " << treeNodePtr.p->m_node_no
+     << ", request: " << requestPtr.i
+     << ", errorCode: " << ref->errorCode
+  );
+
   ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGREF);
   (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGREF))(signal,
                                                        requestPtr,
@@ -2021,6 +2014,10 @@ Dbspj::execSCAN_HBREP(Signal* signal)
   m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
   Ptr<Request> requestPtr;
   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+  DEBUG("execSCAN_HBREP"
+     << ", node: " << treeNodePtr.p->m_node_no
+     << ", request: " << requestPtr.i
+  );
 
   Uint32 ref = requestPtr.p->m_senderRef;
   signal->theData[0] = requestPtr.p->m_senderData;
@@ -2031,7 +2028,6 @@ void
 Dbspj::execSCAN_FRAGCONF(Signal* signal)
 {
   jamEntry();
-  DEBUG("execSCAN_FRAGCONF");
 
   const ScanFragConf* conf = reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());
 
@@ -2048,6 +2044,10 @@ Dbspj::execSCAN_FRAGCONF(Signal* signal)
   m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
   Ptr<Request> requestPtr;
   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+  DEBUG("execSCAN_FRAGCONF"
+     << ", node: " << treeNodePtr.p->m_node_no
+     << ", request: " << requestPtr.i
+  );
 
   ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGCONF);
   (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGCONF))(signal,
@@ -2062,8 +2062,8 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal)
   jamEntry();
   const ScanFragNextReq * req = (ScanFragNextReq*)&signal->theData[0];
 
-  DEBUG("Incomming SCAN_NEXTREQ");
 #ifdef DEBUG_SCAN_FRAGREQ
+  DEBUG("Incomming SCAN_NEXTREQ";
   printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
                        ScanFragNextReq::SignalLength, DBLQH);
 #endif
@@ -2080,6 +2080,7 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal)
     ndbrequire(req->requestInfo == ScanFragNextReq::ZCLOSE);
     return;
   }
+  DEBUG("execSCAN_NEXTREQ, request: " << requestPtr.i);
 
 #ifdef SPJ_TRACE_TIME
   Uint64 now = spj_now();
@@ -2132,7 +2133,7 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal)
       if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE)
       {
         jam();
-        DEBUG("SCAN_NEXTREQ on TreeNode: " << treeNodePtr.i
+        DEBUG("SCAN_NEXTREQ on TreeNode: "
            << ",  m_node_no: " << treeNodePtr.p->m_node_no
            << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI);
 
@@ -2151,7 +2152,7 @@ Dbspj::execSCAN_NEXTREQ(Signal* signal)
          */
         jam();
         ndbrequire(requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT);
-        DEBUG("  Restart TreeNode: " << treeNodePtr.i
+        DEBUG("Restart TreeNode "
            << ",  m_node_no: " << treeNodePtr.p->m_node_no
            << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI);
 
@@ -2172,7 +2173,6 @@ void
 Dbspj::execTRANSID_AI(Signal* signal)
 {
   jamEntry();
-  DEBUG("execTRANSID_AI");
   TransIdAI * req = (TransIdAI *)signal->getDataPtr();
   Uint32 ptrI = req->connectPtr;
   //Uint32 transId[2] = { req->transId[0], req->transId[1] };
@@ -2182,6 +2182,11 @@ Dbspj::execTRANSID_AI(Signal* signal)
   Ptr<Request> requestPtr;
   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
 
+  DEBUG("execTRANSID_AI"
+     << ", node: " << treeNodePtr.p->m_node_no
+     << ", request: " << requestPtr.i
+  );
+
   ndbrequire(signal->getNoOfSections() != 0);
 
   SegmentedSectionPtr dataPtr;
@@ -2249,6 +2254,11 @@ Dbspj::storeRow(Ptr<Request> requestPtr,
   Uint32 * headptr = (Uint32*)row.m_row_data.m_section.m_header;
   Uint32 headlen = 1 + row.m_row_data.m_section.m_header->m_len;
 
+  DEBUG("storeRow"
+     << ", node: " << treeNodePtr.p->m_node_no
+     << ", request: " << requestPtr.i
+  );
+
   /**
    * If rows are not in map, then they are kept in linked list
    */
@@ -3478,7 +3488,8 @@ Dbspj::lookup_parent_row(Signal* signal,
   const Uint32 tableId = LqhKeyReq::getTableId(src->tableSchemaVersion);
   const Uint32 corrVal = rowRef.m_src_correlation;
 
-  DEBUG("::lookup_parent_row");
+  DEBUG("::lookup_parent_row"
+     << ", node: " << treeNodePtr.p->m_node_no);
 
   do
   {
@@ -5117,6 +5128,8 @@ Dbspj::scanIndex_parent_row(Signal* sign
                             const RowPtr & rowRef)
 {
   jam();
+  DEBUG("::scanIndex_parent_row"
+     << ", node: " << treeNodePtr.p->m_node_no);
 
   Uint32 err;
   ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
@@ -6885,6 +6898,8 @@ Dbspj::appendFromParent(Uint32 & dst, Lo
   m_treenode_pool.getPtr(treeNodePtr, rowptr.m_src_node_ptrI);
   Uint32 corrVal = rowptr.m_src_correlation;
   RowPtr targetRow;
+  DEBUG("appendFromParent-of"
+     << " node: " << treeNodePtr.p->m_node_no);
   while (levels--)
   {
     jam();
@@ -6894,6 +6909,8 @@ Dbspj::appendFromParent(Uint32 & dst, Lo
       return DbspjErr::InvalidPattern;
     }
     m_treenode_pool.getPtr(treeNodePtr, treeNodePtr.p->m_parentPtrI);
+    DEBUG("appendFromParent"
+       << ", node: " << treeNodePtr.p->m_node_no);
     if (unlikely((treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP) == 0))
     {
       DEBUG_CRASH();

=== modified file 'storage/ndb/src/ndbapi/NdbQueryBuilder.cpp'
--- a/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp	2012-04-11 10:34:58 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp	2012-05-08 08:03:29 +0000
@@ -2181,7 +2181,8 @@ NdbQueryOperationDefImpl::printTree(Uint
   ndbout << NdbQueryOperationDef::getTypeName(getType()) << endl;
   printMargin(depth, hasMoreSiblingsMask, false);
   // Print attributes.
-  ndbout << " opNo: " << getOpNo() << endl;
+  ndbout << " opNo: " << getOpNo()
+         << " (internal: " << getInternalOpNo() << ")" << endl;
   printMargin(depth, hasMoreSiblingsMask, false);
   ndbout << " table: " << getTable().getName() << endl;
   if (getIndex() != NULL)

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-5.5-cluster-7.2 branch (ole.john.aske:3914 to 3915)Bug#14010406Ole John Aske9 May