List:Commits« Previous MessageNext Message »
From:Jonas Oreland Date:May 20 2010 11:18am
Subject:bzr commit into mysql-5.1-telco-7.0-spj branch (jonas:3165)
View as plain text  
#At file:///home/jonas/src/70-spj-next/ based on revid:ole.john.aske@stripped

 3165 Jonas Oreland	2010-05-20
      ndb spj - bzr commit for spj-ng, including scan.vs.scan nf-handling and multi-parent

    modified:
      mysql-test/suite/ndb/r/ndb_join_pushdown.result
      mysql-test/suite/ndb/t/ndb_join_pushdown.test
      storage/ndb/include/kernel/signaldata/DbspjErr.hpp
      storage/ndb/include/kernel/signaldata/QueryTree.hpp
      storage/ndb/include/kernel/signaldata/ReadNodesConf.hpp
      storage/ndb/include/kernel/signaldata/ScanFrag.hpp
      storage/ndb/src/kernel/blocks/ERROR_codes.txt
      storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
      storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
      storage/ndb/src/kernel/blocks/dblqh/DblqhProxy.cpp
      storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp
      storage/ndb/src/kernel/blocks/dbspj/DbspjInit.cpp
      storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp
      storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp
      storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp
      storage/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp
      storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
      storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp
      storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp
      storage/ndb/src/kernel/blocks/record_types.hpp
      storage/ndb/src/kernel/vm/DataBuffer2.hpp
      storage/ndb/src/kernel/vm/SLFifoList.hpp
      storage/ndb/src/kernel/vm/SectionReader.cpp
      storage/ndb/src/kernel/vm/SectionReader.hpp
      storage/ndb/src/kernel/vm/SimulatedBlock.hpp
      storage/ndb/src/ndbapi/NdbQueryOperation.cpp
      storage/ndb/src/ndbapi/NdbTransaction.cpp
      storage/ndb/src/ndbapi/ndberror.c
      storage/ndb/test/include/HugoQueries.hpp
      storage/ndb/test/ndbapi/testSpj.cpp
      storage/ndb/test/run-test/files.cpp
      storage/ndb/test/src/HugoQueries.cpp
      storage/ndb/test/tools/hugoJoin.cpp
      storage/ndb/test/tools/test_spj.cpp
=== modified file 'mysql-test/suite/ndb/r/ndb_join_pushdown.result'
--- a/mysql-test/suite/ndb/r/ndb_join_pushdown.result	2010-05-20 06:36:59 +0000
+++ b/mysql-test/suite/ndb/r/ndb_join_pushdown.result	2010-05-20 11:18:08 +0000
@@ -2151,6 +2151,20 @@ k	b	k	b	k	b	k	b
 3	1	1	1	1	1	1	1
 4	1	1	1	1	1	1	1
 drop table t1;
+CREATE TABLE t1 (
+a int NOT NULL auto_increment,
+b char(255) not null,
+c int NOT NULL,
+d char(255) not null,
+PRIMARY KEY (`a`,`b`)
+) ENGINE=ndbcluster;
+select count(*)
+from t1 
+join t1 as t2 on t2.a = t1.c and t2.b = t1.d
+join t1 as t3 on t3.a = t2.c and t3.b = t2.d;
+count(*)
+2996
+drop table t1;
 CREATE LOGFILE GROUP lg1
 ADD UNDOFILE 'undofile.dat'
 INITIAL_SIZE 1M

=== modified file 'mysql-test/suite/ndb/t/ndb_join_pushdown.test'
--- a/mysql-test/suite/ndb/t/ndb_join_pushdown.test	2010-05-20 06:36:59 +0000
+++ b/mysql-test/suite/ndb/t/ndb_join_pushdown.test	2010-05-20 11:18:08 +0000
@@ -1476,6 +1476,34 @@ where t2.k = 1;
 
 drop table t1;
 
+##
+# Try with higher row-count to test batching/flow control
+#
+CREATE TABLE t1 (
+  a int NOT NULL auto_increment,
+  b char(255) not null,
+  c int NOT NULL,
+  d char(255) not null,
+  PRIMARY KEY (`a`,`b`)
+) ENGINE=ndbcluster;
+
+let $1=1000;
+disable_query_log;
+while ($1)
+{
+ eval insert into t1(a,b,c,d) values
+ ($1, 'a', $1, 'a'),($1, 'b', $1+1, 'b'),($1, 'c', $1-1, 'c');
+ dec $1;
+}
+enable_query_log;
+
+select count(*)
+from t1 
+join t1 as t2 on t2.a = t1.c and t2.b = t1.d
+join t1 as t3 on t3.a = t2.c and t3.b = t2.d;
+
+drop table t1;
+
 # Pushed join accessing disk data.
 
 CREATE LOGFILE GROUP lg1

=== modified file 'storage/ndb/include/kernel/signaldata/DbspjErr.hpp'
--- a/storage/ndb/include/kernel/signaldata/DbspjErr.hpp	2010-04-29 14:56:00 +0000
+++ b/storage/ndb/include/kernel/signaldata/DbspjErr.hpp	2010-05-20 11:18:08 +0000
@@ -33,6 +33,9 @@ struct DbspjErr
     ,BothTreeAndParametersContainInterpretedProgram = 20011
     ,InvalidTreeParametersSpecificationKeyParamBitsMissmatch = 20012
     ,InvalidTreeParametersSpecificationIncorrectKeyParamCount = 20013
+    ,InternalError = 20014
+    ,OutOfRowMemory = 20015
+    ,NodeFailure = 20016
   };
 };
 

=== modified file 'storage/ndb/include/kernel/signaldata/QueryTree.hpp'
--- a/storage/ndb/include/kernel/signaldata/QueryTree.hpp	2010-05-20 06:36:59 +0000
+++ b/storage/ndb/include/kernel/signaldata/QueryTree.hpp	2010-05-20 11:18:08 +0000
@@ -28,8 +28,9 @@ struct QueryNode  // Effectively used as
 
   enum OpType
   {
-    QN_LOOKUP    = 0x1,
-    QN_SCAN_FRAG = 0x2,
+    QN_LOOKUP     = 0x1,
+    QN_SCAN_FRAG  = 0x2,
+    QN_SCAN_INDEX = 0x3,
     QN_END = 0
   };
 
@@ -206,6 +207,79 @@ struct QN_ScanFragParameters // Is a Que
 };
 
 /**
+ * This node describes an IndexScan
+ */
+struct QN_ScanIndexNode
+{
+  Uint32 len;
+  Uint32 requestInfo;
+  Uint32 tableId;      // 16-bit
+  Uint32 tableVersion;
+  STATIC_CONST( NodeSize = 4 );
+
+  enum ScanIndexBits
+  {
+    /**
+     * If doing equality search that can be pruned
+     *   a pattern that creates the key to hash with is stored before
+     *   the DA optional part
+     */
+    SI_PRUNE_PATTERN = 0x10000,
+    
+    // Do pattern contain parameters
+    SI_PRUNE_PARAMS = 0x20000,
+
+    // Is prune pattern dependent on parent key (or only on parameters)
+    SI_PRUNE_LINKED = 0x40000,
+
+    // Should it be parallel scan (can also be set as in parameters)
+    SI_PARALLEL = 0x80000,
+
+    SI_END = 0
+  };
+
+  /**
+   * See DABits::NodeInfoBits
+   */
+  Uint32 optional[1];
+};
+
+/**
+ * This struct describes parameters that are associated with
+ *  a QN_ScanIndexNode
+ */
+struct QN_ScanIndexParameters
+{
+  Uint32 len;
+  Uint32 requestInfo;
+  Uint32 batchSize;    // (bytes << 16) | (rows)
+  Uint32 resultData;   // Api connect ptr
+  STATIC_CONST ( NodeSize = 4 );
+
+  enum ScanIndexParamBits
+  {
+    /**
+     * Do arguments contain parameters for prune-pattern
+     */
+    SIP_PRUNE_PARAMS = 0x10000,
+
+    /**
+     * Should it scan index in parallel
+     *   This is needed for "multi-cursor" semantics
+     *   with (partial) ordering
+     */
+    SIP_PARALLEL = 0x20000,
+
+    SIP_END = 0
+  };
+
+  /**
+   * See DABits::ParamInfoBits
+   */
+  Uint32 optional[1];
+};
+
+/**
  * This is the definition of a QueryTree
  */
 struct QueryTree
@@ -231,6 +305,7 @@ struct QueryPattern
     P_COL    = 0x2,  // Get column value from RowRef
     P_UNQ_PK = 0x3,  // NDB$PK column from a unique index
     P_PARAM  = 0x4,  // User specified parameter value
+    P_PARENT = 0x5,  // Move up in tree
     P_PARAM_HEADER = 0x6, // User specified param val including AttributeHeader
     P_ATTRINFO = 0x7,// Get column including header from RowRef
     P_END    = 0
@@ -264,6 +339,19 @@ struct QueryPattern
    */
   static Uint32 getParamNo(Uint32 info) { return info & 0xFFFF;}
   static Uint32 param(Uint32 no) { return (P_PARAM << 16) | no; }
+
+  static Uint32 paramHeader(Uint32 no) { return (P_PARAM_HEADER << 16) | no; }
+
+  /**
+   * get col including header
+   */
+  static Uint32 attrInfo(Uint32 no) { return (P_ATTRINFO << 16) | no;}
+
+  /**
+   * Move to grand-parent no
+   * (0 == immediate parent)
+   */
+  static Uint32 parent(Uint32 no) { return (P_PARENT << 16) | no;}
 };
 
 #endif

=== modified file 'storage/ndb/include/kernel/signaldata/ReadNodesConf.hpp'
--- a/storage/ndb/include/kernel/signaldata/ReadNodesConf.hpp	2009-05-27 15:21:45 +0000
+++ b/storage/ndb/include/kernel/signaldata/ReadNodesConf.hpp	2010-05-20 11:18:08 +0000
@@ -50,6 +50,7 @@ class ReadNodesConf {
   friend class Suma;
   friend class LocalProxy;
   friend class Dbinfo;
+  friend class Dbspj;
 
   friend bool printREAD_NODES_CONF(FILE*, const Uint32 *, Uint32, Uint16);
 public:

=== modified file 'storage/ndb/include/kernel/signaldata/ScanFrag.hpp'
--- a/storage/ndb/include/kernel/signaldata/ScanFrag.hpp	2009-06-12 12:01:12 +0000
+++ b/storage/ndb/include/kernel/signaldata/ScanFrag.hpp	2010-05-20 11:18:08 +0000
@@ -167,7 +167,7 @@ class ScanFragConf {
   friend class Backup;
   friend class Suma;
 public:
-  STATIC_CONST( SignalLength = 6 );
+  STATIC_CONST( SignalLength = 7 );
   
 public:
   Uint32 senderData;
@@ -176,6 +176,7 @@ public:
   Uint32 transId1;
   Uint32 transId2;
   Uint32 total_len;
+  Uint32 fragId;
 };
 
 class ScanFragRef {
@@ -191,7 +192,7 @@ class ScanFragRef {
   friend class Backup;
   friend class Suma;
 public:
-  STATIC_CONST( SignalLength = 4 );
+  STATIC_CONST( SignalLength = 5 );
 public:
   enum ErrorCode {
     ZNO_FREE_TC_CONREC_ERROR = 484,
@@ -210,6 +211,7 @@ public:
   Uint32 transId1;
   Uint32 transId2;
   Uint32 errorCode;
+  Uint32 fragId;
 };
 
 /**

=== modified file 'storage/ndb/src/kernel/blocks/ERROR_codes.txt'
--- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2010-04-29 14:52:05 +0000
+++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt	2010-05-20 11:18:08 +0000
@@ -3,7 +3,7 @@ Next NDBCNTR 1002
 Next NDBFS 2000
 Next DBACC 3002
 Next DBTUP 4035
-Next DBLQH 5055
+Next DBLQH 5056
 Next DBDICT 6025
 Next DBDIH 7221
 Next DBTC 8087

=== modified file 'storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2010-03-22 12:38:39 +0000
+++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2010-05-20 11:18:08 +0000
@@ -2568,7 +2568,8 @@ void Dbdih::execINCL_NODECONF(Signal* si
   blocklist[2] = cdictblockref;
   blocklist[3] = numberToRef(BACKUP, getOwnNodeId());
   blocklist[4] = numberToRef(SUMA, getOwnNodeId());
-  blocklist[5] = 0;
+  blocklist[5] = numberToRef(DBSPJ, getOwnNodeId());
+  blocklist[6] = 0;
   
   for (Uint32 i = 0; blocklist[i] != 0; i++)
   {

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2010-05-06 08:42:59 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp	2010-05-20 11:18:08 +0000
@@ -4144,6 +4144,7 @@ void Dblqh::execSIGNAL_DROPPED_REP(Signa
     const Uint32 senderData= truncatedScanFragReq->senderData;
     const Uint32 transid1= truncatedScanFragReq->transId1;
     const Uint32 transid2= truncatedScanFragReq->transId2;
+    const Uint32 fragId = truncatedScanFragReq->fragmentNoKeyLen;
 
     /* Send SCAN_FRAGREF back to the client */
     ScanFragRef* ref= (ScanFragRef*)&signal->theData[0];
@@ -4151,7 +4152,8 @@ void Dblqh::execSIGNAL_DROPPED_REP(Signa
     ref->transId1= transid1;
     ref->transId2= transid2;
     ref->errorCode= ZGET_ATTRINBUF_ERROR;
-    
+    ref->fragId = fragId;
+
     sendSignal(signal->senderBlockRef(), GSN_SCAN_FRAGREF, signal,
                ScanFragRef::SignalLength, JBB);
     break;
@@ -4203,16 +4205,13 @@ void Dblqh::execLQHKEYREQ(Signal* signal
   }
 
   sig0 = lqhKeyReq->clientConnectPtr;
-  if (cfirstfreeTcConrec != RNIL && !ERROR_INSERTED(5031)) {
+  if (cfirstfreeTcConrec != RNIL && !ERROR_INSERTED_CLEAR(5031)) {
     jamEntry();
     seizeTcrec();
   } else {
 /* ------------------------------------------------------------------------- */
 /* NO FREE TC RECORD AVAILABLE, THUS WE CANNOT HANDLE THE REQUEST.           */
 /* ------------------------------------------------------------------------- */
-    if (ERROR_INSERTED(5031)) {
-      CLEAR_ERROR_INSERT_VALUE;
-    }
     releaseSections(handle);
     noFreeRecordLab(signal, lqhKeyReq, ZNO_TC_CONNECT_ERROR);
     return;
@@ -8628,7 +8627,7 @@ void Dblqh::continueAfterLogAbortWriteLa
     tcKeyRef->transId[0] = regTcPtr->transid[0];
     tcKeyRef->transId[1] = regTcPtr->transid[1];
     tcKeyRef->errorCode = regTcPtr->errorCode;
-    sendTCKEYREF(signal, regTcPtr->applRef, regTcPtr->clientBlockref, 0);
+    sendTCKEYREF(signal, regTcPtr->applRef, regTcPtr->tcBlockref, 0);
     cleanUp(signal);
     return;
   }//if
@@ -9789,7 +9788,7 @@ void Dblqh::execSCAN_FRAGREQ(Signal* sig
     goto error_handler_early_1;
   }
   
-  if (cfirstfreeTcConrec != RNIL) {
+  if (cfirstfreeTcConrec != RNIL && !ERROR_INSERTED_CLEAR(5055)) {
     seizeTcrec();
     tcConnectptr.p->clientConnectrec = scanFragReq->senderData;
     tcConnectptr.p->clientBlockref = signal->senderBlockRef();
@@ -9919,6 +9918,7 @@ error_handler:
   ref->transId1 = transid1;
   ref->transId2 = transid2;
   ref->errorCode = errorCode;
+  ref->fragId = fragId;
   sendSignal(tcConnectptr.p->clientBlockref, GSN_SCAN_FRAGREF, signal, 
 	     ScanFragRef::SignalLength, JBB);
   releaseSections(handle);
@@ -9936,6 +9936,7 @@ error_handler:
   ref->transId1 = transid1;
   ref->transId2 = transid2;
   ref->errorCode = errorCode;
+  ref->fragId = fragId;
   sendSignal(signal->senderBlockRef(), GSN_SCAN_FRAGREF, signal,
 	     ScanFragRef::SignalLength, JBB);
 }//Dblqh::execSCAN_FRAGREQ()
@@ -10030,6 +10031,7 @@ void Dblqh::abort_scan(Signal* signal, U
     ref->transId1 = tcConnectptr.p->transid[0];
     ref->transId2 = tcConnectptr.p->transid[1];
     ref->errorCode = errcode;
+    ref->fragId = tcConnectptr.p->fragmentid;
     sendSignal(tcConnectptr.p->clientBlockref, GSN_SCAN_FRAGREF, signal, 
 	       ScanFragRef::SignalLength, JBB);
   }
@@ -10212,6 +10214,7 @@ Dblqh::copyNextRange(Uint32 * dst, TcCon
     ndbrequire( keyInfoReader.getWord(&firstWord) );
     const Uint32 rangeLen= (firstWord >> 16) ? (firstWord >> 16) : totalLen;
     tcPtrP->m_scan_curr_range_no= (firstWord & 0xFFF0) >> 4;
+    tcPtrP->m_anyValue = ((firstWord & 0xFFF0) >> 4) << 16;
     
     firstWord &= 0xF; // Remove length+range num from first word
     
@@ -10972,6 +10975,7 @@ void Dblqh::tupScanCloseConfLab(Signal* 
     ref->transId1 = tcConnectptr.p->transid[0];
     ref->transId2 = tcConnectptr.p->transid[1];
     ref->errorCode = tcConnectptr.p->errorCode; 
+    ref->fragId = tcConnectptr.p->fragmentid;
     sendSignal(tcConnectptr.p->clientBlockref, GSN_SCAN_FRAGREF, signal, 
 	 ScanFragRef::SignalLength, JBB);
   } else {
@@ -11457,6 +11461,7 @@ void Dblqh::sendScanFragConf(Signal* sig
 #ifdef NOT_USED
   NodeId tc_node_id= refToNode(tcConnectptr.p->clientBlockref);
 #endif
+  Uint32 fragId = tcConnectptr.p->fragmentid;
   Uint32 trans_id1= tcConnectptr.p->transid[0];
   Uint32 trans_id2= tcConnectptr.p->transid[1];
 
@@ -11466,6 +11471,7 @@ void Dblqh::sendScanFragConf(Signal* sig
   conf->transId1 = trans_id1;
   conf->transId2 = trans_id2;
   conf->total_len= total_len;
+  conf->fragId = fragId;
   sendSignal(tcConnectptr.p->clientBlockref, GSN_SCAN_FRAGCONF, 
              signal, ScanFragConf::SignalLength, JBB);
   

=== modified file 'storage/ndb/src/kernel/blocks/dblqh/DblqhProxy.cpp'
--- a/storage/ndb/src/kernel/blocks/dblqh/DblqhProxy.cpp	2010-04-30 09:41:04 +0000
+++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhProxy.cpp	2010-05-20 11:18:08 +0000
@@ -13,7 +13,6 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
-#include "../dbspj/Dbspj.hpp"
 #include "DblqhProxy.hpp"
 #include "Dblqh.hpp"
 #include "DblqhCommon.hpp"

=== modified file 'storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp	2010-05-20 06:36:59 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp	2010-05-20 11:18:08 +0000
@@ -20,9 +20,13 @@ Foundation, Inc., 59 Temple Place, Suite
 #include <signaldata/LqhKey.hpp>
 #include <signaldata/ScanFrag.hpp>
 #include <AttributeHeader.hpp>
+#include <SLFifoList.hpp>
 #include <DLFifoList.hpp>
+#include <SLList.hpp>
 #include <ArenaPool.hpp>
 #include <DataBuffer2.hpp>
+#include <signaldata/DbspjErr.hpp>
+#include "../dbtup/tuppage.hpp"
 
 class SectionReader;
 struct QueryNode;
@@ -43,6 +47,11 @@ private:
   void execSCAN_FRAGREQ(Signal* signal);
   void execSCAN_NEXTREQ(Signal* signal);
 
+  void execDIH_SCAN_TAB_REF(Signal*);
+  void execDIH_SCAN_TAB_CONF(Signal*);
+  void execDIH_SCAN_GET_NODES_REF(Signal*);
+  void execDIH_SCAN_GET_NODES_CONF(Signal*);
+
   /**
    * Signals from LQH
    */
@@ -57,9 +66,16 @@ private:
    * General signals
    */
   void execDUMP_STATE_ORD(Signal* signal){}
+  void execREAD_NODESCONF(Signal*);
   void execREAD_CONFIG_REQ(Signal* signal);
   void execSTTOR(Signal* signal);
   void execDBINFO_SCANREQ(Signal* signal); 
+  void execCONTINUEB(Signal*);
+  void execNODE_FAILREP(Signal*);
+  void execINCL_NODEREQ(Signal*);
+  void execAPI_FAILREQ(Signal*);
+
+  void sendSTTORRY(Signal* signal);
 
 protected:
   //virtual bool getParam(const char* name, Uint32* count);
@@ -72,13 +88,50 @@ public:
   typedef DataBuffer2<14, LocalArenaPoolImpl> PatternStore;
   typedef LocalDataBuffer2<14, LocalArenaPoolImpl> Local_pattern_store;
 
+  struct RowRef
+  {
+    Uint32 m_page_id;
+    Uint16 m_page_pos;
+    union 
+    {
+      Uint16 unused;
+      Uint16 m_allocator;
+    };
+
+    void copyto_link(Uint32 * dst) const { 
+      dst[0] = m_page_id; dst[1] = m_page_pos;
+    }
+    void assign_from_link(const Uint32 * src) { 
+      m_page_id = src[0]; 
+      m_page_pos = src[1];
+    }
+
+    void copyto_map(Uint16 * dst) const {
+      dst[0] = Uint16(m_page_id);
+      dst[1] = Uint16(m_page_id >> 16);
+      dst[2] = m_page_pos;
+    }
+
+    void assign_from_map(const Uint16 * src) {
+      m_page_id = src[0];
+      m_page_id += Uint32(src[1]) << 16;
+      m_page_pos = src[2];
+    }
+
+    static bool map_is_null(const Uint16 * src) {
+      return src[2] == 0xFFFF;
+    }
+
+    void setNull() { m_page_id = RNIL;}
+    bool isNull() const { return m_page_id == RNIL;}
+  };
+
+  static const RowRef NullRowRef;
+
   /**
    * This struct represent a row being passed to a child
-   *   currently only the RT_SECTION type is supported
-   *   but RT_ROW_BUF is also planned (for buffered rows)
-   *     that will be used for equi-join (and increased parallelism on scans)
    */
-  struct RowRef
+  struct RowPtr
   {
     Uint32 m_type;
     Uint32 m_src_node_no;
@@ -88,7 +141,7 @@ public:
     struct Header
     {
       Uint32 m_len;
-      AttributeHeader m_headers[1];
+      Uint32 m_offset[1];
     };
 
     struct Section
@@ -99,6 +152,7 @@ public:
 
     struct Linear
     {
+      RowRef m_row_ref;
       const Header * m_header;
       const Uint32 * m_data;
     };
@@ -116,6 +170,99 @@ public:
     };
   };
 
+  struct SLFifoRowList
+  {
+    /**
+     * Data used for a single linked list of rows
+     */
+    Uint32 m_first_row_page_id;
+    Uint32 m_last_row_page_id;
+    Uint16 m_first_row_page_pos;
+    Uint16 m_last_row_page_pos;
+
+    void init() { m_first_row_page_id = RNIL;}
+    bool isNull() const { return m_first_row_page_id == RNIL; }
+  };
+  
+  struct RowMap
+  {
+    /**
+     * Data used for a map with rows (key is correlation id)
+     *   currently a single array is used to store row references
+     *   (size == batch size)
+     */
+    RowRef m_map_ref;
+    Uint16 m_size;                // size of array
+    Uint16 m_elements;            // #elements in array
+
+    void init() { m_map_ref.setNull();}
+    bool isNull() const { return m_map_ref.isNull(); }
+
+    void assign (RowRef ref) {
+      m_map_ref = ref;
+    }
+
+    void copyto(RowRef& ref) const {
+      ref = m_map_ref;
+    }
+
+    /**
+     * functions for manipulating *content* of map
+     */
+    void clear(Uint32 * ptr)  { 
+      memset(ptr, 0xFF, MAP_SIZE_PER_REF_16 * m_size * sizeof(Uint16));
+    }
+    void store(Uint32 * _ptr, Uint32 pos, RowRef ref) { 
+      Uint16 * ptr = (Uint16*)_ptr;
+      ptr += MAP_SIZE_PER_REF_16 * pos;
+      ref.copyto_map(ptr);
+      m_elements++;
+    }
+    static void load(const Uint32 * _ptr, Uint32 pos, RowRef & ref) {
+      const Uint16 * ptr = (const Uint16*)_ptr;
+      ptr += MAP_SIZE_PER_REF_16 * pos;
+      ref.assign_from_map(ptr);
+    }
+    static bool isNull(const Uint32 * _ptr, Uint32 pos) {
+      const Uint16 * ptr = (const Uint16*)_ptr;
+      ptr += MAP_SIZE_PER_REF_16 * pos;
+      return RowRef::map_is_null(ptr);
+    }
+
+    STATIC_CONST( MAP_SIZE_PER_REF_16 = 3 );
+  };
+
+  struct SLFifoRowListIterator
+  {
+    RowRef m_ref;
+    Uint32 * m_row_ptr;
+
+    bool isNull() const { return m_ref.isNull(); }
+    void setNull() { m_ref.setNull(); }
+  };
+
+  struct SLFifoRowListIteratorPtr
+  {
+    RowRef m_ref;
+  };
+
+  struct RowMapIterator
+  {
+    Uint32 * m_row_ptr;
+    Uint32 * m_map_ptr;
+    RowRef m_ref; // position of actual row
+    Uint16 m_size;
+    Uint16 m_element_no;
+    bool isNull() const { return m_ref.isNull(); }
+    void setNull() { m_ref.setNull(); }
+  };
+
+  struct RowMapIteratorPtr
+  {
+    Uint32 m_element_no;
+  };
+
+
   /**
    * A struct used when building an TreeNode
    */
@@ -124,9 +271,55 @@ public:
     Uint32 m_cnt;
     Uint32 m_scanPrio;
     Uint32 m_savepointId;
+    Uint32 m_batch_size_rows;
     Uint32 m_resultRef;  // API
     Uint32 m_resultData; // API
-    Ptr<TreeNode> m_node_list[63]; // Used for resolving dependencies
+    Uint32 m_senderRef;  // TC (used for routing)
+    Uint32 m_scan_cnt;
+    Signal* m_start_signal; // Argument to first node in tree
+    SegmentedSectionPtr m_keyPtr;
+    Ptr<TreeNode> m_node_list[63];   // Used for resolving dependencies
+  };
+
+  struct RowPage
+  {
+    /**
+     * NOTE: This contains various padding to be binary aligned with Tup_page
+     *       (for storing into DLFifoList<RowPage>
+     */
+    struct File_formats::Page_header m_page_header;
+    Uint32 unused0;
+    Uint32 unused1;
+    Uint32 nextList;
+    Uint32 prevList;
+    Uint32 m_data[GLOBAL_PAGE_SIZE_WORDS - 7];
+    STATIC_CONST( SIZE = GLOBAL_PAGE_SIZE_WORDS - 7 );
+  };
+
+  typedef Tup_varsize_page Var_page;
+
+  struct RowBuffer 
+  {
+    RowBuffer() { stack_init(); }
+    DLFifoList<RowPage>::Head m_page_list;
+
+    void stack_init() { new (&m_page_list) DLFifoList<RowPage>::Head(); m_stack.m_pos = 0xFFFF; }
+    void var_init() { new (&m_page_list) DLFifoList<RowPage>::Head(); m_var.m_free = 0; }
+
+    struct Stack
+    {
+      Uint32 m_pos; // position on head-page
+    };
+
+    struct Var
+    {
+      Uint32 m_free; // Free on last page in list
+    };
+
+    union {
+      struct Stack m_stack;
+      struct Var m_var;
+    };
   };
 
   /**
@@ -152,16 +345,22 @@ public:
                              const QueryNode*, const QueryNodeParameters*);
 
     /**
+     * This function is called after build, but before start
+     *   it's allowed to block (i.e send signals)
+     *   and should if so increase request::m_outstanding
+     */
+    void (Dbspj::*m_prepare)(Signal*, Ptr<Request>, Ptr<TreeNode>);
+
+    /**
      * This function is used for starting a request
      */
-    void (Dbspj::*m_start)(Signal*, Ptr<Request>, Ptr<TreeNode>,
-			   SegmentedSectionPtr);
+    void (Dbspj::*m_start)(Signal*, Ptr<Request>, Ptr<TreeNode>);
 
     /**
      * This function is used when getting a TRANSID_AI
      */
     void (Dbspj::*m_execTRANSID_AI)(Signal*,Ptr<Request>,Ptr<TreeNode>,
-				    const RowRef&);
+				    const RowPtr&);
 
     /**
      * This function is used when getting a LQHKEYREF
@@ -186,8 +385,14 @@ public:
     /**
      * This function is called on the *child* by the *parent* when passing rows
      */
-    void (Dbspj::*m_start_child)(Signal*,Ptr<Request>,Ptr<TreeNode>,
-                                 const RowRef&);
+    void (Dbspj::*m_parent_row)(Signal*,Ptr<Request>,Ptr<TreeNode>,
+                                const RowPtr&);
+    
+    /**
+     * This function is called on the *child* by the *parent* when *parent*
+     *   has completed a batch
+     */
+    void (Dbspj::*m_parent_batch_complete)(Signal*,Ptr<Request>,Ptr<TreeNode>);
 
     /**
      * This function is called when getting a SCAN_NEXTREQ
@@ -196,111 +401,113 @@ public:
 
     /**
      * This function is called when all nodes in tree are finished
-     *   it's allowed to "block" (by increaseing requestPtr.p->m_cnt_active)
+     *   it's allowed to "block" (by increasing requestPtr.p->m_outstanding)
      */
     void (Dbspj::*m_complete)(Signal*, Ptr<Request>,Ptr<TreeNode>);
 
     /**
      * This function is called when a tree is aborted
-     *   it's allowed to "block" (by increaseing requestPtr.p->m_cnt_active)
+     *   it's allowed to "block" (by increasing requestPtr.p->m_outstanding)
      */
-    void (Dbspj::*m_abort)(Signal*, Ptr<Request>,Ptr<TreeNode>);
+    void (Dbspj::*m_abort)(Signal*, Ptr<Request>, Ptr<TreeNode>);
 
     /**
+     * This function is called on node-failure
+     */
+    Uint32 (Dbspj::*m_execNODE_FAILREP)(Signal*, Ptr<Request>, Ptr<TreeNode>,
+                                        NdbNodeBitmask);
+    /**
      * This function is called when request/node(s) is/are removed
      *  should only do local cleanup(s)
      */
     void (Dbspj::*m_cleanup)(Ptr<Request>, Ptr<TreeNode>);
-
-    /**
-     * This function is called on the root operation  when a LQHKEYCONF, 
-     * LQKEYREF or LQHKEYREQ signal is sent or received on behalf of a 
-     * descendant operation*/
-    void (Dbspj::*m_count_descendant_signal)(Signal* signal,
-                                             Ptr<Request> requestPtr,
-                                             Ptr<TreeNode> treeNodePtr,
-                                             Ptr<TreeNode> rootPtr,
-                                             Uint32 globalSignalNo);
   };
 
   struct LookupData
   {
     Uint32 m_api_resultRef;
     Uint32 m_api_resultData;
-    Uint32 m_outstanding;
     Uint32 m_lqhKeyReq[LqhKeyReq::FixedSignalLength + 4];
   };
 
   struct ScanFragData
   {
-    enum ScanFragState
-    {
-      /**
-       * Nothing oustanding
-       */
-      SF_IDLE = 0,
-
-      /**
-       * SCAN_FRAGREQ/SCAN_NEXTREQ is sent
-       */
-      SF_RUNNING = 1,
-
-      /**
-       * SCAN_FRAGCONF is received
-       */
-      SF_STARTED = 2,
+    Uint32 m_rows_received;  // #execTRANSID_AI
+    Uint32 m_rows_expecting; // ScanFragConf
+    Uint32 m_scanFragReq[ScanFragReq::SignalLength + 2];
+  };
 
-      /**
-       * SCAN_NEXTREQ(close) has been sent to datanodes
-       */
-      SF_CLOSING = 3
+  struct ScanIndexFrag
+  {
+    void init(Uint32 fid) { 
+      m_rangePtrI = RNIL; 
+      m_ref = 0; 
+      m_fragId = fid; 
+      m_range_builder.m_range_cnt = 0;
+      m_range_builder.m_range_size = 0;
+      m_state = Uint16(~0);
+    }
+    Uint32 m_magic;
+    Uint16 m_fragId;
+    Uint16 m_state;
+    Uint32 m_ref;
+    struct RangeBuilder 
+    {
+      Uint16 m_range_size;
+      Uint16 m_range_cnt; // to set bounds info correctly
+    } m_range_builder;
+    Uint32 m_rangePtrI;
+    union {
+      Uint32 nextList;
+      Uint32 nextPool;
     };
+  };
+
+  typedef RecordPool<ScanIndexFrag, ArenaPool> ScanIndexFrag_pool;
+  typedef SLFifoListImpl<ScanIndexFrag_pool, ScanIndexFrag> ScanIndexFrag_list;
+  typedef LocalSLFifoListImpl<ScanIndexFrag_pool, ScanIndexFrag> Local_ScanIndexFrag_list;
 
-    Uint32 m_scan_state;     // Only valid if TreeNodeState >= TN_ACTIVE
-    Uint32 m_scan_status;    // fragmentCompleted
-    bool   m_pending_close;  // SCAN_NEXTREQ(close) pending while SF_RUNNING
-    /** True if signal has been received since sending 
-     * last SCAN_FRAGREQ/SCAN_NEXTREQ*/
-    bool   m_scan_fragconf_received; 
+  struct ScanIndexData
+  {
+    Uint16 m_frags_complete;
+    Uint32 m_frags_outstanding;
     Uint32 m_rows_received;  // #execTRANSID_AI
-    Uint32 m_rows_expecting; // ScanFragConf
-    /** Number of receiced LQHKEYCONF messages from descendant lookup 
-     * operations which has user projections.*/
-    Uint32 m_descendant_keyconfs_received;
-    /** Number of receiced LQHKEYCONF messages from descendant lookup 
-     * operations which has no user projections.*/
-    Uint32 m_descendant_silent_keyconfs_received;
-    /** Number of received LQHKEYREF messages from descendant lookup 
-     * operations.*/
-    Uint32 m_descendant_keyrefs_received;
-    /** Number of LQHKEYREQ messages sent for descendant lookup operations.*/
-    Uint32 m_descendant_keyreqs_sent;
-    /** Number of missing transid AI messages for descendant lookup operations.
-     * This is decremented when we receive TRANSID_AI, and incremented when
-     * we receive LQHKEYCONF for a non-leaf operation. (For leaf operations,
-     * no TRANSID_AI is sent to the SPJ block.)*/
-    int m_missing_descendant_rows;
+    Uint32 m_rows_expecting; // Sum(ScanFragConf)
+    Uint32 m_scanCookie;
+    Uint32 m_fragCount;
+    Uint32 m_currentFragmentPtrI;
+    ScanIndexFrag_list::HeadPOD m_fragments; // ScanFrag states
+    union 
+    {
+      PatternStore::HeadPOD m_prunePattern;
+      Uint32 m_constPrunePtrI;
+    };
     Uint32 m_scanFragReq[ScanFragReq::SignalLength + 2];
   };
 
+  struct TreeNode_cursor_ptr
+  {
+    Uint32 nextList;
+  };
+
   /**
    * A node in a Query
    *   (This is an instantiated version of QueryNode in
    *    include/kernel/signal/QueryTree.hpp)
    */
-  struct TreeNode
+  struct TreeNode : TreeNode_cursor_ptr
   {
     STATIC_CONST ( MAGIC = ~RT_SPJ_TREENODE );
 
     TreeNode()
     : m_magic(MAGIC), m_state(TN_END),
-      m_node_no(0), m_requestPtrI(0)
+      m_parentPtrI(RNIL), m_requestPtrI(0)
     {}
 
-    TreeNode(Uint32 node_no, Uint32 request)
+    TreeNode(Uint32 request)
     : m_magic(MAGIC),
       m_info(0), m_bits(T_LEAF), m_state(TN_BUILDING),
-      m_node_no(node_no), m_requestPtrI(request),
+      m_parentPtrI(RNIL), m_requestPtrI(request),
       nextList(RNIL), prevList(RNIL)
     {
 //    m_send.m_ref = 0;
@@ -350,8 +557,8 @@ public:
       T_ATTR_INTERPRETED = 0x1,
 
       /**
-       * Will this request be executed only once
-       *   (implies key/attr-info will be disowned (by send-signal)
+       * Will node be executed only once (::parent_row())
+       *   implies key/attr-info will be disowned (by send-signal)
        */
       T_ONE_SHOT = 0x2,
 
@@ -385,6 +592,44 @@ public:
        */
       T_UNIQUE_INDEX_LOOKUP = 0x40,
 
+      /*
+       * Should this node buffers its rows
+       */
+      T_ROW_BUFFER = 0x80,
+
+      /**
+       * Should rows have dictionary (i.e random access capability)
+       *  This is typically used when having nodes depending on multiple parents
+       *  so that when row gets available from "last" parent, a key can be
+       *  constructed using correlation value from parents
+       */
+      T_ROW_BUFFER_MAP = 0x100,
+
+      /**
+       * Does any child need to know about when *my* batch is complete
+       */
+      T_REPORT_BATCH_COMPLETE  = 0x200,
+
+      /**
+       * Do I need to know when parent batch is completed
+       */
+      T_NEED_REPORT_BATCH_COMPLETED = 0x400,
+
+      /**
+       * Constant prune pattern
+       */
+      T_CONST_PRUNE = 0x800,
+
+      /**
+       * Prune pattern
+       */
+      T_PRUNE_PATTERN = 0x1000,
+
+      /**
+       * Should index scan be parallel
+       */
+      T_SCAN_PARALLEL = 0x2000,
+
       // End marker...
       T_END = 0
     };
@@ -393,16 +638,27 @@ public:
 
     Uint32 m_bits;
     Uint32 m_state;
-    const Uint32 m_node_no;
+    Uint32 m_batch_size;
+    Uint32 m_parentPtrI;
     const Uint32 m_requestPtrI;
     Dependency_map::Head m_dependent_nodes;
     PatternStore::Head m_keyPattern;
     PatternStore::Head m_attrParamPattern;
 
+    /**
+     * Rows buffered by this node
+     */
+    union
+    {
+      RowMap m_row_map;
+      SLFifoRowList m_row_list;
+    };
+
     union
     {
       LookupData m_lookup_data;
       ScanFragData m_scanfrag_data;
+      ScanIndexData m_scanindex_data;
     };
 
     struct {
@@ -423,34 +679,62 @@ public:
     Uint32 prevList;
   };
 
+  static const Ptr<TreeNode> NullTreeNodePtr;
+
   typedef RecordPool<TreeNode, ArenaPool> TreeNode_pool;
   typedef DLFifoListImpl<TreeNode_pool, TreeNode> TreeNode_list;
   typedef LocalDLFifoListImpl<TreeNode_pool, TreeNode> Local_TreeNode_list;
 
+  typedef SLListImpl<TreeNode_pool, TreeNode, TreeNode_cursor_ptr>
+  TreeNodeCursor_list;
+  typedef LocalSLListImpl<TreeNode_pool, TreeNode, TreeNode_cursor_ptr> 
+  Local_TreeNodeCursor_list;
+
   /**
    * A request (i.e a query + parameters)
    */
   struct Request
   {
+    enum RequestBits
+    {
+      RT_SCAN = 0x1            // unbounded result set, scan interface
+      ,RT_ROW_BUFFERS = 0x2    // Do any of the nodes use row-buffering
+      ,RT_MULTI_SCAN  = 0x4    // Are there several scans in the request
+      ,RT_VAR_ALLOC   = 0x8    // Is var-allocation used for row-buffer
+      ,RT_NEED_PREPARE = 0x10  // Does any node need m_prepare hook
+      ,RT_NEED_COMPLETE = 0x20 // Does any node need m_complete hook
+    };
+
+    enum RequestState
+    {
+      RS_BUILDING   = 0x1,
+      RS_PREPARING  = 0x2,
+      RS_RUNNING    = 0x3,
+      RS_COMPLETING = 0x4,
+      RS_ABORTING   = 0x1000, // Or:ed together with other states
+
+      RS_END = 0
+    };
+
     Request() {}
     Request(const ArenaHead & arena) : m_arena(arena) {}
     Uint32 m_magic;
     Uint32 m_bits;
+    Uint32 m_state;
+    Uint32 m_errCode;
     Uint32 m_node_cnt;
     Uint32 m_senderRef;
     Uint32 m_senderData;
     Uint32 m_rootResultData;
     Uint32 m_transId[2];
-    NdbNodeBitmask m_node_mask; // Dependant data nodes...
     TreeNode_list::Head m_nodes;
-    Uint32 m_currentNodePtrI;
+    TreeNodeCursor_list::Head m_cursor_nodes;
     Uint32 m_cnt_active;       // No of "running" nodes
+    Uint32 m_rows;             // Rows accumulated in current batch
+    Uint32 m_outstanding;      // Outstanding signals, when 0, batch is done
+    Uint16 m_lookup_node_data[MAX_NDB_NODES];
     ArenaHead m_arena;
-
-    enum RequestBits
-    {
-      RT_SCAN = 0x1  // unbounded result set, scan interface
-    };
+    RowBuffer m_rowBuffer;
 
     bool isScan() const { return (m_bits & RT_SCAN) != 0;}
     bool isLookup() const { return (m_bits & RT_SCAN) == 0;}
@@ -522,7 +806,24 @@ private:
      */
     CI_LOCAL_RANGE_SCANS_SENT = 6,
 
-    CI_END = 7 // End marker - not a valid counter id. 
+    /**
+     * This is the number of scans using ordered indexes that have been sent to a
+     * remote LQH block.
+     */
+    CI_REMOTE_RANGE_SCANS_SENT = 7,
+    
+    /**
+     * No of prunable index scans that have been received
+     */
+    CI_PRUNNED_RANGE_SCANS_RECEIVED = 8,
+
+    /**
+     * No of "const" prunable index scans that have been received,
+     * i.e. the index scan only accesses 1 partition
+     */
+    CI_CONST_PRUNNED_RANGE_SCANS_RECEIVED = 9,
+
+    CI_END = 10 // End marker - not a valid counter id. 
   };
 
   /**
@@ -559,6 +860,7 @@ private:
   typedef DLListImpl<Request_pool, Request> Request_list;
   typedef LocalDLListImpl<Request_pool, Request> Local_Request_list;
   typedef DLHashTableImpl<Request_pool, Request> Request_hash;
+  typedef DLHashTableImpl<Request_pool, Request>::Iterator Request_iterator;
 
   ArenaAllocator m_arenaAllocator;
   Request_pool m_request_pool;
@@ -566,11 +868,15 @@ private:
   Request_hash m_lookup_request_hash;
   ArenaPool m_dependency_map_pool;
   TreeNode_pool m_treenode_pool;
+  ScanIndexFrag_pool m_scanindexfrag_pool;
+
+  NdbNodeBitmask c_alive_nodes;
 
   void do_init(Request*, const LqhKeyReq*, Uint32 senderRef);
   void store_lookup(Ptr<Request>);
   void handle_early_lqhkey_ref(Signal*, const LqhKeyReq *, Uint32 err);
   void sendTCKEYREF(Signal* signal, Uint32 ref, Uint32 routeRef);
+  void sendTCKEYCONF(Signal* signal, Uint32 len, Uint32 ref, Uint32 routeRef);
 
   void do_init(Request*, const ScanFragReq*, Uint32 senderRef);
   void store_scan(Ptr<Request>);
@@ -588,34 +894,95 @@ private:
    * Build
    */
   const OpInfo* getOpInfo(Uint32 op);
-  Uint32 build(Build_context&, Ptr<Request>, SectionReader&, SectionReader&);
-  Uint32 createNode(Build_context&, Ptr<Request>, Ptr<TreeNode> &);
-  void start(Signal*, Ptr<Request>, SegmentedSectionPtr);
-  void nodeFinished(Signal* signal, Ptr<Request>, Ptr<TreeNode>);
+  Uint32 build(Build_context&,Ptr<Request>,SectionReader&,SectionReader&);
+  void checkPrepareComplete(Signal*, Ptr<Request>, Uint32 cnt);
+  void start(Signal*, Ptr<Request>);
+  void checkBatchComplete(Signal*, Ptr<Request>, Uint32 cnt);
+  void batchComplete(Signal*, Ptr<Request>);
+  void sendConf(Signal*, Ptr<Request>, bool is_complete);
+  void complete(Signal*, Ptr<Request>);
   void cleanup(Ptr<Request>);
+  void abort(Signal*, Ptr<Request>, Uint32 errCode);
+  Uint32 nodeFail(Signal*, Ptr<Request>, NdbNodeBitmask mask);
+
+  Uint32 createNode(Build_context&, Ptr<Request>, Ptr<TreeNode> &);
+  void reportBatchComplete(Signal*, Ptr<Request>, Ptr<TreeNode>);
+  void releaseScanBuffers(Ptr<Request> requestPtr);
+  void releaseRequestBuffers(Ptr<Request> requestPtr, bool reset);
+  void releaseNodeRows(Ptr<Request> requestPtr, Ptr<TreeNode>);
+  void releaseRow(Ptr<Request>, RowRef ref);
+  Uint32 releaseScanBuffers(Ptr<Request> requestPtr, Ptr<TreeNode>);
+  void registerCursor(Ptr<Request>, Ptr<TreeNode>);
+  void nodeFail_checkRequests(Signal*);
+
   void cleanup_common(Ptr<Request>, Ptr<TreeNode>);
 
   /**
+   * Row buffering
+   */
+  Uint32 storeRow(Ptr<Request>, Ptr<TreeNode>, RowPtr &row);
+  Uint32* stackAlloc(RowBuffer& dst, RowRef&, Uint32 len);
+  Uint32* varAlloc(RowBuffer& dst, RowRef&, Uint32 len);
+
+  void add_to_list(SLFifoRowList & list, RowRef rowref);
+  Uint32 add_to_map(Ptr<Request> requestPtr, Ptr<TreeNode>, Uint32, RowRef);
+  Uint32 * get_row_ptr(const RowMap&, RowMapIterator pos);
+  void setupRowPtr(Ptr<TreeNode>, RowPtr& dst, RowRef, const Uint32 * src);
+
+  // NOTE: ref contains info about it being stack/var
+  // so adding an inline would be nice...but that remove possibility
+  // to add jam()'s
+  Uint32 * get_row_ptr_stack(RowRef pos);
+  Uint32 * get_row_ptr_var(RowRef pos);
+
+  /**
+   * SLFifoRowListIterator
+   */
+  bool first(Ptr<Request>, Ptr<TreeNode>, SLFifoRowListIterator&);
+  bool next(SLFifoRowListIterator&);
+  bool next(Ptr<Request>, Ptr<TreeNode>, SLFifoRowListIterator&, SLFifoRowListIteratorPtr);
+
+  bool first(Ptr<Request>, Ptr<TreeNode>, RowMapIterator&);
+  bool next(RowMapIterator&);
+  bool next(Ptr<Request>,Ptr<TreeNode>, RowMapIterator&, RowMapIteratorPtr);
+
+  /**
    * Misc
    */
-  Uint32 buildRowHeader(RowRef::Header *, SegmentedSectionPtr);
-  Uint32 buildRowHeader(RowRef::Header *, const Uint32 *& src, Uint32 len);
-  void getCorrelationData(const RowRef::Section & row, Uint32 col,
+  Uint32 buildRowHeader(RowPtr::Header *, SegmentedSectionPtr);
+  Uint32 buildRowHeader(RowPtr::Header *, const Uint32 *& src, Uint32 len);
+  void getCorrelationData(const RowPtr::Section & row, Uint32 col,
+                          Uint32& rootStreamId, Uint32& correlationNumber);
+  void getCorrelationData(const RowPtr::Linear & row, Uint32 col,
                           Uint32& rootStreamId, Uint32& correlationNumber);
   Uint32 appendToPattern(Local_pattern_store &, DABuffer & tree, Uint32);
-  Uint32 appendParamToPattern(Local_pattern_store&,const RowRef::Linear&,
+  Uint32 appendParamToPattern(Local_pattern_store&,const RowPtr::Linear&,
                               Uint32);
 
   Uint32 appendTreeToSection(Uint32 & ptrI, SectionReader &, Uint32);
-  Uint32 appendColToSection(Uint32 & ptrI, const RowRef::Linear&, Uint32 col);
-  Uint32 appendColToSection(Uint32 & ptrI, const RowRef::Section&, Uint32 col);
-  Uint32 appendPkColToSection(Uint32 & ptrI, const RowRef::Section&, Uint32 col);
+  Uint32 appendColToSection(Uint32 & ptrI, const RowPtr::Linear&, Uint32 col);
+  Uint32 appendColToSection(Uint32 & ptrI, const RowPtr::Section&, Uint32 col);
+  Uint32 appendPkColToSection(Uint32 & ptrI, const RowPtr::Section&,Uint32 col);
+  Uint32 appendPkColToSection(Uint32 & ptrI, const RowPtr::Linear&, Uint32 col);
+  Uint32 appendAttrinfoToSection(Uint32 &, const RowPtr::Linear&, Uint32 col);
+  Uint32 appendAttrinfoToSection(Uint32 &, const RowPtr::Section&, Uint32 col);
   Uint32 appendDataToSection(Uint32 & ptrI, Local_pattern_store&,
 			     Local_pattern_store::ConstDataBufferIterator&,
 			     Uint32 len);
-  Uint32 appendAttrinfoToSection(Uint32&, const RowRef::Linear&, Uint32 col);
-  Uint32 appendAttrinfoToSection(Uint32&, const RowRef::Section&, Uint32 col);
-  Uint32 expand(Uint32 & ptrI, Local_pattern_store&, const RowRef::Section&);
+  Uint32 appendFromParent(Uint32 & ptrI, Local_pattern_store&,
+                          Local_pattern_store::ConstDataBufferIterator&,
+                          Uint32 level, const RowPtr&);
+  Uint32 expand(Uint32 & ptrI, Local_pattern_store& p, const RowPtr& r){
+    switch(r.m_type){
+    case RowPtr::RT_SECTION:
+      return expandS(ptrI, p, r);
+    case RowPtr::RT_LINEAR:
+      return expandL(ptrI, p, r);
+    }
+    return DbspjErr::InternalError;
+  }
+  Uint32 expandS(Uint32 & ptrI, Local_pattern_store&, const RowPtr&);
+  Uint32 expandL(Uint32 & ptrI, Local_pattern_store&, const RowPtr&);
   Uint32 expand(Uint32 & ptrI, DABuffer& pattern, Uint32 len,
                 DABuffer & param, Uint32 cnt);
   Uint32 expand(Local_pattern_store& dst, DABuffer& pattern, Uint32 len,
@@ -627,28 +994,26 @@ private:
   Uint32 zeroFill(Uint32 & ptrI, Uint32 cnt);
   Uint32 createEmptySection(Uint32 & ptrI);
 
-  /** Find root operation.*/
-  const Ptr<TreeNode> getRoot(TreeNode_list::Head& head);
-  
+  Uint32 getResultRef(Ptr<Request> requestPtr);
+
   /**
    * Lookup
    */
   static const OpInfo g_LookupOpInfo;
   Uint32 lookup_build(Build_context&,Ptr<Request>,
 		      const QueryNode*, const QueryNodeParameters*);
-  void lookup_start(Signal*, Ptr<Request>, Ptr<TreeNode>, SegmentedSectionPtr);
+  void lookup_start(Signal*, Ptr<Request>, Ptr<TreeNode>);
   void lookup_send(Signal*, Ptr<Request>, Ptr<TreeNode>);
   void lookup_execTRANSID_AI(Signal*, Ptr<Request>, Ptr<TreeNode>,
-			     const RowRef&);
+			     const RowPtr&);
   void lookup_execLQHKEYREF(Signal*, Ptr<Request>, Ptr<TreeNode>);
   void lookup_execLQHKEYCONF(Signal*, Ptr<Request>, Ptr<TreeNode>);
-  void lookup_start_child(Signal*, Ptr<Request>, Ptr<TreeNode>, const RowRef &);
+  void lookup_parent_row(Signal*, Ptr<Request>, Ptr<TreeNode>, const RowPtr &);
+  void lookup_parent_batch_complete(Signal*, Ptr<Request>, Ptr<TreeNode>);
+  void lookup_abort(Signal*, Ptr<Request>, Ptr<TreeNode>);
+  Uint32 lookup_execNODE_FAILREP(Signal*signal, Ptr<Request>, Ptr<TreeNode>,
+                               NdbNodeBitmask);
   void lookup_cleanup(Ptr<Request>, Ptr<TreeNode>);
-  void lookup_count_descendant_signal(Signal* signal,
-                                      Ptr<Request> requestPtr,
-                                      Ptr<TreeNode> treeNodePtr,
-                                      Ptr<TreeNode> rootPtr,
-                                      Uint32 globalSignalNo){};
 
   Uint32 handle_special_hash(Uint32 tableId, Uint32 dstHash[4],
                              const Uint64* src,
@@ -664,21 +1029,55 @@ private:
   static const OpInfo g_ScanFragOpInfo;
   Uint32 scanFrag_build(Build_context&, Ptr<Request>,
                         const QueryNode*, const QueryNodeParameters*);
-  void scanFrag_start(Signal*, Ptr<Request>,Ptr<TreeNode>,SegmentedSectionPtr);
+  void scanFrag_start(Signal*, Ptr<Request>,Ptr<TreeNode>);
   void scanFrag_send(Signal*, Ptr<Request>, Ptr<TreeNode>);
   void scanFrag_execTRANSID_AI(Signal*, Ptr<Request>, Ptr<TreeNode>,
-			       const RowRef &);
+			       const RowPtr &);
   void scanFrag_execSCAN_FRAGREF(Signal*, Ptr<Request>, Ptr<TreeNode>);
   void scanFrag_execSCAN_FRAGCONF(Signal*, Ptr<Request>, Ptr<TreeNode>);
-  void scanFrag_batch_complete(Signal*, Ptr<Request>, Ptr<TreeNode>);
-  void scanFrag_start_child(Signal*,Ptr<Request>,Ptr<TreeNode>, const RowRef &);
+  void scanFrag_parent_row(Signal*,Ptr<Request>,Ptr<TreeNode>, const RowPtr &);
+  void scanFrag_parent_batch_complete(Signal*, Ptr<Request>, Ptr<TreeNode>);
   void scanFrag_execSCAN_NEXTREQ(Signal*, Ptr<Request>,Ptr<TreeNode>);
+  void scanFrag_abort(Signal*, Ptr<Request>, Ptr<TreeNode>);
   void scanFrag_cleanup(Ptr<Request>, Ptr<TreeNode>);
-  void scanFrag_count_descendant_signal(Signal* signal,
-                                        Ptr<Request> requestPtr,
-                                        Ptr<TreeNode> treeNodePtr,
-                                        Ptr<TreeNode> rootPtr,
-                                        Uint32 globalSignalNo);
+
+  /**
+   * ScanIndex
+   */
+  static const OpInfo g_ScanIndexOpInfo;
+  Uint32 scanIndex_build(Build_context&, Ptr<Request>,
+                         const QueryNode*, const QueryNodeParameters*);
+  Uint32 parseScanIndex(Build_context&, Ptr<Request>, Ptr<TreeNode>,
+                        DABuffer tree, Uint32 treeBits,
+                        DABuffer param, Uint32 paramBits);
+  void scanIndex_prepare(Signal*, Ptr<Request>, Ptr<TreeNode>);
+  void scanIndex_execTRANSID_AI(Signal*, Ptr<Request>, Ptr<TreeNode>,
+                                const RowPtr &);
+  void scanIndex_execSCAN_FRAGREF(Signal*, Ptr<Request>, Ptr<TreeNode>);
+  void scanIndex_execSCAN_FRAGCONF(Signal*, Ptr<Request>, Ptr<TreeNode>);
+  void scanIndex_parent_row(Signal*,Ptr<Request>,Ptr<TreeNode>, const RowPtr&);
+  void scanIndex_fixupBound(Ptr<ScanIndexFrag> fragPtr, Uint32 ptrI, Uint32);
+  void scanIndex_send(Signal*,Ptr<Request>,Ptr<TreeNode>);
+  void scanIndex_batchComplete(Signal* signal);
+  Uint32 scanIndex_findFrag(Local_ScanIndexFrag_list &, Ptr<ScanIndexFrag>&,
+                            Uint32 fragId);
+  void scanIndex_parent_batch_complete(Signal*, Ptr<Request>, Ptr<TreeNode>);
+  void scanIndex_execSCAN_NEXTREQ(Signal*, Ptr<Request>,Ptr<TreeNode>);
+  void scanIndex_complete(Signal*, Ptr<Request>, Ptr<TreeNode>);
+  void scanIndex_abort(Signal*, Ptr<Request>, Ptr<TreeNode>);
+  Uint32 scanIndex_execNODE_FAILREP(Signal*signal, Ptr<Request>, Ptr<TreeNode>,
+                                  NdbNodeBitmask);
+  void scanIndex_cleanup(Ptr<Request>, Ptr<TreeNode>);
+
+  /**
+   * Page manager
+   */
+  bool allocPage(Ptr<RowPage> &);
+  void releasePage(Ptr<RowPage>);
+  void releasePages(Uint32 first, Ptr<RowPage> last);
+  void releaseGlobal(Signal*);
+  SLList<RowPage>::Head m_free_page_list;
+  ArrayPool<RowPage> m_page_pool;
 
   /**
    * Scratch buffers...

=== modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjInit.cpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/DbspjInit.cpp	2010-03-09 20:04:49 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjInit.cpp	2010-05-20 11:18:08 +0000
@@ -30,9 +30,22 @@ Dbspj::Dbspj(Block_context& ctx, Uint32 
   BLOCK_CONSTRUCTOR(Dbspj);
 
   addRecSignal(GSN_DUMP_STATE_ORD, &Dbspj::execDUMP_STATE_ORD);
+  addRecSignal(GSN_READ_NODESCONF, &Dbspj::execREAD_NODESCONF);
   addRecSignal(GSN_READ_CONFIG_REQ, &Dbspj::execREAD_CONFIG_REQ);
   addRecSignal(GSN_STTOR, &Dbspj::execSTTOR);
   addRecSignal(GSN_DBINFO_SCANREQ, &Dbspj::execDBINFO_SCANREQ);
+  addRecSignal(GSN_CONTINUEB, &Dbspj::execCONTINUEB);
+  addRecSignal(GSN_NODE_FAILREP, &Dbspj::execNODE_FAILREP);
+  addRecSignal(GSN_INCL_NODEREQ, &Dbspj::execINCL_NODEREQ);
+  addRecSignal(GSN_API_FAILREQ, &Dbspj::execAPI_FAILREQ);
+
+  /**
+   * Signals from DIH
+   */
+  addRecSignal(GSN_DIH_SCAN_TAB_REF, &Dbspj::execDIH_SCAN_TAB_REF);
+  addRecSignal(GSN_DIH_SCAN_TAB_CONF, &Dbspj::execDIH_SCAN_TAB_CONF);
+  addRecSignal(GSN_DIH_SCAN_GET_NODES_REF, &Dbspj::execDIH_SCAN_GET_NODES_REF);
+  addRecSignal(GSN_DIH_SCAN_GET_NODES_CONF,&Dbspj::execDIH_SCAN_GET_NODES_CONF);
 
   /**
    * Signals from TC
@@ -55,6 +68,7 @@ Dbspj::Dbspj(Block_context& ctx, Uint32 
 
 Dbspj::~Dbspj() 
 {
+  m_page_pool.clear();
 }//Dbspj::~Dbspj()
 
 

=== modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp	2010-05-20 06:36:59 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp	2010-05-20 11:18:08 +0000
@@ -18,12 +18,12 @@
 
 #include <SectionReader.hpp>
 #include <signaldata/LqhKey.hpp>
-#include <signaldata/DbspjErr.hpp>
 #include <signaldata/QueryTree.hpp>
 #include <signaldata/TcKeyRef.hpp>
 #include <signaldata/RouteOrd.hpp>
 #include <signaldata/TransIdAI.hpp>
 #include <signaldata/DiGetNodes.hpp>
+#include <signaldata/DihScanTab.hpp>
 #include <signaldata/AttrInfo.hpp>
 #include <Interpreter.hpp>
 #include <AttributeHeader.hpp>
@@ -31,6 +31,9 @@
 #include <md5_hash.hpp>
 #include <signaldata/TcKeyConf.hpp>
 
+#include <signaldata/NodeFailRep.hpp>
+#include <signaldata/ReadNodesConf.hpp>
+
 // Use DEBUG to print messages that should be
 // seen only when we debug the product
 #ifdef VM_TRACE
@@ -53,11 +56,12 @@
 
 
 #undef DEBUG
+#define DEBUG(x)
 #undef DEBUG_LQHKEYREQ
 #undef DEBUG_SCAN_FRAGREQ
 
-#define DEBUG(x)
-
+const Ptr<Dbspj::TreeNode> Dbspj::NullTreeNodePtr = { 0, RNIL };
+const Dbspj::RowRef Dbspj::NullRowRef = { RNIL, GLOBAL_PAGE_SIZE_WORDS, 0 };
 
 /** A noop for now.*/
 void Dbspj::execREAD_CONFIG_REQ(Signal* signal) 
@@ -76,8 +80,11 @@ void Dbspj::execREAD_CONFIG_REQ(Signal* 
   m_arenaAllocator.init(1024, RT_SPJ_ARENA_BLOCK, pc);
   m_request_pool.arena_pool_init(&m_arenaAllocator, RT_SPJ_REQUEST, pc);
   m_treenode_pool.arena_pool_init(&m_arenaAllocator, RT_SPJ_TREENODE, pc);
+  m_scanindexfrag_pool.arena_pool_init(&m_arenaAllocator, RT_SPJ_SCANFRAG, pc);
   m_lookup_request_hash.setSize(16);
   m_scan_request_hash.setSize(16);
+  void* ptr = m_ctx.m_mm.get_memroot();
+  m_page_pool.set((RowPage*)ptr, (Uint32)~0);
 
   Record_info ri;
   Dependency_map::createRecordInfo(ri, RT_SPJ_DATABUFFER);
@@ -92,6 +99,7 @@ void Dbspj::execREAD_CONFIG_REQ(Signal* 
 	     ReadConfigConf::SignalLength, JBB);
 }//Dbspj::execREAD_CONF_REQ()
 
+static Uint32 f_STTOR_REF = 0;
 
 void Dbspj::execSTTOR(Signal* signal) 
 {
@@ -100,20 +108,28 @@ void Dbspj::execSTTOR(Signal* signal) 
   jamEntry();
   /* START CASE */
   const Uint16 tphase = signal->theData[1];
+  f_STTOR_REF = signal->getSendersBlockRef();
 
   ndbout << "Dbspj::execSTTOR() inst:" << instance() 
 	 << " phase=" << tphase << endl;
-  const Uint16 csignalKey = signal->theData[6];
-  signal->theData[0] = csignalKey;
-  signal->theData[1] = 3;    /* BLOCK CATEGORY */
-  signal->theData[2] = 2;    /* SIGNAL VERSION NUMBER */
-#ifdef UNIT_TEST_DATABUFFER2
-  signal->theData[3] = 120;  /* Start phase end*/
-#else
-  signal->theData[3] = 255;
-#endif
-  signal->theData[4] = 255;
-  sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 5, JBB);
+
+  if (tphase == 1)
+  {
+    jam();
+    signal->theData[0] = 0;
+    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1000, 1);
+  }
+
+  if (tphase == 4)
+  {
+    jam();
+
+    signal->theData[0] = reference();
+    sendSignal(NDBCNTR_REF, GSN_READ_NODESREQ, signal, 1, JBB);
+    return;
+  }
+
+  sendSTTORRY(signal);
 
 #ifdef UNIT_TEST_DATABUFFER2
   if (tphase == 120)
@@ -180,6 +196,173 @@ void Dbspj::execSTTOR(Signal* signal) 
 #endif
 }//Dbspj::execSTTOR()
 
+void
+Dbspj::sendSTTORRY(Signal* signal)
+{
+  signal->theData[0] = 0;
+  signal->theData[1] = 0;    /* BLOCK CATEGORY */
+  signal->theData[2] = 0;    /* SIGNAL VERSION NUMBER */
+  signal->theData[3] = 4;
+#ifdef UNIT_TEST_DATABUFFER2
+  signal->theData[4] = 120;  /* Start phase end*/
+#else
+  signal->theData[4] = 255;
+#endif
+  signal->theData[5] = 255;
+  sendSignal(f_STTOR_REF, GSN_STTORRY, signal, 6, JBB);
+}
+
+void
+Dbspj::execREAD_NODESCONF(Signal* signal)
+{
+  jamEntry();
+
+  ReadNodesConf * const conf = (ReadNodesConf *)signal->getDataPtr();
+
+  if (getNodeState().getNodeRestartInProgress())
+  {
+    jam();
+    c_alive_nodes.assign(NdbNodeBitmask::Size, conf->startedNodes);
+    c_alive_nodes.set(getOwnNodeId());
+  }
+  else
+  {
+    jam();
+    c_alive_nodes.assign(NdbNodeBitmask::Size, conf->startingNodes);
+    NdbNodeBitmask tmp;
+    tmp.assign(NdbNodeBitmask::Size, conf->startedNodes);
+    ndbrequire(tmp.isclear()); // No nodes can be started during SR
+  }
+
+  sendSTTORRY(signal);
+}
+
+void
+Dbspj::execINCL_NODEREQ(Signal* signal)
+{
+  jamEntry();
+  const Uint32 senderRef = signal->theData[0];
+  const Uint32 nodeId  = signal->theData[1];
+
+  ndbrequire(!c_alive_nodes.get(nodeId));
+  c_alive_nodes.set(nodeId);
+
+  signal->theData[0] = nodeId;
+  signal->theData[1] = reference();
+  sendSignal(senderRef, GSN_INCL_NODECONF, signal, 2, JBB);
+}
+
+void
+Dbspj::execNODE_FAILREP(Signal* signal)
+{
+  jamEntry();
+
+  const NodeFailRep * rep = (NodeFailRep*)signal->getDataPtr();
+  NdbNodeBitmask failed;
+  failed.assign(NdbNodeBitmask::Size, rep->theNodes);
+
+  c_alive_nodes.bitANDC(failed);
+
+  signal->theData[0] = 1;
+  signal->theData[1] = 0;
+  failed.copyto(NdbNodeBitmask::Size, signal->theData + 2);
+  sendSignal(reference(), GSN_CONTINUEB, signal, 2 + NdbNodeBitmask::Size,
+             JBB);
+}
+
+void
+Dbspj::execAPI_FAILREQ(Signal* signal)
+{
+  jamEntry();
+  Uint32 failedApiNode = signal->theData[0];
+  ndbrequire(signal->theData[1] == QMGR_REF); // As callback hard-codes QMGR
+
+  /**
+   * We only need to care about lookups
+   *   as SCAN's are aborted by DBTC
+   */
+
+  signal->theData[0] = failedApiNode;
+  signal->theData[1] = reference();
+  sendSignal(QMGR_REF, GSN_API_FAILCONF, signal, 2, JBB);
+}
+
+void
+Dbspj::execCONTINUEB(Signal* signal)
+{
+  switch(signal->theData[0]) {
+  case 0:
+    releaseGlobal(signal);
+    return;
+  case 1:
+    nodeFail_checkRequests(signal);
+    return;
+  case 2:
+    nodeFail_checkRequests(signal);
+    return;
+  }
+
+  ndbrequire(false);
+}
+
+void
+Dbspj::nodeFail_checkRequests(Signal* signal)
+{
+  jam();
+  const Uint32 type = signal->theData[0];
+  const Uint32 bucket = signal->theData[1];
+
+  NdbNodeBitmask failed;
+  failed.assign(NdbNodeBitmask::Size, signal->theData+2);
+
+  Request_iterator iter;
+  Request_hash * hash;
+  switch(type){
+  case 1:
+    hash = &m_lookup_request_hash;
+    break;
+  case 2:
+    hash = &m_scan_request_hash;
+    break;
+  }
+  hash->next(bucket, iter);
+
+  const Uint32 RT_BREAK = 64;
+  for(Uint32 i = 0; (i<RT_BREAK || iter.bucket == bucket) &&
+        !iter.curr.isNull(); i++)
+  {
+    jam();
+
+    Ptr<Request> requestPtr = iter.curr;
+    hash->next(iter);
+    i += nodeFail(signal, requestPtr, failed);
+  }
+
+  if (!iter.curr.isNull())
+  {
+    jam();
+    signal->theData[0] = type;
+    signal->theData[1] = bucket;
+    failed.copyto(NdbNodeBitmask::Size, signal->theData+2);
+    sendSignal(reference(), GSN_CONTINUEB, signal, 2 + NdbNodeBitmask::Size,
+               JBB);
+  }
+  else if (type == 1)
+  {
+    jam();
+    signal->theData[0] = 2;
+    signal->theData[1] = 0;
+    failed.copyto(NdbNodeBitmask::Size, signal->theData+2);
+    sendSignal(reference(), GSN_CONTINUEB, signal, 2 + NdbNodeBitmask::Size,
+               JBB);
+  }
+  else if (type == 2)
+  {
+    jam();
+    ndbout_c("Finished with handling node-failure");
+  }
+}
+
 /**
  * MODULE LQHKEYREQ
  */
@@ -236,6 +419,11 @@ void Dbspj::execLQHKEYREQ(Signal* signal
       Build_context ctx;
       ctx.m_resultRef = req->variableData[0];
       ctx.m_savepointId = req->savePointId;
+      ctx.m_scanPrio = 1;
+      ctx.m_start_signal = signal;
+      ctx.m_keyPtr.i = handle.m_ptr[LqhKeyReq::KeyInfoSectionNum].i;
+      ctx.m_senderRef = signal->getSendersBlockRef();
+
       err = build(ctx, requestPtr, treeReader, paramReader);
       if (unlikely(err != 0))
 	break;
@@ -257,10 +445,9 @@ void Dbspj::execLQHKEYREQ(Signal* signal
     store_lookup(requestPtr);
 
     release(ssPtr);
-    handle.getSection(ssPtr, LqhKeyReq::KeyInfoSectionNum);
     handle.clear();
 
-    start(signal, requestPtr, ssPtr);
+    start(signal, requestPtr);
     return;
   } while (0);
 
@@ -281,11 +468,15 @@ void
 Dbspj::do_init(Request* requestP, const LqhKeyReq* req, Uint32 senderRef)
 {
   requestP->m_bits = 0;
+  requestP->m_errCode = 0;
+  requestP->m_state = Request::RS_BUILDING;
   requestP->m_node_cnt = 0;
   requestP->m_cnt_active = 0;
+  requestP->m_rows = 0;
+  requestP->m_outstanding = 0;
   requestP->m_transId[0] = req->transId1;
   requestP->m_transId[1] = req->transId2;
-  requestP->m_node_mask.clear();
+  bzero(requestP->m_lookup_node_data, sizeof(requestP->m_lookup_node_data));
 
   const Uint32 reqInfo = req->requestInfo;
   Uint32 tmp = req->clientConnectPtr;
@@ -299,7 +490,7 @@ Dbspj::do_init(Request* requestP, const 
     //const Uint32 apiOpRec = lqhKeyReq->variableData[1];
     tmp = req->variableData[1];
     requestP->m_senderData = tmp;
-    requestP->m_senderRef = 0;
+    requestP->m_senderRef = senderRef;
   }
   else
   {
@@ -394,25 +585,46 @@ Dbspj::sendTCKEYREF(Signal* signal, Uint
   }
   else
   {
-    if (routeRef)
-    {
-      jam();
-      memmove(signal->theData+25, signal->theData, 4*TcKeyRef::SignalLength);
-      RouteOrd* ord = (RouteOrd*)signal->getDataPtrSend();
-      ord->dstRef = ref;
-      ord->srcRef = reference();
-      ord->gsn = GSN_TCKEYREF;
-      ord->cnt = 0;
-      LinearSectionPtr ptr[3];
-      ptr[0].p = signal->theData+25;
-      ptr[0].sz = TcKeyRef::SignalLength;
-      sendSignal(routeRef, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBB,
-		 ptr, 1);
-    }
-    else
-    {
-      ndbrequire(false);
-    }
+    jam();
+    memmove(signal->theData+25, signal->theData, 4*TcKeyRef::SignalLength);
+    RouteOrd* ord = (RouteOrd*)signal->getDataPtrSend();
+    ord->dstRef = ref;
+    ord->srcRef = reference();
+    ord->gsn = GSN_TCKEYREF;
+    ord->cnt = 0;
+    LinearSectionPtr ptr[3];
+    ptr[0].p = signal->theData+25;
+    ptr[0].sz = TcKeyRef::SignalLength;
+    sendSignal(routeRef, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBB,
+               ptr, 1);
+  }
+}
+
+void
+Dbspj::sendTCKEYCONF(Signal* signal, Uint32 len, Uint32 ref, Uint32 routeRef)
+{
+  const Uint32 nodeId = refToNode(ref);
+  const bool connectedToNode = getNodeInfo(nodeId).m_connected;
+  
+  if (likely(connectedToNode))
+  {
+    jam();
+    sendSignal(ref, GSN_TCKEYCONF, signal, len, JBB);
+  }
+  else
+  {
+    jam();
+    memmove(signal->theData+25, signal->theData, 4*len);
+    RouteOrd* ord = (RouteOrd*)signal->getDataPtrSend();
+    ord->dstRef = ref;
+    ord->srcRef = reference();
+    ord->gsn = GSN_TCKEYCONF;
+    ord->cnt = 0;
+    LinearSectionPtr ptr[3];
+    ptr[0].p = signal->theData+25;
+    ptr[0].sz = len;
+    sendSignal(routeRef, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBB,
+               ptr, 1);
   }
 }
 
@@ -490,6 +702,21 @@ Dbspj::execSCAN_FRAGREQ(Signal* signal)
       ctx.m_resultRef = req->resultRef;
       ctx.m_scanPrio = ScanFragReq::getScanPrio(req->requestInfo);
       ctx.m_savepointId = req->savePointId;
+      ctx.m_batch_size_rows = req->batch_size_rows;
+      ctx.m_start_signal = signal;
+      ctx.m_senderRef = signal->getSendersBlockRef();
+
+      if (handle.m_cnt > 1)
+      {
+        jam();
+        ctx.m_keyPtr.i = handle.m_ptr[ScanFragReq::KeyInfoSectionNum].i;
+      }
+      else
+      {
+        jam();
+        ctx.m_keyPtr.i = RNIL;
+      }
+
       err = build(ctx, requestPtr, treeReader, paramReader);
       if (unlikely(err != 0))
 	break;
@@ -507,16 +734,9 @@ Dbspj::execSCAN_FRAGREQ(Signal* signal)
     store_scan(requestPtr);
 
     release(ssPtr);
-    ssPtr.i = RNIL;
-    ssPtr.p = 0;
-    if (handle.m_cnt > 1)
-    {
-      jam();
-      handle.getSection(ssPtr, ScanFragReq::KeyInfoSectionNum);
-    }
     handle.clear();
 
-    start(signal, requestPtr, ssPtr);
+    start(signal, requestPtr);
     return;
   } while (0);
 
@@ -533,15 +753,18 @@ void
 Dbspj::do_init(Request* requestP, const ScanFragReq* req, Uint32 senderRef)
 {
   requestP->m_bits = 0;
+  requestP->m_errCode = 0;
+  requestP->m_state = Request::RS_BUILDING;
   requestP->m_node_cnt = 0;
   requestP->m_cnt_active = 0;
+  requestP->m_rows = 0;
+  requestP->m_outstanding = 0;
   requestP->m_senderRef = senderRef;
   requestP->m_senderData = req->senderData;
   requestP->m_transId[0] = req->transId1;
   requestP->m_transId[1] = req->transId2;
-  requestP->m_node_mask.clear();
   requestP->m_rootResultData = req->resultData;
-  requestP->m_currentNodePtrI = RNIL;
+  bzero(requestP->m_lookup_node_data, sizeof(requestP->m_lookup_node_data));
 }
 
 void
@@ -580,13 +803,14 @@ Dbspj::handle_early_scanfrag_ref(Signal*
  */
 Uint32
 Dbspj::build(Build_context& ctx,
-	     Ptr<Request> requestPtr,
+             Ptr<Request> requestPtr,
              SectionReader & tree,
              SectionReader & param)
 {
   Uint32 tmp0, tmp1;
   Uint32 err = DbspjErr::ZeroLengthQueryTree;
   ctx.m_cnt = 0;
+  ctx.m_scan_cnt = 0;
 
   tree.getWord(&tmp0);
   Uint32 loop = QueryTree::getNodeCnt(tmp0);
@@ -654,6 +878,7 @@ Dbspj::build(Build_context& ctx,
       DEBUG_CRASH();
       goto error;
     }
+
     const OpInfo* info = getOpInfo(node_op);
     if (unlikely(info == 0))
     {
@@ -673,12 +898,56 @@ Dbspj::build(Build_context& ctx,
     }
 
     /**
+     * only first node gets access to signal
+     */
+    ctx.m_start_signal = 0;
+
+    /**
      * TODO handle error, by aborting request
      */
     ndbrequire(ctx.m_cnt < NDB_ARRAY_SIZE(ctx.m_node_list));
     ctx.m_cnt++;
   }
   requestPtr.p->m_node_cnt = ctx.m_cnt;
+
+  if (ctx.m_scan_cnt > 1)
+  {
+    jam();
+    requestPtr.p->m_bits |= Request::RT_MULTI_SCAN;
+
+    /**
+     * Iff, multi-scan is non-bushy (normal case)
+     *   we don't strictly need RT_VAR_ALLOC for RT_ROW_BUFFERS
+     *   but could instead pop-row stack frame, 
+     *     however this is not implemented...
+     *
+     * so, use RT_VAR_ALLOC
+     */
+    if (requestPtr.p->m_bits & Request::RT_ROW_BUFFERS)
+    {
+      jam();
+      requestPtr.p->m_bits |= Request::RT_VAR_ALLOC;
+    }
+
+    {
+      /**
+       * If multi scan, then cursors are determined when one batch is complete
+       *   hence clear list here...
+       * But if it's single scan...the list will already contain the
+       *   only scan in the tree
+       */
+      Local_TreeNodeCursor_list list(m_treenode_pool, 
+                                     requestPtr.p->m_cursor_nodes);
+      ndbassert(list.noOfElements() > 1);
+      list.remove();
+    }
+  }
+  
+//#define JONAS_TESTING_ROW_BUFFERING
+#ifdef JONAS_TESTING_ROW_BUFFERING
+  requestPtr.p->m_bits |= Request::RT_VAR_ALLOC;
+#endif
+
   return 0;
 
 error:
@@ -698,7 +967,7 @@ Dbspj::createNode(Build_context& ctx, Pt
   if (m_treenode_pool.seize(requestPtr.p->m_arena, treeNodePtr))
   {
     DEBUG("createNode - seize -> ptrI: " << treeNodePtr.i);
-    new (treeNodePtr.p) TreeNode(ctx.m_cnt, requestPtr.i);
+    new (treeNodePtr.p) TreeNode(requestPtr.i);
     ctx.m_node_list[ctx.m_cnt] = treeNodePtr;
     Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
     list.addLast(treeNodePtr);
@@ -709,312 +978,807 @@ Dbspj::createNode(Build_context& ctx, Pt
 
 void
 Dbspj::start(Signal* signal,
-	     Ptr<Request> requestPtr,
-	     SegmentedSectionPtr keyPtr)
+             Ptr<Request> requestPtr)
 {
-  Ptr<TreeNode> nodePtr;
+  if (requestPtr.p->m_bits & Request::RT_NEED_PREPARE)
   {
-    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
-    ndbrequire(list.first(nodePtr));
-  }
-  ndbrequire(nodePtr.p->m_info != 0 && nodePtr.p->m_info->m_start != 0);
-  (this->*(nodePtr.p->m_info->m_start))(signal, requestPtr, nodePtr, keyPtr);
-}
-
-void
-Dbspj::nodeFinished(Signal* signal,
-                    Ptr<Request> requestPtr,
-                    Ptr<TreeNode> treeNodePtr)
-{
-  ndbrequire(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
-  treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
+    jam();
+    requestPtr.p->m_outstanding = 0;
+    requestPtr.p->m_state = Request::RS_PREPARING;
 
-  Uint32 cnt = requestPtr.p->m_cnt_active;
-  DEBUG("nodeFinished(" << cnt << ")");
-  ndbrequire(cnt);
-  requestPtr.p->m_cnt_active = cnt - 1;
+    Ptr<TreeNode> nodePtr;
+    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
+    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
+    {
+      jam();
+      ndbrequire(nodePtr.p->m_info != 0);
+      if (nodePtr.p->m_info->m_prepare != 0)
+      {
+        jam();
+        (this->*(nodePtr.p->m_info->m_prepare))(signal, requestPtr, nodePtr);
+      }
+    }
 
-  if (cnt == 1)
-  {
-    jam();
-    DEBUG("->requestFinished");
     /**
-     * TODO add complete/abort phase
+     * preferably RT_NEED_PREPARE should only be set if blocking
+     * calls are used, in which case m_outstanding should have been increased
      */
-    cleanup(requestPtr);
+    ndbassert(requestPtr.p->m_outstanding);
   }
+  
+  checkPrepareComplete(signal, requestPtr, 0);
 }
 
 void
-Dbspj::cleanup(Ptr<Request> requestPtr)
+Dbspj::checkPrepareComplete(Signal * signal, Ptr<Request> requestPtr, 
+                            Uint32 cnt)
 {
-  {
-    Ptr<TreeNode> nodePtr;
-    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
-    for (list.first(nodePtr); !nodePtr.isNull(); )
-    {
-      jam();
-      ndbrequire(nodePtr.p->m_info != 0 && nodePtr.p->m_info->m_cleanup != 0);
-      (this->*(nodePtr.p->m_info->m_cleanup))(requestPtr, nodePtr);
+  ndbrequire(requestPtr.p->m_outstanding >= cnt);
+  requestPtr.p->m_outstanding -= cnt;
 
-      Ptr<TreeNode> tmp = nodePtr;
-      list.next(nodePtr);
-      m_treenode_pool.release(tmp);
-    }
-  }
-  if (requestPtr.p->isScan())
+  if (requestPtr.p->m_outstanding == 0)
   {
     jam();
-#ifdef VM_TRACE
+    Ptr<TreeNode> nodePtr;
     {
-      Request key;
-      key.m_transId[0] = requestPtr.p->m_transId[0];
-      key.m_transId[1] = requestPtr.p->m_transId[1];
-      key.m_senderData = requestPtr.p->m_senderData;
-      Ptr<Request> tmp;
-      ndbrequire(m_scan_request_hash.find(tmp, key));
+      Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
+      ndbrequire(list.first(nodePtr));
     }
-#endif
-    m_scan_request_hash.remove(requestPtr);
+    ndbrequire(nodePtr.p->m_info != 0 && nodePtr.p->m_info->m_start != 0);
+    (this->*(nodePtr.p->m_info->m_start))(signal, requestPtr, nodePtr);
   }
-  else
+}
+
+void
+Dbspj::checkBatchComplete(Signal * signal, Ptr<Request> requestPtr, 
+                              Uint32 cnt)
+{
+  ndbrequire(requestPtr.p->m_outstanding >= cnt);
+  requestPtr.p->m_outstanding -= cnt;
+
+  if (requestPtr.p->m_outstanding == 0)
   {
     jam();
-#ifdef VM_TRACE
-    {
-      Request key;
-      key.m_transId[0] = requestPtr.p->m_transId[0];
-      key.m_transId[1] = requestPtr.p->m_transId[1];
-      key.m_senderData = requestPtr.p->m_senderData;
-      Ptr<Request> tmp;
-      ndbrequire(m_lookup_request_hash.find(tmp, key));
-    }
-#endif
-    m_lookup_request_hash.remove(requestPtr);
+    batchComplete(signal, requestPtr);
   }
-  ArenaHead ah = requestPtr.p->m_arena;
-  m_request_pool.release(requestPtr);
-  m_arenaAllocator.release(ah);
 }
 
 void
-Dbspj::cleanup_common(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
+Dbspj::batchComplete(Signal* signal, Ptr<Request> requestPtr)
 {
-  jam();
+  ndbrequire(requestPtr.p->m_outstanding == 0); // "definition" of batchComplete
 
-  LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
-  {
-    Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
-    list.release();
-  }
+  bool is_complete = requestPtr.p->m_cnt_active == 0;
+  bool need_complete_phase = requestPtr.p->m_bits & Request::RT_NEED_COMPLETE;
 
+  if (requestPtr.p->isLookup())
   {
-    Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
-    pattern.release();
+    ndbassert(requestPtr.p->m_cnt_active == 0);
   }
 
+  if (!is_complete || (is_complete && need_complete_phase == false))
   {
-    Local_pattern_store pattern(pool, treeNodePtr.p->m_attrParamPattern);
-    pattern.release();
-  }
+    /**
+     * one batch complete, and either 
+     *   - request not complete
+     *   - or not complete_phase needed
+     */
+    jam();
 
-  if (treeNodePtr.p->m_send.m_keyInfoPtrI != RNIL)
+    if ((requestPtr.p->m_state & Request::RS_ABORTING) == 0)
+      ndbassert(is_complete);
+    
+    sendConf(signal, requestPtr, is_complete);
+  }
+  else if (is_complete && need_complete_phase)
   {
     jam();
-    releaseSection(treeNodePtr.p->m_send.m_keyInfoPtrI);
+    /**
+     * run complete-phase
+     */
+    complete(signal, requestPtr);
+    return;
   }
 
-  if (treeNodePtr.p->m_send.m_attrInfoPtrI != RNIL)
+  if (requestPtr.p->m_cnt_active == 0)
   {
     jam();
-    releaseSection(treeNodePtr.p->m_send.m_attrInfoPtrI);
+    /**
+     * request completed
+     */
+    cleanup(requestPtr);
+  }
+  else if ((requestPtr.p->m_bits & Request::RT_MULTI_SCAN) != 0)
+  {
+    jam();
+    /**
+     * release unneeded buffers and position cursor for SCAN_NEXTREQ
+     */
+    releaseScanBuffers(requestPtr);
+  }
+  else if ((requestPtr.p->m_bits & Request::RT_ROW_BUFFERS) != 0)
+  {
+    jam();
+    /**
+     * if not multiple scans in request, simply release all pages allocated
+     * for row buffers (all rows will be released anyway)
+     */
+    releaseRequestBuffers(requestPtr, true);
   }
 }
 
-/**
- * Processing of signals from LQH
- */
 void
-Dbspj::execLQHKEYREF(Signal* signal)
+Dbspj::sendConf(Signal* signal, Ptr<Request> requestPtr, bool is_complete)
 {
-  jamEntry();
+  if (requestPtr.p->isScan())
+  {
+    if (requestPtr.p->m_errCode == 0)
+    {
+      jam();
+      ScanFragConf * conf=
+        reinterpret_cast<ScanFragConf*>(signal->getDataPtrSend());
+      conf->senderData = requestPtr.p->m_senderData;
+      conf->transId1 = requestPtr.p->m_transId[0];
+      conf->transId2 = requestPtr.p->m_transId[1];
+      conf->completedOps = requestPtr.p->m_rows;
+      conf->fragmentCompleted = is_complete ? 1 : 0;
+      conf->total_len = 0; // Not supported...
 
-  const LqhKeyRef* ref = reinterpret_cast<const LqhKeyRef*>(signal->getDataPtr());
+      requestPtr.p->m_rows = 0; // reset for next batch
+      sendSignal(requestPtr.p->m_senderRef, GSN_SCAN_FRAGCONF, signal,
+                 ScanFragConf::SignalLength, JBB);
+    }
+    else
+    {
+      jam();
+      ndbrequire(is_complete);
+      ScanFragRef * ref=
+        reinterpret_cast<ScanFragRef*>(signal->getDataPtrSend());
+      ref->senderData = requestPtr.p->m_senderData;
+      ref->transId1 = requestPtr.p->m_transId[0];
+      ref->transId2 = requestPtr.p->m_transId[1];
+      ref->errorCode = requestPtr.p->m_errCode;
 
-  DEBUG("execLQHKEYREF, errorCode:" << ref->errorCode);
-  Ptr<TreeNode> treeNodePtr;
-  m_treenode_pool.getPtr(treeNodePtr, ref->connectPtr);
+      sendSignal(requestPtr.p->m_senderRef, GSN_SCAN_FRAGREF, signal,
+                 ScanFragRef::SignalLength, JBB);
+    }
+  }
+  else
+  {
+    ndbassert(is_complete);
+    if (requestPtr.p->m_errCode)
+    {
+      jam();
+      Uint32 resultRef = getResultRef(requestPtr);
+      TcKeyRef* ref = (TcKeyRef*)signal->getDataPtr();
+      ref->connectPtr = requestPtr.p->m_senderData;
+      ref->transId[0] = requestPtr.p->m_transId[0];
+      ref->transId[1] = requestPtr.p->m_transId[1];
+      ref->errorCode = requestPtr.p->m_errCode;
+      ref->errorData = 0;
 
-  Ptr<Request> requestPtr;
-  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+      sendTCKEYREF(signal, resultRef, requestPtr.p->m_senderRef);
+    }
+  }
+}
 
-  ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYREF);
-  (this->*(treeNodePtr.p->m_info->m_execLQHKEYREF))(signal,
-                                                    requestPtr,
-                                                    treeNodePtr);
+Uint32
+Dbspj::getResultRef(Ptr<Request> requestPtr)
+{
+  Ptr<TreeNode> nodePtr;
+  Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
+  for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
+  {
+    if (nodePtr.p->m_info == &g_LookupOpInfo)
+    {
+      jam();
+      return nodePtr.p->m_lookup_data.m_api_resultRef;
+    }
+  }
+  ndbrequire(false);
+  return 0;
 }
 
 void
-Dbspj::execLQHKEYCONF(Signal* signal)
+Dbspj::releaseScanBuffers(Ptr<Request> requestPtr)
 {
-  jamEntry();
-
-  DEBUG("execLQHKEYCONF");
-
-  const LqhKeyConf* conf = reinterpret_cast<const LqhKeyConf*>(signal->getDataPtr());
   Ptr<TreeNode> treeNodePtr;
-  m_treenode_pool.getPtr(treeNodePtr, conf->opPtr);
+  {
+    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
+    list.first(treeNodePtr);
+  }
 
-  Ptr<Request> requestPtr;
-  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+  /**
+   * This is calling recursive function...buh!
+   *   but I can't figure out how to do it some other way...
+   */
 
-  ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYCONF);
-  (this->*(treeNodePtr.p->m_info->m_execLQHKEYCONF))(signal,
-                                                     requestPtr,
-                                                     treeNodePtr);
+  /**
+   * Needs to be at least 1 active, otherwise we should have
+   *   taken the cleanup "path" in batchComplete
+   */
+  ndbrequire(releaseScanBuffers(requestPtr, treeNodePtr) > 0);
 }
 
 void
-Dbspj::execSCAN_FRAGREF(Signal* signal)
+Dbspj::registerCursor(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
 {
-  jamEntry();
-  const ScanFragRef* ref = reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());
-
-  DEBUG("execSCAN_FRAGREF, errorCode:" << ref->errorCode);
-
-  Ptr<TreeNode> treeNodePtr;
-  m_treenode_pool.getPtr(treeNodePtr, ref->senderData);
-  Ptr<Request> requestPtr;
-  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
-
-  ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGREF);
-  (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGREF))(signal,
-                                                       requestPtr,
-                                                       treeNodePtr);
+  Local_TreeNodeCursor_list list(m_treenode_pool, requestPtr.p->m_cursor_nodes);
+  list.add(treeNodePtr);
 }
 
-void
-Dbspj::execSCAN_HBREP(Signal* signal)
+Uint32
+Dbspj::releaseScanBuffers(Ptr<Request> requestPtr, 
+                          Ptr<TreeNode> treeNodePtr)
 {
-  jamEntry();
+  Uint32 active_child = 0;
 
-  Uint32 senderData = signal->theData[0];
-  //Uint32 transId[2] = { signal->theData[1], signal->theData[2] };
+  LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+  Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
+  Dependency_map::ConstDataBufferIterator it;
+  for (list.first(it); !it.isNull(); list.next(it))
+  {
+    jam();
+    Ptr<TreeNode> childPtr;
+    m_treenode_pool.getPtr(childPtr, * it.data);
+    active_child += releaseScanBuffers(requestPtr, childPtr);
+  }
 
-  Ptr<TreeNode> treeNodePtr;
-  m_treenode_pool.getPtr(treeNodePtr, senderData);
-  Ptr<Request> requestPtr;
-  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+  const bool active = treeNodePtr.p->m_state == TreeNode::TN_ACTIVE;
+  if (active_child == 0)
+  {
+    jam();
 
-  Uint32 ref = requestPtr.p->m_senderRef;
-  signal->theData[0] = requestPtr.p->m_senderData;
-  sendSignal(ref, GSN_SCAN_HBREP, signal, 3, JBB);
+    /**
+     * If there is no active children,
+     *   then we can release our own (optionally) buffered rows
+     */
+    if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
+    {
+      jam();
+      releaseNodeRows(requestPtr, treeNodePtr);
+    }
+    
+    /**
+     * If we have no active children,
+   *   and we ourselves are active (i.e. have not consumed all rows originating
+     *   from parent rows)
+     *
+     * Then, this is a position that execSCAN_NEXTREQ should continue
+     */
+    if (active)
+    {
+      jam();
+      registerCursor(requestPtr, treeNodePtr);
+    }
+  }
+  
+  return active_child + (active ? 1 : 0);
 }
 
 void
-Dbspj::execSCAN_FRAGCONF(Signal* signal)
+Dbspj::releaseNodeRows(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
 {
-  jamEntry();
-  DEBUG("execSCAN_FRAGCONF");
+  /**
+   * Release all rows associated with tree node
+   */
 
-  const ScanFragConf* conf = reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());
+  // only when var-alloc, or else stack will be popped wo/ consideration
+  // to individual rows
+  ndbassert(requestPtr.p->m_bits & Request::RT_VAR_ALLOC);
+  ndbassert(treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER);
 
-  Ptr<TreeNode> treeNodePtr;
-  m_treenode_pool.getPtr(treeNodePtr, conf->senderData);
-  Ptr<Request> requestPtr;
-  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+  /**
+   * Two ways to iterate...
+   */
+  if ((treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP) == 0)
+  {
+    jam();
+    Uint32 cnt = 0;
+    SLFifoRowListIterator iter;
+    for (first(requestPtr, treeNodePtr, iter); !iter.isNull(); )
+    {
+      jam();
+      RowRef pos = iter.m_ref;
+      next(iter);
+      releaseRow(requestPtr, pos);
+      cnt ++;
+    }
+    treeNodePtr.p->m_row_list.init();
+    ndbout_c("SLFifoRowListIterator: released %u rows!", cnt);
+  }
+  else
+  {
+    jam();
+    Uint32 cnt = 0;
+    RowMapIterator iter;
+    for (first(requestPtr, treeNodePtr, iter); !iter.isNull(); )
+    {
+      jam();
+      RowRef pos = iter.m_ref;
+      // this could be made more efficient by not actually setting up m_row_ptr
+      next(iter); 
+      releaseRow(requestPtr, pos);
+      cnt++;
+    }
+    treeNodePtr.p->m_row_map.init();
+    ndbout_c("RowMapIterator: released %u rows!", cnt);
+  }
+}
 
-  ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGCONF);
-  (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGCONF))(signal,
-                                                        requestPtr,
-                                                        treeNodePtr);
+void
+Dbspj::releaseRow(Ptr<Request> requestPtr, RowRef pos)
+{
+  ndbassert(requestPtr.p->m_bits & Request::RT_VAR_ALLOC);
+  ndbassert(pos.m_allocator == 1);
+  Ptr<RowPage> ptr;
+  m_page_pool.getPtr(ptr, pos.m_page_id);
+  ((Var_page*)ptr.p)->free_record(pos.m_page_pos, Var_page::CHAIN);
+  Uint32 free_space = ((Var_page*)ptr.p)->free_space;
+  if (free_space == 0)
+  {
+    jam();
+    LocalDLFifoList<RowPage> list(m_page_pool, 
+                                  requestPtr.p->m_rowBuffer.m_page_list);
+    list.remove(ptr);
+    releasePage(ptr);
+  }
+  else if (free_space > requestPtr.p->m_rowBuffer.m_var.m_free)
+  {
+    LocalDLFifoList<RowPage> list(m_page_pool, 
+                                  requestPtr.p->m_rowBuffer.m_page_list);
+    list.remove(ptr);
+    list.addLast(ptr);
+    requestPtr.p->m_rowBuffer.m_var.m_free = free_space;
+  }
 }
 
 void
-Dbspj::execSCAN_NEXTREQ(Signal* signal)
+Dbspj::releaseRequestBuffers(Ptr<Request> requestPtr, bool reset)
 {
-  jamEntry();
-  const ScanFragNextReq * req = (ScanFragNextReq*)&signal->theData[0];
+  /**
+   * Release all pages for request
+   */
+  {
+    {    
+      LocalDLFifoList<RowPage> list(m_page_pool, 
+                                    requestPtr.p->m_rowBuffer.m_page_list);
+      if (!list.isEmpty())
+      {
+        jam();
+        Ptr<RowPage> first, last;
+        list.first(first);
+        list.last(last);
+        releasePages(first.i, last);
+        list.remove();
+      }
+    }
+    requestPtr.p->m_rowBuffer.stack_init();
+  }
 
-  Request key;
-  key.m_transId[0] = req->transId1;
-  key.m_transId[1] = req->transId2;
-  key.m_senderData = req->senderData;
+  if (reset)
+  {
+    Ptr<TreeNode> nodePtr;
+    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
+    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
+    {
+      jam();
+      if (nodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
+      {
+        jam();
+        if (nodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP)
+        {
+          jam();
+          nodePtr.p->m_row_map.init();
+        }
+        else
+        {
+          nodePtr.p->m_row_list.init();
+        }
+      }
+    }
+  }
+}
 
-  Ptr<Request> requestPtr;
-  if (unlikely(!m_scan_request_hash.find(requestPtr, key)))
+void
+Dbspj::reportBatchComplete(Signal * signal, Ptr<Request> requestPtr, 
+                           Ptr<TreeNode> treeNodePtr)
+{
+  LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+  Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
+  Dependency_map::ConstDataBufferIterator it;
+  for (list.first(it); !it.isNull(); list.next(it))
   {
     jam();
-    ndbrequire(req->closeFlag == ZTRUE);
-    DEBUG(key.m_senderData << " Received SCAN_NEXTREQ with close when closed");
-    return;
+    Ptr<TreeNode> childPtr;
+    m_treenode_pool.getPtr(childPtr, * it.data);
+    if (childPtr.p->m_bits & TreeNode::T_NEED_REPORT_BATCH_COMPLETED)
+    {
+      jam();
+      ndbrequire(childPtr.p->m_info != 0 && 
+                 childPtr.p->m_info->m_parent_batch_complete !=0 );
+      (this->*(childPtr.p->m_info->m_parent_batch_complete))(signal, 
+                                                             requestPtr, 
+                                                             childPtr);
+    }
   }
+}
 
-  Ptr<TreeNode> treeNodePtr;
-  m_treenode_pool.getPtr(treeNodePtr, requestPtr.p->m_currentNodePtrI);
+void
+Dbspj::abort(Signal* signal, Ptr<Request> requestPtr, Uint32 errCode)
+{
+  jam();
 
-  if (treeNodePtr.p->m_scanfrag_data.m_scan_state == ScanFragData::SF_CLOSING)
+  if ((requestPtr.p->m_state & Request::RS_ABORTING) != 0)
   {
     jam();
-    /**
-     * Duplicate of a close request already sent to datanodes.
-     * Ignore this and wait for reply on pending request.
-     */
-    DEBUG("execSCAN_NEXTREQ, is SF_CLOSING -> ignore request");
     return;
   }
 
-  if (req->closeFlag == ZTRUE)                             // Requested close scan
+  requestPtr.p->m_state |= Request::RS_ABORTING;
+  requestPtr.p->m_errCode = errCode;
+  
   {
-    if (treeNodePtr.p->m_scanfrag_data.m_scan_status == 2) // Is closed on LQH
+    Ptr<TreeNode> nodePtr;
+    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
+    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
     {
       jam();
-      ndbassert (treeNodePtr.p->m_scanfrag_data.m_scan_state != ScanFragData::SF_RUNNING);
+      ndbrequire(nodePtr.p->m_info != 0);
+      if (nodePtr.p->m_info->m_abort != 0)
+      {
+        jam();
+        (this->*(nodePtr.p->m_info->m_abort))(signal, requestPtr, nodePtr);
+      }
+    }
+  }
+  
+  checkBatchComplete(signal, requestPtr, 0);
+}
 
-      ScanFragConf* conf = reinterpret_cast<ScanFragConf*>(signal->getDataPtrSend());
-      conf->senderData = requestPtr.p->m_senderData;
-      conf->transId1 = requestPtr.p->m_transId[0];
-      conf->transId2 = requestPtr.p->m_transId[1];
-      conf->completedOps = 0;
-      conf->fragmentCompleted = 2; // =ZSCAN_FRAG_CLOSED -> Finished...
-      conf->total_len = 0; // Not supported...
+Uint32
+Dbspj::nodeFail(Signal* signal, Ptr<Request> requestPtr,
+                NdbNodeBitmask nodes)
+{
+  Uint32 cnt = 0;
+  Uint32 iter = 0;
+  Uint32 outstanding = requestPtr.p->m_outstanding;
+  Uint32 aborting = requestPtr.p->m_state & Request::RS_ABORTING;
 
-      DEBUG("execSCAN_NEXTREQ(close), LQH has conf'ed 'w/ ZSCAN_FRAG_CLOSED");
-      sendSignal(requestPtr.p->m_senderRef, GSN_SCAN_FRAGCONF, signal,
-                 ScanFragConf::SignalLength, JBB);
+  {
+    Ptr<TreeNode> nodePtr;
+    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
+    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
+    {
+      jam();
+      ndbrequire(nodePtr.p->m_info != 0);
+      if (nodePtr.p->m_info->m_execNODE_FAILREP != 0)
+      {
+        jam();
+        iter ++;
+        cnt += (this->*(nodePtr.p->m_info->m_execNODE_FAILREP))(signal,
+                                                                requestPtr,
+                                                                nodePtr, nodes);
+      }
+    }
+  }
 
-      cleanup(requestPtr);
-      return;
+  if (cnt == 0)
+  {
+    jam();
+    /**
+     * None of the operations needed NodeFailRep "action"
+     *   check if our TC has died...but...only needed in
+     *   scan case...for lookup...not so...
+     */
+    if (requestPtr.p->isScan() &&
+        nodes.get(refToNode(requestPtr.p->m_senderRef)))
+    {
+      jam();
+      abort(signal, requestPtr, DbspjErr::NodeFailure);
     }
-    else if (treeNodePtr.p->m_scanfrag_data.m_scan_state == ScanFragData::SF_RUNNING)
+  }
+  else
+  {
+    jam();
+    abort(signal, requestPtr, DbspjErr::NodeFailure);
+
+    if (aborting && outstanding && requestPtr.p->m_outstanding == 0)
     {
       jam();
-      DEBUG("execSCAN_NEXTREQ, make PENDING CLOSE");
-      treeNodePtr.p->m_scanfrag_data.m_pending_close = true;
-      return;
+      checkBatchComplete(signal, requestPtr, 0);
     }
-    // else; fallthrough & send to datanodes:
   }
 
-  ndbassert (!treeNodePtr.p->m_scanfrag_data.m_pending_close);
-  ndbassert (treeNodePtr.p->m_scanfrag_data.m_scan_status != 2);
-  ndbrequire(treeNodePtr.p->m_info != 0 &&
-             treeNodePtr.p->m_info->m_execSCAN_NEXTREQ != 0);
-  (this->*(treeNodePtr.p->m_info->m_execSCAN_NEXTREQ))(signal,
-                                                       requestPtr, treeNodePtr);
+  return cnt + iter;
 }
 
 void
-Dbspj::execTRANSID_AI(Signal* signal)
+Dbspj::complete(Signal* signal, Ptr<Request> requestPtr)
 {
-  jamEntry();
-  DEBUG("execTRANSID_AI");
-  TransIdAI * req = (TransIdAI *)signal->getDataPtr();
-  Uint32 ptrI = req->connectPtr;
-  //Uint32 transId[2] = { req->transId[0], req->transId[1] };
-
-  Ptr<TreeNode> treeNodePtr;
-  m_treenode_pool.getPtr(treeNodePtr, ptrI);
+  /**
+   * we need to run complete-phase before sending last SCAN_FRAGCONF
+   */
+  Uint32 is_abort = requestPtr.p->m_state & Request::RS_ABORTING;
+  requestPtr.p->m_state = Request::RS_COMPLETING | is_abort;
+  
+  // clear bit so that next batchComplete()
+  // will continue to cleanup
+  ndbassert((requestPtr.p->m_bits & Request::RT_NEED_COMPLETE) != 0);
+  requestPtr.p->m_bits &= ~(Uint32)Request::RT_NEED_COMPLETE;
+  requestPtr.p->m_outstanding = 0;
+  {
+    Ptr<TreeNode> nodePtr;
+    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
+    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
+    {
+      jam();
+      ndbrequire(nodePtr.p->m_info != 0);
+      if (nodePtr.p->m_info->m_complete != 0)
+      {
+        jam();
+        (this->*(nodePtr.p->m_info->m_complete))(signal, requestPtr, nodePtr);
+      }
+    }
+
+    /**
+     * preferably RT_NEED_COMPLETE should only be set if blocking
+     * calls are used, in which case m_outstanding should have been increased
+     *
+     * BUT: scanIndex does DIH_SCAN_TAB_COMPLETE_REP which does not send reply
+     *      so it is not really "blocking"
+     *      i.e remove assert
+     */
+    //ndbassert(requestPtr.p->m_outstanding);
+  }
+  checkBatchComplete(signal, requestPtr, 0);
+}
+
+void
+Dbspj::cleanup(Ptr<Request> requestPtr)
+{
+  {
+    Ptr<TreeNode> nodePtr;
+    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
+    for (list.first(nodePtr); !nodePtr.isNull(); )
+    {
+      jam();
+      ndbrequire(nodePtr.p->m_info != 0 && nodePtr.p->m_info->m_cleanup != 0);
+      (this->*(nodePtr.p->m_info->m_cleanup))(requestPtr, nodePtr);
+
+      Ptr<TreeNode> tmp = nodePtr;
+      list.next(nodePtr);
+      m_treenode_pool.release(tmp);
+    }
+  }
+  if (requestPtr.p->isScan())
+  {
+    jam();
+#ifdef VM_TRACE
+    {
+      Request key;
+      key.m_transId[0] = requestPtr.p->m_transId[0];
+      key.m_transId[1] = requestPtr.p->m_transId[1];
+      key.m_senderData = requestPtr.p->m_senderData;
+      Ptr<Request> tmp;
+      ndbrequire(m_scan_request_hash.find(tmp, key));
+    }
+#endif
+    m_scan_request_hash.remove(requestPtr);
+  }
+  else
+  {
+    jam();
+#ifdef VM_TRACE
+    {
+      Request key;
+      key.m_transId[0] = requestPtr.p->m_transId[0];
+      key.m_transId[1] = requestPtr.p->m_transId[1];
+      key.m_senderData = requestPtr.p->m_senderData;
+      Ptr<Request> tmp;
+      ndbrequire(m_lookup_request_hash.find(tmp, key));
+    }
+#endif
+    m_lookup_request_hash.remove(requestPtr);
+  }
+  releaseRequestBuffers(requestPtr, false);
+  ArenaHead ah = requestPtr.p->m_arena;
+  m_request_pool.release(requestPtr);
+  m_arenaAllocator.release(ah);
+}
+
+void
+Dbspj::cleanup_common(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
+{
+  jam();
+
+  LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+  {
+    Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
+    list.release();
+  }
+
+  {
+    Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
+    pattern.release();
+  }
+
+  {
+    Local_pattern_store pattern(pool, treeNodePtr.p->m_attrParamPattern);
+    pattern.release();
+  }
+
+  if (treeNodePtr.p->m_send.m_keyInfoPtrI != RNIL)
+  {
+    jam();
+    releaseSection(treeNodePtr.p->m_send.m_keyInfoPtrI);
+  }
+
+  if (treeNodePtr.p->m_send.m_attrInfoPtrI != RNIL)
+  {
+    jam();
+    releaseSection(treeNodePtr.p->m_send.m_attrInfoPtrI);
+  }
+}
+
+/**
+ * Processing of signals from LQH
+ */
+void
+Dbspj::execLQHKEYREF(Signal* signal)
+{
+  jamEntry();
+
+  const LqhKeyRef* ref = reinterpret_cast<const LqhKeyRef*>(signal->getDataPtr());
+
+  DEBUG("execLQHKEYREF, errorCode:" << ref->errorCode);
+  Ptr<TreeNode> treeNodePtr;
+  m_treenode_pool.getPtr(treeNodePtr, ref->connectPtr);
+
+  Ptr<Request> requestPtr;
+  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+
+  ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYREF);
+  (this->*(treeNodePtr.p->m_info->m_execLQHKEYREF))(signal,
+                                                    requestPtr,
+                                                    treeNodePtr);
+}
+
+void
+Dbspj::execLQHKEYCONF(Signal* signal)
+{
+  jamEntry();
+
+  DEBUG("execLQHKEYCONF");
+
+  const LqhKeyConf* conf = reinterpret_cast<const LqhKeyConf*>(signal->getDataPtr());
+  Ptr<TreeNode> treeNodePtr;
+  m_treenode_pool.getPtr(treeNodePtr, conf->opPtr);
+
+  Ptr<Request> requestPtr;
+  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+
+  ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYCONF);
+  (this->*(treeNodePtr.p->m_info->m_execLQHKEYCONF))(signal,
+                                                     requestPtr,
+                                                     treeNodePtr);
+}
+
+void
+Dbspj::execSCAN_FRAGREF(Signal* signal)
+{
+  jamEntry();
+  const ScanFragRef* ref = reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());
+
+  DEBUG("execSCAN_FRAGREF, errorCode:" << ref->errorCode);
+
+  Ptr<TreeNode> treeNodePtr;
+  m_treenode_pool.getPtr(treeNodePtr, ref->senderData);
+  Ptr<Request> requestPtr;
+  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+
+  ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGREF);
+  (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGREF))(signal,
+                                                       requestPtr,
+                                                       treeNodePtr);
+}
+
+void
+Dbspj::execSCAN_HBREP(Signal* signal)
+{
+  jamEntry();
+
+  Uint32 senderData = signal->theData[0];
+  //Uint32 transId[2] = { signal->theData[1], signal->theData[2] };
+
+  Ptr<TreeNode> treeNodePtr;
+  m_treenode_pool.getPtr(treeNodePtr, senderData);
+  Ptr<Request> requestPtr;
+  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+
+  Uint32 ref = requestPtr.p->m_senderRef;
+  signal->theData[0] = requestPtr.p->m_senderData;
+  sendSignal(ref, GSN_SCAN_HBREP, signal, 3, JBB);
+}
+
+void
+Dbspj::execSCAN_FRAGCONF(Signal* signal)
+{
+  jamEntry();
+  DEBUG("execSCAN_FRAGCONF");
+
+  const ScanFragConf* conf = reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());
+
+  Ptr<TreeNode> treeNodePtr;
+  m_treenode_pool.getPtr(treeNodePtr, conf->senderData);
+  Ptr<Request> requestPtr;
+  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+
+  ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGCONF);
+  (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGCONF))(signal,
+                                                        requestPtr,
+                                                        treeNodePtr);
+}
+
+void
+Dbspj::execSCAN_NEXTREQ(Signal* signal)
+{
+  jamEntry();
+  const ScanFragNextReq * req = (ScanFragNextReq*)&signal->theData[0];
+
+  Request key;
+  key.m_transId[0] = req->transId1;
+  key.m_transId[1] = req->transId2;
+  key.m_senderData = req->senderData;
+
+  Ptr<Request> requestPtr;
+  if (unlikely(!m_scan_request_hash.find(requestPtr, key)))
+  {
+    jam();
+    ndbrequire(req->closeFlag == ZTRUE);
+    return;
+  }
+
+  if (req->closeFlag == ZTRUE)  // Requested close scan
+  {
+    jam();
+    abort(signal, requestPtr, 0);
+    return;
+  }
+
+  ndbrequire(requestPtr.p->m_outstanding == 0);
+
+  {
+    /**
+     * Scroll all relevant cursors...
+     */
+    Ptr<TreeNode> treeNodePtr;
+    Local_TreeNodeCursor_list list(m_treenode_pool, 
+                                   requestPtr.p->m_cursor_nodes);
+    for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
+    {
+      jam();
+      ndbrequire(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
+      ndbrequire(treeNodePtr.p->m_info != 0 &&
+                 treeNodePtr.p->m_info->m_execSCAN_NEXTREQ != 0);
+      (this->*(treeNodePtr.p->m_info->m_execSCAN_NEXTREQ))(signal, 
+                                                           requestPtr, 
+                                                           treeNodePtr);
+    }
+  }
+}
+
+void
+Dbspj::execTRANSID_AI(Signal* signal)
+{
+  jamEntry();
+  DEBUG("execTRANSID_AI");
+  TransIdAI * req = (TransIdAI *)signal->getDataPtr();
+  Uint32 ptrI = req->connectPtr;
+  //Uint32 transId[2] = { req->transId[0], req->transId[1] };
+
+  Ptr<TreeNode> treeNodePtr;
+  m_treenode_pool.getPtr(treeNodePtr, ptrI);
   Ptr<Request> requestPtr;
   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
 
@@ -1036,177 +1800,1642 @@ Dbspj::execTRANSID_AI(Signal* signal)
    * build easy-access-array for row
    */
   Uint32 tmp[2+MAX_ATTRIBUTES_IN_TABLE];
-  RowRef::Header* const header = reinterpret_cast<RowRef::Header*>(tmp);
+  RowPtr::Header* const header = reinterpret_cast<RowPtr::Header*>(tmp);
 
   Uint32 cnt = buildRowHeader(header, dataPtr);
-  ndbassert(header->m_len <= 1+MAX_ATTRIBUTES_IN_TABLE);
+  ndbassert(header->m_len < NDB_ARRAY_SIZE(tmp));
 
-  /**
-   * TODO: If row needs to be buffered (m_bits & ROW_BUFFER)
-   *   we should here allocate a row, and store it...
-   */
-  struct RowRef row;
-  row.m_type = RowRef::RT_SECTION;
+  struct RowPtr row;
+  row.m_type = RowPtr::RT_SECTION;
   row.m_src_node_ptrI = treeNodePtr.i;
-  row.m_src_node_no = treeNodePtr.p->m_node_no;
-  row.m_row_data.m_section.m_header = (RowRef::Header*)tmp;
+  row.m_row_data.m_section.m_header = (RowPtr::Header*)tmp;
   row.m_row_data.m_section.m_dataPtr.assign(dataPtr);
   Uint32 rootStreamId = 0;
+
   getCorrelationData(row.m_row_data.m_section, 
                      cnt - 1, 
                      rootStreamId, 
                      row.m_src_correlation);
+
+  if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
+  {
+    jam();
+    Uint32 err = storeRow(requestPtr, treeNodePtr, row);
+    ndbrequire(err == 0);
+  }
+  
   ndbrequire(requestPtr.p->m_rootResultData == rootStreamId);
   ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execTRANSID_AI);
+  
   (this->*(treeNodePtr.p->m_info->m_execTRANSID_AI))(signal,
                                                      requestPtr,
                                                      treeNodePtr,
                                                      row);
-  release(row.m_row_data.m_section.m_dataPtr);
+  release(dataPtr);
+}
+
+/**
+ * storeRow: copy a row (received as a segmented section) into the
+ * request's row-buffer, so that it remains accessible after the
+ * incoming signal sections are released.
+ *
+ * Buffered layout: [link (0 or 2 words)][header (1 + m_len words)][data],
+ * where the link word-pair is only present when rows are kept in a
+ * linked list (i.e. T_ROW_BUFFER_MAP is not set).
+ *
+ * On success 'row' is rewritten to RT_LINEAR, referencing the buffered
+ * copy. Returns 0 on success, else DbspjErr::OutOfRowMemory.
+ */
+Uint32
+Dbspj::storeRow(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr, RowPtr &row)
+{
+  ndbassert(row.m_type == RowPtr::RT_SECTION);
+  SegmentedSectionPtr dataPtr = row.m_row_data.m_section.m_dataPtr;
+  Uint32 * headptr = (Uint32*)row.m_row_data.m_section.m_header;
+  // Header occupies the m_len word itself plus m_len offset words
+  Uint32 headlen = 1 + row.m_row_data.m_section.m_header->m_len;
+
+  /**
+   * If rows are not in map, then they are kept in linked list
+   */
+  Uint32 linklen = (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP)?
+    0 : 2;
+
+  Uint32 totlen = 0;
+  totlen += dataPtr.sz;
+  totlen += headlen;
+  totlen += linklen;
+
+  // Allocate from the stack- or var-allocator depending on request mode
+  RowRef ref;
+  Uint32 * dstptr = 0;
+  if ((requestPtr.p->m_bits & Request::RT_VAR_ALLOC) == 0)
+  {
+    jam();
+    dstptr = stackAlloc(requestPtr.p->m_rowBuffer, ref, totlen);
+  }
+  else
+  {
+    jam();
+    dstptr = varAlloc(requestPtr.p->m_rowBuffer, ref, totlen);
+  }
+
+  if (unlikely(dstptr == 0))
+  {
+    jam();
+    return DbspjErr::OutOfRowMemory;
+  }
+
+  // Rewrite 'row' to point at the (linear) buffered copy
+  row.m_type = RowPtr::RT_LINEAR;
+  row.m_row_data.m_linear.m_row_ref = ref;
+  row.m_row_data.m_linear.m_header = (RowPtr::Header*)(dstptr + linklen);
+  row.m_row_data.m_linear.m_data = dstptr + linklen + headlen;
+  
+  memcpy(dstptr + linklen, headptr, 4 * headlen);
+  copy(dstptr + linklen + headlen, dataPtr);
+
+  if (linklen)
+  {
+    jam();
+    NullRowRef.copyto_link(dstptr); // Null terminate list...
+    add_to_list(treeNodePtr.p->m_row_list, ref);
+  }
+  else
+  {
+    jam();
+    // NOTE(review): on map-allocation failure the buffered row is not
+    // reclaimed individually - presumably released with the whole
+    // row-buffer at request cleanup; confirm.
+    return add_to_map(requestPtr, treeNodePtr, row.m_src_correlation, ref);
+  }
+
+  return 0;
+}
+
+/**
+ * setupRowPtr: initialize a RowPtr to reference a row previously
+ * buffered by ::storeRow().
+ *
+ * Buffered layout (must match ::storeRow()):
+ *   [link (0 or 2 words)][header (1 + m_len words)][row data]
+ */
+void
+Dbspj::setupRowPtr(Ptr<TreeNode> treeNodePtr,
+                   RowPtr& row, RowRef ref, const Uint32 * src)
+{
+  Uint32 linklen = (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP)?
+    0 : 2;
+
+  const RowPtr::Header* header = (const RowPtr::Header*)(src + linklen);
+  /**
+   * BUG FIX: the header occupies (1 + m_len) words - the m_len word
+   * itself plus m_len offsets; ::storeRow() copies 'headlen = 1 + m_len'
+   * words. The data pointer previously skipped only m_len words and
+   * thus pointed one word short of the row data.
+   */
+  Uint32 headlen = 1 + header->m_len;
+
+  row.m_type = RowPtr::RT_LINEAR;
+  row.m_row_data.m_linear.m_row_ref = ref;
+  row.m_row_data.m_linear.m_header = (RowPtr::Header*)(src + linklen);
+  row.m_row_data.m_linear.m_data = src + linklen + headlen;
+}
+
+/**
+ * add_to_list: append a buffered row (identified by 'rowref') to the
+ * tree-node's singly-linked FIFO row list. For a non-empty list the
+ * link-word prefix of the current last row is updated to point at the
+ * new row.
+ */
+void
+Dbspj::add_to_list(SLFifoRowList & list, RowRef rowref)
+{
+  if (list.isNull())
+  {
+    jam();
+    // Empty list: new row becomes the head
+    list.m_first_row_page_id = rowref.m_page_id;
+    list.m_first_row_page_pos = rowref.m_page_pos;
+  }
+  else
+  {
+    jam();
+    /**
+     * add last to list
+     */
+    RowRef last;
+    last.m_allocator = rowref.m_allocator;
+    last.m_page_id = list.m_last_row_page_id;
+    last.m_page_pos = list.m_last_row_page_pos;
+    Uint32 * rowptr;
+    if (rowref.m_allocator == 0)
+    {
+      jam();
+      rowptr = get_row_ptr_stack(last);
+    }
+    else
+    {
+      jam();
+      rowptr = get_row_ptr_var(last);
+    }
+    // Patch previous tail's link-word to point at the new row
+    rowref.copyto_link(rowptr);
+  }
+
+  // New row is always the new tail
+  list.m_last_row_page_id = rowref.m_page_id;
+  list.m_last_row_page_pos = rowref.m_page_pos;
+}
+
+/**
+ * Resolve a stack-allocated RowRef into a direct word pointer.
+ */
+Uint32 *
+Dbspj::get_row_ptr_stack(RowRef pos)
+{
+  ndbassert(pos.m_allocator == 0);
+  Ptr<RowPage> pagePtr;
+  m_page_pool.getPtr(pagePtr, pos.m_page_id);
+  return &pagePtr.p->m_data[pos.m_page_pos];
+}
+
+/**
+ * Resolve a var-allocated RowRef into a direct word pointer.
+ */
+Uint32 *
+Dbspj::get_row_ptr_var(RowRef pos)
+{
+  ndbassert(pos.m_allocator == 1);
+  Ptr<RowPage> pagePtr;
+  m_page_pool.getPtr(pagePtr, pos.m_page_id);
+  Var_page * vp = (Var_page*)pagePtr.p;
+  return vp->get_ptr(pos.m_page_pos);
+}
+
+/**
+ * first: position 'iter' on the first row of the tree-node's FIFO row
+ * list. Returns false (and a null iterator) if the list is empty.
+ */
+bool
+Dbspj::first(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr, 
+             SLFifoRowListIterator& iter)
+{
+  // Allocator type (0 = stack, 1 = var) follows the request mode
+  Uint32 var = (requestPtr.p->m_bits & Request::RT_VAR_ALLOC) != 0;
+  SLFifoRowList & list = treeNodePtr.p->m_row_list;
+  if (list.isNull())
+  {
+    jam();
+    iter.setNull();
+    return false;
+  }
+
+  iter.m_ref.m_allocator = var;
+  iter.m_ref.m_page_id = list.m_first_row_page_id;
+  iter.m_ref.m_page_pos = list.m_first_row_page_pos;
+  if (var == 0)
+  {
+    jam();
+    iter.m_row_ptr = get_row_ptr_stack(iter.m_ref);
+  }
+  else
+  {
+    jam();
+    iter.m_row_ptr = get_row_ptr_var(iter.m_ref);
+  }
+
+  return true;
+}
+
+/**
+ * next: advance 'iter' to the following row by reading the link-word
+ * prefix of the current row. Returns false when the end of the list
+ * (NullRowRef terminator) is reached.
+ */
+bool
+Dbspj::next(SLFifoRowListIterator& iter)
+{
+  iter.m_ref.assign_from_link(iter.m_row_ptr);
+  if (iter.m_ref.isNull())
+  {
+    jam();
+    return false;
+  }
+
+  if (iter.m_ref.m_allocator == 0)
+  {
+    jam();
+    iter.m_row_ptr = get_row_ptr_stack(iter.m_ref);
+  }
+  else
+  {
+    jam();
+    iter.m_row_ptr = get_row_ptr_var(iter.m_ref);
+  }
+  return true;
+}
+
+/**
+ * next: re-position 'iter' at the saved position 'start' and advance
+ * one row. Used to resume iteration after the iterator has been
+ * invalidated.
+ */
+bool
+Dbspj::next(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr, 
+            SLFifoRowListIterator& iter, SLFifoRowListIteratorPtr start)
+{
+  Uint32 var = (requestPtr.p->m_bits & Request::RT_VAR_ALLOC) != 0;
+  (void)var;
+  ndbassert(var == iter.m_ref.m_allocator);
+  /**
+   * BUG FIX: allocator 0 is the stack-allocator and allocator 1 the
+   * var-allocator (see get_row_ptr_stack()/get_row_ptr_var() and every
+   * other call site) - the two branches were previously swapped.
+   */
+  if (iter.m_ref.m_allocator == 0)
+  {
+    jam();
+    iter.m_row_ptr = get_row_ptr_stack(start.m_ref);
+  }
+  else
+  {
+    jam();
+    iter.m_row_ptr = get_row_ptr_var(start.m_ref);
+  }
+  return next(iter);
+}
+
+/**
+ * add_to_map: record a buffered row in the tree-node's correlation-id
+ * indexed row map, allocating the map (one slot per batch row) on first
+ * use. Returns 0 on success, else DbspjErr::OutOfRowMemory.
+ */
+Uint32
+Dbspj::add_to_map(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr,
+                  Uint32 corrVal, RowRef rowref)
+{
+  Uint32 * mapptr;
+  RowMap& map = treeNodePtr.p->m_row_map;
+  if (map.isNull())
+  {
+    jam();
+    // First row: allocate and clear a map sized for the whole batch
+    Uint16 batchsize = treeNodePtr.p->m_batch_size;
+    Uint32 sz16 = RowMap::MAP_SIZE_PER_REF_16 * batchsize;
+    Uint32 sz32 = (sz16 + 1) / 2;
+    RowRef ref;
+    if ((requestPtr.p->m_bits & Request::RT_VAR_ALLOC) == 0)
+    {
+      jam();
+      mapptr = stackAlloc(requestPtr.p->m_rowBuffer, ref, sz32);
+    }
+    else
+    {
+      jam();
+      mapptr = varAlloc(requestPtr.p->m_rowBuffer, ref, sz32);
+    }
+    if (unlikely(mapptr == 0))
+    {
+      jam();
+      return DbspjErr::OutOfRowMemory;
+    }
+    map.assign(ref);
+    map.m_elements = 0;
+    map.m_size = batchsize;
+    map.clear(mapptr);
+  }
+  else
+  {
+    jam();
+    // Map exists: resolve its base pointer again
+    RowRef ref;
+    map.copyto(ref);
+    if (ref.m_allocator == 0)
+    {
+      jam();
+      mapptr = get_row_ptr_stack(ref);
+    }
+    else
+    {
+      jam();
+      mapptr = get_row_ptr_var(ref);
+    }
+  }
+
+  // Low 16 bits of the correlation value is the row's slot in the map
+  Uint32 pos = corrVal & 0xFFFF;
+  ndbrequire(pos < map.m_size);
+  ndbrequire(map.m_elements < map.m_size);
+
+  if (1)
+  {
+    /**
+     * Check that *pos* is empty
+     */
+    RowRef check;
+    map.load(mapptr, pos, check);
+    ndbrequire(check.m_page_pos == 0xFFFF);
+  }
+
+  map.store(mapptr, pos, rowref);
+
+  return 0;
+}
+
+/**
+ * first: position 'iter' on the first occupied slot of the tree-node's
+ * row map. Returns false (and a null iterator) if the map is absent or
+ * contains no rows.
+ */
+bool
+Dbspj::first(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr, 
+             RowMapIterator & iter)
+{
+  Uint32 var = (requestPtr.p->m_bits & Request::RT_VAR_ALLOC) != 0;
+  RowMap& map = treeNodePtr.p->m_row_map;
+  if (map.isNull())
+  {
+    jam();
+    iter.setNull();
+    return false;
+  }
+
+  if (var == 0)
+  {
+    jam();
+    iter.m_map_ptr = get_row_ptr_stack(map.m_map_ref);
+  }
+  else
+  {
+    jam();
+    iter.m_map_ptr = get_row_ptr_var(map.m_map_ref);
+  }
+  iter.m_size = map.m_size;
+  iter.m_ref.m_allocator = var;
+
+  /**
+   * BUG FIX: bounds check must come first - the old condition order
+   * evaluated RowMap::isNull(ptr, m_size) one slot past the end of the
+   * map when every slot is empty.
+   */
+  Uint32 pos = 0;
+  while (pos < iter.m_size && RowMap::isNull(iter.m_map_ptr, pos))
+    pos++;
+
+  if (pos == iter.m_size)
+  {
+    jam();
+    iter.setNull();
+    return false;
+  }
+  else
+  {
+    jam();
+    RowMap::load(iter.m_map_ptr, pos, iter.m_ref);
+    iter.m_element_no = pos;
+    if (var == 0)
+    {
+      jam();
+      iter.m_row_ptr = get_row_ptr_stack(iter.m_ref);
+    }
+    else
+    {
+      jam();
+      iter.m_row_ptr = get_row_ptr_var(iter.m_ref);
+    }
+    return true;
+  }
+}
+
+/**
+ * next: advance 'iter' to the next occupied slot of the row map.
+ * Returns false (and a null iterator) when no further rows exist.
+ */
+bool
+Dbspj::next(RowMapIterator & iter)
+{
+  /**
+   * BUG FIX: bounds check must come first - the old condition order
+   * evaluated RowMap::isNull(ptr, m_size) one slot past the end of the
+   * map when no further occupied slot exists.
+   */
+  Uint32 pos = iter.m_element_no + 1;
+  while (pos < iter.m_size && RowMap::isNull(iter.m_map_ptr, pos))
+    pos++;
+
+  if (pos == iter.m_size)
+  {
+    jam();
+    iter.setNull();
+    return false;
+  }
+  else
+  {
+    jam();
+    RowMap::load(iter.m_map_ptr, pos, iter.m_ref);
+    iter.m_element_no = pos;
+    if (iter.m_ref.m_allocator == 0)
+    {
+      jam();
+      iter.m_row_ptr = get_row_ptr_stack(iter.m_ref);
+    }
+    else
+    {
+      jam();
+      iter.m_row_ptr = get_row_ptr_var(iter.m_ref);
+    }
+    return true;
+  }
+}
+
+/**
+ * next: re-position 'iter' at the saved map slot 'start' and advance to
+ * the next occupied slot. Used to resume iteration after the iterator
+ * has been invalidated.
+ */
+bool
+Dbspj::next(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr, 
+            RowMapIterator & iter, RowMapIteratorPtr start)
+{
+  Uint32 var = (requestPtr.p->m_bits & Request::RT_VAR_ALLOC) != 0;
+  RowMap& map = treeNodePtr.p->m_row_map;
+  ndbrequire(!map.isNull());
+  
+  // Re-resolve the map base pointer (pages may not be stable pointers)
+  if (var == 0)
+  {
+    jam();
+    iter.m_map_ptr = get_row_ptr_stack(map.m_map_ref);
+  }
+  else
+  {
+    jam();
+    iter.m_map_ptr = get_row_ptr_var(map.m_map_ref);
+  }
+  iter.m_size = map.m_size;
+
+  RowMap::load(iter.m_map_ptr, start.m_element_no, iter.m_ref);
+  iter.m_element_no = start.m_element_no;
+  return next(iter);
+}
+
+/**
+ * stackAlloc: bump-pointer allocation of 'sz' words from the last page
+ * of the row-buffer, appending a fresh page when the current one cannot
+ * hold the request. Returns 0 on page-allocation failure.
+ * Allocations are only released wholesale with the buffer.
+ */
+Uint32 *
+Dbspj::stackAlloc(RowBuffer & buffer, RowRef& dst, Uint32 sz)
+{
+  Ptr<RowPage> ptr;
+  LocalDLFifoList<RowPage> list(m_page_pool, buffer.m_page_list);
+  
+  Uint32 pos = buffer.m_stack.m_pos;
+  const Uint32 SIZE = RowPage::SIZE;
+  if (list.isEmpty() || (pos + sz) > SIZE)
+  {    
+    jam();
+    bool ret = allocPage(ptr);
+    if (unlikely(ret == false))
+    {
+      jam();
+      return 0;
+    }
+    
+    pos = 0;
+    list.addLast(ptr);
+  }
+  else
+  {
+    list.last(ptr);
+  }
+
+  // m_allocator == 0 identifies a stack allocation (see get_row_ptr_stack)
+  dst.m_page_id = ptr.i;
+  dst.m_page_pos = pos;
+  dst.m_allocator = 0;
+  buffer.m_stack.m_pos = pos + sz;
+  return ptr.p->m_data + pos;
+}
+
+/**
+ * varAlloc: allocate 'sz' words via the Var_page record allocator on
+ * the last page of the row-buffer, appending a fresh (initialized) page
+ * when the current one lacks space. Returns 0 on page-allocation
+ * failure.
+ */
+Uint32 *
+Dbspj::varAlloc(RowBuffer & buffer, RowRef& dst, Uint32 sz)
+{
+  Ptr<RowPage> ptr;
+  LocalDLFifoList<RowPage> list(m_page_pool, buffer.m_page_list);
+  
+  // +1 word for the record's Var_page bookkeeping overhead
+  Uint32 free_space = buffer.m_var.m_free;
+  if (list.isEmpty() || free_space < (sz + 1))
+  {    
+    jam();
+    bool ret = allocPage(ptr);
+    if (unlikely(ret == false))
+    {
+      jam();
+      return 0;
+    }
+    
+    list.addLast(ptr);
+    ((Var_page*)ptr.p)->init();
+  }
+  else
+  {
+    jam();
+    list.last(ptr);
+  }
+  
+  Var_page * vp = (Var_page*)ptr.p;
+  Uint32 pos = vp->alloc_record(sz, (Var_page*)m_buffer0, Var_page::CHAIN);
+
+  // m_allocator == 1 identifies a var allocation (see get_row_ptr_var)
+  dst.m_page_id = ptr.i;
+  dst.m_page_pos = pos;
+  dst.m_allocator = 1;
+  buffer.m_var.m_free = vp->free_space;
+  return vp->get_ptr(pos);
+}
+
+/**
+ * allocPage: obtain one RowPage, preferring the block-local free-list
+ * and falling back to the global memory manager. Returns false if no
+ * page could be obtained.
+ */
+bool
+Dbspj::allocPage(Ptr<RowPage> & ptr)
+{
+  if (m_free_page_list.firstItem == RNIL)
+  {
+    jam();
+    // Local free-list empty: allocate from the global page pool
+    ptr.p = (RowPage*)m_ctx.m_mm.alloc_page(RT_SPJ_DATABUFFER,
+                                            &ptr.i,
+                                            Ndbd_mem_manager::NDB_ZONE_ANY);
+    if (ptr.p == 0)
+    {
+      return false;
+    }
+    return true;
+  }
+  else
+  {
+    jam();
+    LocalSLList<RowPage> list(m_page_pool, m_free_page_list);
+    bool ret = list.remove_front(ptr);
+    ndbrequire(ret);
+    return ret;
+  }
+}
+
+/**
+ * Return a single row-buffer page to the block-local free-list.
+ */
+void
+Dbspj::releasePage(Ptr<RowPage> ptr)
+{
+  LocalSLList<RowPage> freelist(m_page_pool, m_free_page_list);
+  freelist.add(ptr);
+}
+
+/**
+ * Return a chain of row-buffer pages (head 'first' .. tail 'last') to
+ * the block-local free-list.
+ */
+void
+Dbspj::releasePages(Uint32 first, Ptr<RowPage> last)
+{
+  LocalSLList<RowPage> freelist(m_page_pool, m_free_page_list);
+  freelist.add(first, last);
+}
+
+/**
+ * releaseGlobal: CONTINUEB-driven background loop that trickles pages
+ * from the block-local free-list back to the global memory manager,
+ * one page per invocation. Re-schedules itself with a longer delay
+ * (300ms vs 100ms) when the local free-list is empty.
+ */
+void
+Dbspj::releaseGlobal(Signal * signal)
+{
+  Uint32 delay = 100;
+  LocalSLList<RowPage> list(m_page_pool, m_free_page_list);
+  if (list.empty())
+  {
+    jam();
+    delay = 300;
+  }
+  else
+  {
+    Ptr<RowPage> ptr;
+    list.remove_front(ptr);
+    m_ctx.m_mm.release_page(RT_SPJ_DATABUFFER, ptr.i);
+  }
+  
+  signal->theData[0] = 0;
+  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, delay, 1);
+}
+
+/**
+ * END - MODULE GENERIC
+ */
+
+/**
+ * MODULE LOOKUP
+ */
+/**
+ * g_LookupOpInfo: dispatch table for lookup-type tree nodes. Entries
+ * must match the member order of Dbspj::OpInfo; a 0 entry means the
+ * event is not applicable for lookups (e.g. no scan-related callbacks).
+ */
+const Dbspj::OpInfo
+Dbspj::g_LookupOpInfo =
+{
+  &Dbspj::lookup_build,
+  0, // prepare
+  &Dbspj::lookup_start,
+  &Dbspj::lookup_execTRANSID_AI,
+  &Dbspj::lookup_execLQHKEYREF,
+  &Dbspj::lookup_execLQHKEYCONF,
+  0, // execSCAN_FRAGREF
+  0, // execSCAN_FRAGCONF
+  &Dbspj::lookup_parent_row,
+  &Dbspj::lookup_parent_batch_complete,
+  0, // Dbspj::lookup_execSCAN_NEXTREQ
+  0, // Dbspj::lookup_complete
+  &Dbspj::lookup_abort,
+  &Dbspj::lookup_execNODE_FAILREP,
+  &Dbspj::lookup_cleanup
+};
+
+/**
+ * lookup_build: construct a lookup TreeNode from its serialized
+ * QueryNode/QueryNodeParameters. Pre-builds the static part of the
+ * LQHKEYREQ stored in the node, parses the common DA (data/attr)
+ * sections, and - for the root node started directly from a client
+ * signal - copies key/fragment info from that signal (T_ONE_SHOT).
+ * Returns 0 on success, else a DbspjErr code.
+ */
+Uint32
+Dbspj::lookup_build(Build_context& ctx,
+		    Ptr<Request> requestPtr,
+		    const QueryNode* qn,
+                    const QueryNodeParameters* qp)
+{
+  Uint32 err = 0;
+  Ptr<TreeNode> treeNodePtr;
+  const QN_LookupNode * node = (const QN_LookupNode*)qn;
+  const QN_LookupParameters * param = (const QN_LookupParameters*)qp;
+  do
+  {
+    err = createNode(ctx, requestPtr, treeNodePtr);
+    if (unlikely(err != 0))
+    {
+      DEBUG_CRASH();
+      break;
+    }
+
+    treeNodePtr.p->m_info = &g_LookupOpInfo;
+    Uint32 transId1 = requestPtr.p->m_transId[0];
+    Uint32 transId2 = requestPtr.p->m_transId[1];
+    Uint32 savePointId = ctx.m_savepointId;
+
+    Uint32 treeBits = node->requestInfo;
+    Uint32 paramBits = param->requestInfo;
+    //ndbout_c("Dbspj::lookup_build() treeBits=%.8x paramBits=%.8x", 
+    //         treeBits, paramBits);
+    LqhKeyReq* dst = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
+    {
+      /**
+       * static variables
+       */
+      dst->tcBlockref = reference();
+      dst->clientConnectPtr = treeNodePtr.i;
+
+      /**
+       * TODO reference()+treeNodePtr.i is passed twice
+       *   this can likely be optimized using the requestInfo-bits
+       * UPDATE: This can be accomplished by *not* setApplicationAddressFlag
+       *         and patch LQH to then instead use tcBlockref/clientConnectPtr
+       */
+      dst->transId1 = transId1;
+      dst->transId2 = transId2;
+      dst->savePointId = savePointId;
+      dst->scanInfo = 0;
+      dst->attrLen = 0;
+      /** Initialy set reply ref to client, do_send will set SPJ refs if non-LEAF */
+      dst->variableData[0] = ctx.m_resultRef;
+      dst->variableData[1] = param->resultData;  
+      Uint32 requestInfo = 0;
+      LqhKeyReq::setOperation(requestInfo, ZREAD);
+      LqhKeyReq::setApplicationAddressFlag(requestInfo, 1);
+      LqhKeyReq::setDirtyFlag(requestInfo, 1);
+      LqhKeyReq::setSimpleFlag(requestInfo, 1);
+      LqhKeyReq::setNormalProtocolFlag(requestInfo, 0);  // Assume T_LEAF 
+      LqhKeyReq::setAnyValueFlag(requestInfo, 1);
+      // Disk access only needed if tree or params reference disk attrs
+      LqhKeyReq::setNoDiskFlag(requestInfo, 
+                               (treeBits & DABits::NI_LINKED_DISK) == 0 &&
+                               (paramBits & DABits::PI_DISK_ATTR) == 0);
+      dst->requestInfo = requestInfo;
+    }
+
+    err = DbspjErr::InvalidTreeNodeSpecification;
+    if (unlikely(node->len < QN_LookupNode::NodeSize))
+    {
+      DEBUG_CRASH();
+      break;
+    }
+
+    if (treeBits & QN_LookupNode::L_UNIQUE_INDEX)
+    {
+      jam();
+      treeNodePtr.p->m_bits |= TreeNode::T_UNIQUE_INDEX_LOOKUP;
+    }
+
+    Uint32 tableId = node->tableId;
+    Uint32 schemaVersion = node->tableVersion;
+
+    // Pack tableId (low 16 bits) + schema version (high 16 bits)
+    Uint32 tableSchemaVersion = tableId + ((schemaVersion << 16) & 0xFFFF0000);
+    dst->tableSchemaVersion = tableSchemaVersion;
+
+    err = DbspjErr::InvalidTreeParametersSpecification;
+    DEBUG("param len: " << param->len);
+    if (unlikely(param->len < QN_LookupParameters::NodeSize))
+    {
+      DEBUG_CRASH();
+      break;
+    }
+
+    ctx.m_resultData = param->resultData;
+    treeNodePtr.p->m_lookup_data.m_api_resultRef = ctx.m_resultRef;
+    treeNodePtr.p->m_lookup_data.m_api_resultData = param->resultData;
+
+    /**
+     * Parse stuff common lookup/scan-frag
+     */
+    struct DABuffer nodeDA, paramDA;
+    nodeDA.ptr = node->optional;
+    nodeDA.end = nodeDA.ptr + (node->len - QN_LookupNode::NodeSize);
+    paramDA.ptr = param->optional;
+    paramDA.end = paramDA.ptr + (param->len - QN_LookupParameters::NodeSize);
+    err = parseDA(ctx, requestPtr, treeNodePtr,
+                  nodeDA, treeBits, paramDA, paramBits);
+    if (unlikely(err != 0))
+    {
+      DEBUG_CRASH();
+      break;
+    }
+
+    if (treeNodePtr.p->m_bits & TreeNode::T_ATTR_INTERPRETED)
+    {
+      jam();
+      LqhKeyReq::setInterpretedFlag(dst->requestInfo, 1);
+    }
+
+    /**
+     * Inherit batch size from parent
+     */
+    treeNodePtr.p->m_batch_size = 1;
+    if (treeNodePtr.p->m_parentPtrI != RNIL)
+    {
+      jam();
+      Ptr<TreeNode> parentPtr;
+      m_treenode_pool.getPtr(parentPtr, treeNodePtr.p->m_parentPtrI);
+      treeNodePtr.p->m_batch_size = parentPtr.p->m_batch_size;
+    }
+
+    if (ctx.m_start_signal)
+    {
+      jam();
+      // Root node started from a client signal: adopt its routing and key
+      Signal * signal = ctx.m_start_signal;
+      const LqhKeyReq* src = (const LqhKeyReq*)signal->getDataPtr();
+#if NOT_YET
+      Uint32 instanceNo = 
+        blockToInstance(signal->header.theReceiversBlockNumber);
+      treeNodePtr.p->m_send.m_ref = numberToRef(DBLQH, 
+                                                instanceNo, getOwnNodeId());
+#else
+      treeNodePtr.p->m_send.m_ref = 
+        numberToRef(DBLQH, getInstanceKey(src->tableSchemaVersion & 0xFFFF,
+                                          src->fragmentData & 0xFFFF),
+                    getOwnNodeId());
+#endif
+      
+      Uint32 hashValue = src->hashValue;
+      Uint32 fragId = src->fragmentData;
+      Uint32 requestInfo = src->requestInfo;
+      Uint32 attrLen = src->attrLen; // fragdist-key is in here
+
+      /**
+       * assertions
+       */
+      ndbassert(LqhKeyReq::getAttrLen(attrLen) == 0);         // Only long
+      ndbassert(LqhKeyReq::getScanTakeOverFlag(attrLen) == 0);// Not supported
+      ndbassert(LqhKeyReq::getReorgFlag(attrLen) == 0);       // Not supported
+      ndbassert(LqhKeyReq::getOperation(requestInfo) == ZREAD);
+      ndbassert(LqhKeyReq::getKeyLen(requestInfo) == 0);      // Only long
+      ndbassert(LqhKeyReq::getMarkerFlag(requestInfo) == 0);  // Only read
+      ndbassert(LqhKeyReq::getAIInLqhKeyReq(requestInfo) == 0);
+      ndbassert(LqhKeyReq::getSeqNoReplica(requestInfo) == 0);
+      ndbassert(LqhKeyReq::getLastReplicaNo(requestInfo) == 0);
+      ndbassert(LqhKeyReq::getApplicationAddressFlag(requestInfo) != 0);
+      ndbassert(LqhKeyReq::getSameClientAndTcFlag(requestInfo) == 0);
+
+#if TODO
+      /**
+       * Handle various lock-modes
+       */
+      static Uint8 getDirtyFlag(const UintR & requestInfo);
+      static Uint8 getSimpleFlag(const UintR & requestInfo);
+#endif
+
+      Uint32 dst_requestInfo = dst->requestInfo;
+      ndbassert(LqhKeyReq::getInterpretedFlag(requestInfo) ==
+                LqhKeyReq::getInterpretedFlag(dst_requestInfo));
+      ndbassert(LqhKeyReq::getNoDiskFlag(requestInfo) ==
+                LqhKeyReq::getNoDiskFlag(dst_requestInfo));
+
+      dst->hashValue = hashValue;
+      dst->fragmentData = fragId;
+      dst->attrLen = attrLen; // fragdist is in here
+      
+      // Key section from the start signal is consumed on first send
+      treeNodePtr.p->m_send.m_keyInfoPtrI = ctx.m_keyPtr.i;
+      treeNodePtr.p->m_bits |= TreeNode::T_ONE_SHOT;
+    }
+    return 0;
+  } while (0);
+  
+  return err;
+}
+
+/**
+ * lookup_start: entry point when a lookup tree-node is started;
+ * simply issues the pre-built LQHKEYREQ.
+ */
+void
+Dbspj::lookup_start(Signal* signal,
+		    Ptr<Request> requestPtr,
+		    Ptr<TreeNode> treeNodePtr)
+{
+  lookup_send(signal, requestPtr, treeNodePtr);
+}
+
+/**
+ * lookup_send: send the node's LQHKEYREQ (with KEYINFO/ATTRINFO
+ * sections) to the owning LQH. Expected reply count 'cnt' is 2
+ * (TRANSID_AI + CONF/REF) for non-leaf nodes, 1 for scan-request
+ * leaves, and 0 for lookup-request leaves (replies go straight to the
+ * API; a surrogate TCKEYCONF is emitted here instead).
+ */
+void
+Dbspj::lookup_send(Signal* signal,
+		   Ptr<Request> requestPtr,
+		   Ptr<TreeNode> treeNodePtr)
+{
+  jam();
+
+  Uint32 cnt = 2;
+  if (treeNodePtr.p->isLeaf())
+  {
+    jam();
+    if (requestPtr.p->isLookup())
+    {
+      jam();
+      cnt = 0;
+    }
+    else
+    {
+      jam();
+      cnt = 1;
+    }
+  }
+  
+  LqhKeyReq* req = reinterpret_cast<LqhKeyReq*>(signal->getDataPtrSend());
+
+  memcpy(req, treeNodePtr.p->m_lookup_data.m_lqhKeyReq,
+	 sizeof(treeNodePtr.p->m_lookup_data.m_lqhKeyReq));
+  req->variableData[2] = requestPtr.p->m_rootResultData;
+  req->variableData[3] = treeNodePtr.p->m_send.m_correlation;
+
+  if (!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()))
+  {
+    // Non-LEAF want reply to SPJ instead of ApiClient.
+    LqhKeyReq::setNormalProtocolFlag(req->requestInfo, 1);
+    req->variableData[0] = reference();
+    req->variableData[1] = treeNodePtr.i;
+  }
+  else
+  {
+    jam();
+    /**
+     * Fake that TC sent this request,
+     *   so that it can route a maybe TCKEYREF
+     */
+    req->tcBlockref = requestPtr.p->m_senderRef;
+  }
+
+  SectionHandle handle(this);
+
+  Uint32 ref = treeNodePtr.p->m_send.m_ref;
+  Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI;
+  Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
+
+  if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
+  {
+    jam();
+    /**
+     * Pass sections to send
+     */
+    treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
+    treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
+  }
+  else
+  {
+    // Constructed sections are consumed by the send; fixed sections
+    // must be duplicated so the node can be sent again for later rows
+    if ((treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED) == 0)
+    {
+      jam();
+      Uint32 tmp = RNIL;
+      ndbrequire(dupSection(tmp, keyInfoPtrI)); // TODO handle error
+      keyInfoPtrI = tmp;
+    }
+    else
+    {
+      jam();
+      treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
+    }
+
+    if ((treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED) == 0)
+    {
+      jam();
+      Uint32 tmp = RNIL;
+      ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error
+      attrInfoPtrI = tmp;
+    }
+    else
+    {
+      jam();
+      treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
+    }
+  }
+
+  getSection(handle.m_ptr[0], keyInfoPtrI);
+  getSection(handle.m_ptr[1], attrInfoPtrI);
+  handle.m_cnt = 2;
+
+#if defined DEBUG_LQHKEYREQ
+  ndbout_c("LQHKEYREQ to %x", ref);
+  printLQHKEYREQ(stdout, signal->getDataPtrSend(),
+		 NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
+                 DBLQH);
+  printf("KEYINFO: ");
+  print(handle.m_ptr[0], stdout);
+  printf("ATTRINFO: ");
+  print(handle.m_ptr[1], stdout);
+#endif
+  
+  Uint32 Tnode = refToNode(ref);
+  if (Tnode == getOwnNodeId())
+  {
+    c_Counters.incr_counter(CI_LOCAL_READS_SENT, 1);
+  }
+  else
+  {
+    c_Counters.incr_counter(CI_REMOTE_READS_SENT, 1);
+  }
+
+  if (unlikely(!c_alive_nodes.get(Tnode)))
+  {
+    jam();
+    releaseSections(handle);
+    abort(signal, requestPtr, DbspjErr::NodeFailure);
+    return;
+  }
+  else if (! (treeNodePtr.p->isLeaf() && requestPtr.p->isLookup()))
+  {
+    jam();
+    // Book-keep outstanding replies, per target node for NF handling
+    ndbassert(Tnode < NDB_ARRAY_SIZE(requestPtr.p->m_lookup_node_data));
+    requestPtr.p->m_outstanding += cnt;
+    requestPtr.p->m_lookup_node_data[Tnode] += cnt;
+    // number wrapped
+    ndbrequire(! (requestPtr.p->m_lookup_node_data[Tnode] == 0));
+  }
+
+  sendSignal(ref, GSN_LQHKEYREQ, signal,
+	     NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
+             JBB, &handle);
+
+  if (requestPtr.p->isLookup() && treeNodePtr.p->isLeaf())
+  {
+    jam();
+    /**
+     * Send TCKEYCONF with DirtyReadBit + Tnode,
+     *   so that API can discover if Tnode while waiting for result
+     */
+    Uint32 resultRef = req->variableData[0];
+    Uint32 resultData = req->variableData[1];
+
+    TcKeyConf* conf = (TcKeyConf*)signal->getDataPtrSend();
+    conf->apiConnectPtr = RNIL; // lookup transaction from operations...
+    conf->confInfo = 0;
+    TcKeyConf::setNoOfOperations(conf->confInfo, 1);
+    conf->transId1 = requestPtr.p->m_transId[0];
+    conf->transId2 = requestPtr.p->m_transId[1];
+    conf->operations[0].apiOperationPtr = resultData;
+    conf->operations[0].attrInfoLen = TcKeyConf::DirtyReadBit | Tnode;
+    Uint32 sigLen = TcKeyConf::StaticLength + TcKeyConf::OperationLength;
+    sendTCKEYCONF(signal, sigLen, resultRef, requestPtr.p->m_senderRef);
+  }
+}
+
+/**
+ * lookup_execTRANSID_AI: a result row arrived for this lookup node.
+ * Forward the row to every dependent child node, then account for the
+ * received reply and check for batch completion.
+ */
+void
+Dbspj::lookup_execTRANSID_AI(Signal* signal,
+			     Ptr<Request> requestPtr,
+			     Ptr<TreeNode> treeNodePtr,
+			     const RowPtr & rowRef)
+{
+  jam();
+
+  Uint32 Tnode = refToNode(signal->getSendersBlockRef());
+
+  {
+    // Fan the row out to all dependent (child) tree nodes
+    LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+    Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
+    Dependency_map::ConstDataBufferIterator it;
+    for (list.first(it); !it.isNull(); list.next(it))
+    {
+      jam();
+      Ptr<TreeNode> childPtr;
+      m_treenode_pool.getPtr(childPtr, * it.data);
+      ndbrequire(childPtr.p->m_info != 0&&childPtr.p->m_info->m_parent_row!=0);
+      (this->*(childPtr.p->m_info->m_parent_row))(signal,
+                                                  requestPtr, childPtr,rowRef);
+    }
+  }
+  // Lookup-request leaves reply directly to the API, never here
+  ndbrequire(!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()));
+
+  ndbassert(requestPtr.p->m_lookup_node_data[Tnode] >= 1);
+  requestPtr.p->m_lookup_node_data[Tnode] -= 1;
+
+  checkBatchComplete(signal, requestPtr, 1);
+}
+
+/**
+ * lookup_execLQHKEYREF: the lookup failed at LQH. For a lookup-type
+ * Request the error is forwarded to the API as TCKEYREF (plus a
+ * surrogate TCKEYCONF for a leaf unique-index child); for a scan-type
+ * Request "row not found"-class errors are ignored and anything else
+ * aborts the request. Finally the outstanding-reply accounting is
+ * adjusted.
+ */
+void
+Dbspj::lookup_execLQHKEYREF(Signal* signal,
+                            Ptr<Request> requestPtr,
+                            Ptr<TreeNode> treeNodePtr)
+{
+  const LqhKeyRef * rep = (LqhKeyRef*)signal->getDataPtr();
+  Uint32 errCode = rep->errorCode;
+  Uint32 Tnode = refToNode(signal->getSendersBlockRef());
+
+  if (requestPtr.p->isLookup())
+  {
+    jam();
+    
+    /* CONF/REF not requested for lookup-Leaf: */
+    ndbrequire(!treeNodePtr.p->isLeaf());
+
+    /**
+     * Scan-request does not need to
+     *   send TCKEYREF...
+     */
+    /**
+     * Return back to api...
+     *   NOTE: assume that signal is tampered with
+     */
+    Uint32 resultRef = treeNodePtr.p->m_lookup_data.m_api_resultRef;
+    Uint32 resultData = treeNodePtr.p->m_lookup_data.m_api_resultData;
+    TcKeyRef* ref = (TcKeyRef*)signal->getDataPtr();
+    ref->connectPtr = resultData;
+    ref->transId[0] = requestPtr.p->m_transId[0];
+    ref->transId[1] = requestPtr.p->m_transId[1];
+    ref->errorCode = errCode;
+    ref->errorData = 0;
+
+    DEBUG("lookup_execLQHKEYREF, errorCode:" << errCode);
+
+    sendTCKEYREF(signal, resultRef, requestPtr.p->m_senderRef);
+
+    if (treeNodePtr.p->m_bits & TreeNode::T_UNIQUE_INDEX_LOOKUP)
+    {
+      /**
+       * If this is a "leaf" unique index lookup
+       *   emit extra TCKEYCONF as would have been done with ordinary
+       *   operation
+       */
+      LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+      Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
+      Dependency_map::ConstDataBufferIterator it;
+      ndbrequire(list.first(it));
+      ndbrequire(list.getSize() == 1); // should only be 1 child
+      Ptr<TreeNode> childPtr;
+      m_treenode_pool.getPtr(childPtr, * it.data);
+      if (childPtr.p->m_bits & TreeNode::T_LEAF)
+      {
+        jam();
+        Uint32 resultRef = childPtr.p->m_lookup_data.m_api_resultRef;
+        Uint32 resultData = childPtr.p->m_lookup_data.m_api_resultData;
+        TcKeyConf* conf = (TcKeyConf*)signal->getDataPtr();
+        conf->apiConnectPtr = RNIL;
+        conf->confInfo = 0;
+        conf->gci_hi = 0;
+        TcKeyConf::setNoOfOperations(conf->confInfo, 1);
+        conf->transId1 = requestPtr.p->m_transId[0];
+        conf->transId2 = requestPtr.p->m_transId[1];
+        conf->operations[0].apiOperationPtr = resultData;
+        conf->operations[0].attrInfoLen =
+          TcKeyConf::DirtyReadBit |getOwnNodeId();
+        sendTCKEYCONF(signal, TcKeyConf::StaticLength + 2, resultRef, requestPtr.p->m_senderRef);
+      }
+    }
+  }
+  else
+  {
+    jam();
+    switch(errCode){
+    case 626: // Row not found
+    case 899: // Interpreter_exit_nok
+      jam();
+      // Benign for scans: simply yields no row for this branch
+      break;
+    default:
+      jam();
+      abort(signal, requestPtr, errCode);
+    }
+  }
+  
+  // A REF replaces both expected replies (TRANSID_AI never arrives)
+  Uint32 cnt = 2;
+  if (treeNodePtr.p->isLeaf())  // Can't be a lookup-Leaf, asserted above
+    cnt = 1;
+
+  ndbassert(requestPtr.p->m_lookup_node_data[Tnode] >= cnt);
+  requestPtr.p->m_lookup_node_data[Tnode] -= cnt;
+
+  checkBatchComplete(signal, requestPtr, cnt);
+}
+
+/**
+ * lookup_execLQHKEYCONF: the lookup succeeded at LQH. Count the row if
+ * the node delivers a user projection, then account for the received
+ * reply and check for batch completion.
+ */
+void
+Dbspj::lookup_execLQHKEYCONF(Signal* signal,
+                             Ptr<Request> requestPtr,
+                             Ptr<TreeNode> treeNodePtr)
+{
+  // Lookup-request leaves reply directly to the API, never here
+  ndbrequire(!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()));
+
+  Uint32 Tnode = refToNode(signal->getSendersBlockRef());
+
+  if (treeNodePtr.p->m_bits & TreeNode::T_USER_PROJECTION)
+  {
+    jam();
+    requestPtr.p->m_rows++;
+  }
+
+  ndbassert(requestPtr.p->m_lookup_node_data[Tnode] >= 1);
+  requestPtr.p->m_lookup_node_data[Tnode] -= 1;
+
+  checkBatchComplete(signal, requestPtr, 1);
+}
+
+void
+Dbspj::lookup_parent_row(Signal* signal,
+                          Ptr<Request> requestPtr,
+                          Ptr<TreeNode> treeNodePtr,
+                          const RowPtr & rowRef)
+{
+  /**
+   * Here we need to...
+   *   1) construct a key
+   *   2) compute hash     (normally TC)
+   *   3) get node for row (normally TC)
+   */
+  Uint32 err;
+  const LqhKeyReq* src = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
+  const Uint32 tableId = LqhKeyReq::getTableId(src->tableSchemaVersion);
+  const Uint32 corrVal = rowRef.m_src_correlation;
+
+  DEBUG("::lookup_parent_row");
+
+  do
+  {
+    Uint32 ptrI = RNIL;
+    if (treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED)
+    {
+      jam();
+      DEBUG("parent_row w/ T_KEYINFO_CONSTRUCTED");
+      /**
+       * Get key-pattern
+       */
+      LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+      Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
+
+      err = expand(ptrI, pattern, rowRef);
+      if (unlikely(err != 0))
+        break;
+
+      if (ptrI == RNIL)
+      {
+        jam();
+        /**
+         * We constructed a null-key...construct a zero-length key (even if we don't support it *now*)
+         *
+         *   (we actually did prior to joining mysql where null was treated as any other
+         *   value in a key). But mysql treats null in unique key as *wildcard*
+         *   which we don't support so well...and do nasty tricks in handler
+         *
+         * NOTE: should be *after* check for error
+         */
+        err = createEmptySection(ptrI);
+        if (unlikely(err != 0))
+          break;
+      }
+
+      treeNodePtr.p->m_send.m_keyInfoPtrI = ptrI;
+    }
+
+    BuildKeyReq tmp;
+    err = computeHash(signal, tmp, tableId, ptrI);
+    if (unlikely(err != 0))
+      break;
+
+    err = getNodes(signal, tmp, tableId);
+    if (unlikely(err != 0))
+      break;
+
+    Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
+    if (treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED)
+    {
+      jam();
+      Uint32 tmp = RNIL;
+      ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error
+
+      Uint32 org_size;
+      {
+        SegmentedSectionPtr ptr;
+        getSection(ptr, tmp);
+        org_size = ptr.sz;
+      }
+
+      LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+      Local_pattern_store pattern(pool, treeNodePtr.p->m_attrParamPattern);
+      err = expand(tmp, pattern, rowRef);
+      if (unlikely(err != 0))
+        break;
+
+    /**
+     * Update size of the subroutine section, which contains the arguments
+     */
+      SegmentedSectionPtr ptr;
+      getSection(ptr, tmp);
+      Uint32 new_size = ptr.sz;
+      Uint32 * sectionptrs = ptr.p->theData;
+      sectionptrs[4] = new_size - org_size;
+
+      treeNodePtr.p->m_send.m_attrInfoPtrI = tmp;
+    }
+
+    /**
+     * Now send...
+     */
+
+    /**
+     * TODO merge better with lookup_start (refactor)
+     */
+    {
+      /* We set the upper half word of m_correlation to the tuple ID
+       * of the parent, such that the API can match this tuple with its 
+       * parent.
+       * Then we re-use the tuple ID of the parent as the 
+       * tuple ID for this tuple also. Since the tuple ID
+       * is unique within this batch and SPJ block for the parent operation,
+       * it must also be unique for this operation. 
+       * This ensures that lookup operations with no user projection will 
+       * work, since such operations will have the same tuple ID as their 
+       * parents. The API will then be able to match a tuple with its 
+       * grandparent, even if it gets no tuple for the parent operation.*/
+      treeNodePtr.p->m_send.m_correlation = 
+        (corrVal << 16) + (corrVal & 0xffff);
+
+      treeNodePtr.p->m_send.m_ref = tmp.receiverRef;
+      LqhKeyReq * dst = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
+      dst->hashValue = tmp.hashInfo[0];
+      dst->fragmentData = tmp.fragId;
+      Uint32 attrLen = 0;
+      LqhKeyReq::setDistributionKey(attrLen, tmp.fragDistKey);
+      dst->attrLen = attrLen;
+      lookup_send(signal, requestPtr, treeNodePtr);
+
+      if (treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED)
+      {
+        jam();
+        // restore
+        treeNodePtr.p->m_send.m_attrInfoPtrI = attrInfoPtrI;
+      }
+    }
+    return;
+  } while (0);
+
+  ndbrequire(false);
+}
+
+/**
+ * Handle 'parent batch complete' for a lookup tree node.
+ *
+ * Lookups are sent immediately as each parent row arrives, so there is no
+ * batched state to flush here; the notification is simply passed through
+ * to the API via reportBatchComplete().
+ */
+void
+Dbspj::lookup_parent_batch_complete(Signal* signal,
+                             Ptr<Request> requestPtr,
+                             Ptr<TreeNode> treeNodePtr)
+{
+  jam();
+
+  /**
+   * lookups are performed directly...so we're not really interested in
+   *   parent_batch_complete...we only pass-through
+   */
+
+  /**
+   * but this method should only be called if we have T_REPORT_BATCH_COMPLETE
+   */
+  ndbassert(treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE);
+
+  reportBatchComplete(signal, requestPtr, treeNodePtr);
+}
+
+/**
+ * Abort handling for a lookup tree node.
+ *
+ * Intentionally a no-op: there is no cursor/scan to close for a lookup.
+ * NOTE(review): outstanding LQHKEYREQs are presumably accounted for by the
+ * normal CONF/REF paths and lookup_execNODE_FAILREP -- confirm.
+ */
+void
+Dbspj::lookup_abort(Signal* signal, 
+                    Ptr<Request> requestPtr,
+                    Ptr<TreeNode> treeNodePtr)
+{
+  jam();
+}
+
+/**
+ * Handle node failure for a lookup tree node.
+ *
+ * For every failed node in 'mask', take the count of lookups outstanding
+ * towards that node (m_lookup_node_data[node]), zero it, and deduct the
+ * accumulated sum from the request's m_outstanding counter.
+ *
+ * @return the number of outstanding operations cancelled by the failure(s).
+ */
+Uint32
+Dbspj::lookup_execNODE_FAILREP(Signal* signal,
+                               Ptr<Request> requestPtr,
+                               Ptr<TreeNode> treeNodePtr,
+                               NdbNodeBitmask mask)
+{
+  jam();
+  Uint32 node = 0;
+  Uint32 sum = 0;
+  // mask.find(node + 1) walks the set bits (failed node ids) in ascending
+  // order; stop early once nothing is outstanding for this request.
+  while (requestPtr.p->m_outstanding &&
+         ((node = mask.find(node + 1)) != NdbNodeBitmask::NotFound))
+  {
+    Uint32 cnt = requestPtr.p->m_lookup_node_data[node];
+    sum += cnt;
+    ndbassert(requestPtr.p->m_outstanding >= sum);
+    requestPtr.p->m_lookup_node_data[node] = 0;
+  }
+
+  if (sum)
+  {
+    jam();
+    ndbrequire(requestPtr.p->m_outstanding >= sum);
+    requestPtr.p->m_outstanding -= sum;
+  }
+
+  return sum;
+}
+
+/**
+ * Release per-treenode resources for a lookup node.
+ * No lookup-specific cleanup is needed beyond the common path.
+ */
+void
+Dbspj::lookup_cleanup(Ptr<Request> requestPtr,
+                      Ptr<TreeNode> treeNodePtr)
+{
+  cleanup_common(requestPtr, treeNodePtr);
+}
+
+
+/**
+ * Compute key hash(es) for tables needing special treatment: tables with
+ * character key attributes (which must be normalised via xfrm_key) and/or
+ * a distribution key that differs from the primary key (which needs a
+ * separate distribution hash).
+ *
+ * @param tableId  table whose key descriptor drives the transform
+ * @param dstHash  out: md5 hash of the (possibly normalised) primary key;
+ *                 dstHash[1] is overwritten with the distribution hash word
+ *                 when a separate distribution key exists
+ * @param src      key data; 64-bit aligned as required by md5_hash
+ * @param srcLen   key length in 32-bit words
+ * @param desc     key descriptor for tableId
+ * @return 0 on success, 290 if xfrm_key fails (corrupt key)
+ */
+Uint32
+Dbspj::handle_special_hash(Uint32 tableId, Uint32 dstHash[4],
+                           const Uint64* src,
+                           Uint32 srcLen,       // Len in #32bit words
+                           const KeyDescriptor* desc)
+{
+  const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS= 
+    (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
+  Uint64 alignedWorkspace[MAX_KEY_SIZE_IN_LONG_WORDS * MAX_XFRM_MULTIPLY];
+  const bool hasVarKeys = desc->noOfVarKeys > 0;
+  const bool hasCharAttr = desc->hasCharAttr;
+  const bool compute_distkey = desc->noOfDistrKeys > 0;
+  
+  const Uint64 *hashInput = 0;
+  Uint32 inputLen = 0;
+  Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX];
+  Uint32 * keyPartLenPtr;
+
+  /* Normalise KeyInfo into workspace if necessary */
+  if (hasCharAttr || (compute_distkey && hasVarKeys))
+  {
+    hashInput = alignedWorkspace;
+    keyPartLenPtr = keyPartLen;
+    inputLen = xfrm_key(tableId, 
+                        (Uint32*)src, 
+                        (Uint32*)alignedWorkspace, 
+                        sizeof(alignedWorkspace) >> 2, 
+                        keyPartLenPtr);
+    if (unlikely(inputLen == 0))
+    {
+      return 290;  // 'Corrupt key in TC, unable to xfrm'
+    }
+  } 
+  else 
+  {
+    /* Keyinfo already suitable for hash */
+    hashInput = src;
+    inputLen = srcLen;
+    keyPartLenPtr = 0;
+  }
+  
+  /* Calculate primary key hash */
+  md5_hash(dstHash, hashInput, inputLen);
+  
+  /* If the distribution key != primary key then we have to
+   * form a distribution key from the primary key and calculate 
+   * a separate distribution hash based on this
+   */
+  if (compute_distkey)
+  {
+    jam();
+    
+    Uint32 distrKeyHash[4];
+    /* Reshuffle primary key columns to get just distribution key */
+    Uint32 len = create_distr_key(tableId, (Uint32*)hashInput, (Uint32*)alignedWorkspace, keyPartLenPtr);
+    /* Calculate distribution key hash */
+    md5_hash(distrKeyHash, alignedWorkspace, len);
+
+    /* Just one word used for distribution */
+    dstHash[1] = distrKeyHash[1];
+  }
+  return 0;
+}
+
+/**
+ * Compute the key hash for a lookup.
+ *
+ * @param dst     out: dst.hashInfo receives the computed hash words
+ * @param tableId table used to fetch the key descriptor
+ * @param ptrI    section i-value holding the keyinfo to be hashed
+ * @return 0 on success, otherwise the error from handle_special_hash()
+ */
+Uint32
+Dbspj::computeHash(Signal* signal,
+		   BuildKeyReq& dst, Uint32 tableId, Uint32 ptrI)
+{
+  /**
+   * Essentially the same code as in Dbtc::hash().
+   * The code for user defined partitioning has been removed though.
+   */
+  SegmentedSectionPtr ptr;
+  getSection(ptr, ptrI);
+
+  /* NOTE:  md5_hash below require 64-bit alignment
+   */
+  const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS=
+    (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
+  Uint64 tmp64[MAX_KEY_SIZE_IN_LONG_WORDS];
+  Uint32 *tmp32 = (Uint32*)tmp64;
+  // Copy the segmented section into the aligned buffer before hashing
+  copy(tmp32, ptr);
+
+  const KeyDescriptor* desc = g_key_descriptor_pool.getPtr(tableId);
+  ndbrequire(desc != NULL);
+
+  bool need_special_hash = desc->hasCharAttr | (desc->noOfDistrKeys > 0);
+  if (need_special_hash)
+  {
+    jam();
+    return handle_special_hash(tableId, dst.hashInfo, tmp64, ptr.sz, desc);
+  }
+  else
+  {
+    jam();
+    md5_hash(dst.hashInfo, tmp64, ptr.sz);
+    return 0;
+  }
+}
+
+/**
+ * Resolve which node/fragment stores the row with the given hash, by
+ * direct-executing DIGETNODESREQ in DBDIH.  On success fills in
+ * dst.fragId, dst.fragDistKey and dst.receiverRef (the DBLQH instance on
+ * the primary node).
+ *
+ * @return 0 on success; errors currently crash (see TODO below)
+ */
+Uint32
+Dbspj::getNodes(Signal* signal, BuildKeyReq& dst, Uint32 tableId)
+{
+  Uint32 err;
+  DiGetNodesReq * req = (DiGetNodesReq *)&signal->theData[0];
+  req->tableId = tableId;
+  req->hashValue = dst.hashInfo[1];
+  req->distr_key_indicator = 0; // userDefinedPartitioning not supported!
+
+#if 1
+  EXECUTE_DIRECT(DBDIH, GSN_DIGETNODESREQ, signal,
+                 DiGetNodesReq::SignalLength);
+#else
+  sendSignal(DBDIH_REF, GSN_DIGETNODESREQ, signal,
+             DiGetNodesReq::SignalLength, JBB);
+  jamEntry();
+
+#endif
+
+  // DBDIH leaves the result in theData: word 0 is the error code
+  // (0 == success), remaining words are the DiGetNodesConf payload.
+  DiGetNodesConf * conf = (DiGetNodesConf *)&signal->theData[0];
+  err = signal->theData[0];
+  Uint32 Tdata2 = conf->reqinfo;
+  Uint32 nodeId = conf->nodes[0];
+  Uint32 instanceKey = (Tdata2 >> 24) & 127;
+
+  DEBUG("HASH to nodeId:" << nodeId << ", instanceKey:" << instanceKey);
+
+  jamEntry();
+  if (unlikely(err != 0))
+    goto error;
+
+  dst.fragId = conf->fragId;
+  dst.fragDistKey = (Tdata2 >> 16) & 255;
+  dst.receiverRef = numberToRef(DBLQH, instanceKey, nodeId);
+
+  return 0;
+
+error:
+  /**
+   * TODO handle error
+   */
+  ndbrequire(false);
+  return err;
 }
+
 /**
- * END - MODULE GENERIC
+ * END - MODULE LOOKUP
  */
 
 /**
- * MODULE LOOKUP
+ * MODULE SCAN FRAG
+ *
+ * NOTE: This may only be root node
  */
 const Dbspj::OpInfo
-Dbspj::g_LookupOpInfo =
+Dbspj::g_ScanFragOpInfo =
 {
-  &Dbspj::lookup_build,
-  &Dbspj::lookup_start,
-  &Dbspj::lookup_execTRANSID_AI,
-  &Dbspj::lookup_execLQHKEYREF,
-  &Dbspj::lookup_execLQHKEYCONF,
-  0, // execSCAN_FRAGREF
-  0, // execSCAN_FRAGCONF
-  &Dbspj::lookup_start_child,
-  0, // Dbspj::lookup_execSCAN_NEXTREQ
-  0, // Dbspj::lookup_complete
-  0, // Dbspj::lookup_abort
-  &Dbspj::lookup_cleanup,
-  &Dbspj::lookup_count_descendant_signal
+  &Dbspj::scanFrag_build,
+  0, // prepare
+  &Dbspj::scanFrag_start,
+  &Dbspj::scanFrag_execTRANSID_AI,
+  0, // execLQHKEYREF
+  0, // execLQHKEYCONF
+  &Dbspj::scanFrag_execSCAN_FRAGREF,
+  &Dbspj::scanFrag_execSCAN_FRAGCONF,
+  &Dbspj::scanFrag_parent_row,
+  &Dbspj::scanFrag_parent_batch_complete,
+  &Dbspj::scanFrag_execSCAN_NEXTREQ,
+  0, // Dbspj::scanFrag_complete
+  &Dbspj::scanFrag_abort,
+  0, // execNODE_FAILREP,
+  &Dbspj::scanFrag_cleanup
 };
 
 Uint32
-Dbspj::lookup_build(Build_context& ctx,
-		    Ptr<Request> requestPtr,
-		    const QueryNode* qn,
-                    const QueryNodeParameters* qp)
+Dbspj::scanFrag_build(Build_context& ctx,
+		      Ptr<Request> requestPtr,
+		      const QueryNode* qn,
+		      const QueryNodeParameters* qp)
 {
   Uint32 err = 0;
   Ptr<TreeNode> treeNodePtr;
-  const QN_LookupNode * node = (const QN_LookupNode*)qn;
-  const QN_LookupParameters * param = (const QN_LookupParameters*)qp;
+  const QN_ScanFragNode * node = (const QN_ScanFragNode*)qn;
+  const QN_ScanFragParameters * param = (const QN_ScanFragParameters*)qp;
+
   do
   {
     err = createNode(ctx, requestPtr, treeNodePtr);
     if (unlikely(err != 0))
-    {
-      DEBUG_CRASH();
       break;
-    }
 
-    treeNodePtr.p->m_info = &g_LookupOpInfo;
+    requestPtr.p->m_bits |= Request::RT_SCAN;
+    treeNodePtr.p->m_info = &g_ScanFragOpInfo;
+    treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
+    treeNodePtr.p->m_batch_size = ctx.m_batch_size_rows;
+
+    ScanFragReq*dst=(ScanFragReq*)treeNodePtr.p->m_scanfrag_data.m_scanFragReq;
+    dst->senderData = treeNodePtr.i;
+    dst->resultRef = reference();
+    dst->resultData = treeNodePtr.i;
+    dst->savePointId = ctx.m_savepointId;
+
     Uint32 transId1 = requestPtr.p->m_transId[0];
     Uint32 transId2 = requestPtr.p->m_transId[1];
-    Uint32 savePointId = ctx.m_savepointId;
+    dst->transId1 = transId1;
+    dst->transId2 = transId2;
 
     Uint32 treeBits = node->requestInfo;
     Uint32 paramBits = param->requestInfo;
-    //ndbout_c("Dbspj::lookup_build() treeBits=%.8x paramBits=%.8x", 
+    //ndbout_c("Dbspj::scanFrag_build() treeBits=%.8x paramBits=%.8x", 
     //         treeBits, paramBits);
-    LqhKeyReq* dst = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
-    {
-      /**
-       * static variables
-       */
-      dst->tcBlockref = reference();
-      dst->clientConnectPtr = treeNodePtr.i;
-
-      /**
-       * TODO reference()+treeNodePtr.i is passed twice
-       *   this can likely be optimized using the requestInfo-bits
-       * UPDATE: This can be accomplished by *not* setApplicationAddressFlag
-       *         and patch LQH to then instead use tcBlockref/clientConnectPtr
-       */
-      dst->transId1 = transId1;
-      dst->transId2 = transId2;
-      dst->savePointId = savePointId;
-      dst->scanInfo = 0;
-      dst->attrLen = 0;
-      /** Initialy set reply ref to client, do_send will set SPJ refs if non-LEAF */
-      dst->variableData[0] = ctx.m_resultRef;
-      dst->variableData[1] = param->resultData;  
-      Uint32 requestInfo = 0;
-      LqhKeyReq::setOperation(requestInfo, ZREAD);
-      LqhKeyReq::setApplicationAddressFlag(requestInfo, 1);
-      LqhKeyReq::setDirtyFlag(requestInfo, 1);
-      LqhKeyReq::setSimpleFlag(requestInfo, 1);
-      LqhKeyReq::setNormalProtocolFlag(requestInfo, 0);  // Assume T_LEAF 
-      LqhKeyReq::setAnyValueFlag(requestInfo, 1);
-      LqhKeyReq::setNoDiskFlag(requestInfo, 
+    Uint32 requestInfo = 0;
+    ScanFragReq::setReadCommittedFlag(requestInfo, 1);
+    ScanFragReq::setScanPrio(requestInfo, ctx.m_scanPrio);
+    ScanFragReq::setAnyValueFlag(requestInfo, 1);
+    ScanFragReq::setNoDiskFlag(requestInfo, 
                                (treeBits & DABits::NI_LINKED_DISK) == 0 &&
                                (paramBits & DABits::PI_DISK_ATTR) == 0);
-      dst->requestInfo = requestInfo;
-    }
+    dst->requestInfo = requestInfo;
 
     err = DbspjErr::InvalidTreeNodeSpecification;
-    if (unlikely(node->len < QN_LookupNode::NodeSize))
-    {
-      DEBUG_CRASH();
+    DEBUG("scanFrag_build: len=" << node->len);
+    if (unlikely(node->len < QN_ScanFragNode::NodeSize))
       break;
-    }
-
-    if (treeBits & QN_LookupNode::L_UNIQUE_INDEX)
-    {
-      jam();
-      treeNodePtr.p->m_bits |= TreeNode::T_UNIQUE_INDEX_LOOKUP;
-    }
-
-    Uint32 tableId = node->tableId;
-    Uint32 schemaVersion = node->tableVersion;
 
-    Uint32 tableSchemaVersion = tableId + ((schemaVersion << 16) & 0xFFFF0000);
-    dst->tableSchemaVersion = tableSchemaVersion;
+    dst->tableId = node->tableId;
+    dst->schemaVersion = node->tableVersion;
 
     err = DbspjErr::InvalidTreeParametersSpecification;
     DEBUG("param len: " << param->len);
-    if (unlikely(param->len < QN_LookupParameters::NodeSize))
+    if (unlikely(param->len < QN_ScanFragParameters::NodeSize))
     {
+      jam();
       DEBUG_CRASH();
       break;
     }
 
     ctx.m_resultData = param->resultData;
-    treeNodePtr.p->m_lookup_data.m_api_resultRef = ctx.m_resultRef;
-    treeNodePtr.p->m_lookup_data.m_api_resultData = param->resultData;
-    treeNodePtr.p->m_lookup_data.m_outstanding = 0;
 
     /**
      * Parse stuff common lookup/scan-frag
      */
     struct DABuffer nodeDA, paramDA;
     nodeDA.ptr = node->optional;
-    nodeDA.end = nodeDA.ptr + (node->len - QN_LookupNode::NodeSize);
+    nodeDA.end = nodeDA.ptr + (node->len - QN_ScanFragNode::NodeSize);
     paramDA.ptr = param->optional;
-    paramDA.end = paramDA.ptr + (param->len - QN_LookupParameters::NodeSize);
+    paramDA.end = paramDA.ptr + (param->len - QN_ScanFragParameters::NodeSize);
     err = parseDA(ctx, requestPtr, treeNodePtr,
                   nodeDA, treeBits, paramDA, paramBits);
     if (unlikely(err != 0))
     {
+      jam();
       DEBUG_CRASH();
       break;
     }
 
-    if (treeNodePtr.p->m_bits & TreeNode::T_ATTR_INTERPRETED)
+    ctx.m_scan_cnt++;
+    /**
+     * In the scenario with only 1 scan in the tree,
+     *   register the cursor here, so we don't need to search for it after build.
+     * If m_scan_cnt > 1,
+     *   then this list will simply be cleared after build.
+     */
+    registerCursor(requestPtr, treeNodePtr);
+
+    if (ctx.m_start_signal)
     {
       jam();
-      LqhKeyReq::setInterpretedFlag(dst->requestInfo, 1);
-    }
+      Signal* signal = ctx.m_start_signal;
+      const ScanFragReq* src = (const ScanFragReq*)(signal->getDataPtr());
 
-    treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
+#if NOT_YET
+      Uint32 instanceNo = 
+        blockToInstance(signal->header.theReceiversBlockNumber);
+      treeNodePtr.p->m_send.m_ref = numberToRef(DBLQH, 
+                                                instanceNo, getOwnNodeId());
+#else
+      treeNodePtr.p->m_send.m_ref = 
+        numberToRef(DBLQH, getInstanceKey(src->tableId,
+                                          src->fragmentNoKeyLen),
+                    getOwnNodeId());
+#endif
+      
+      Uint32 fragId = src->fragmentNoKeyLen;
+      Uint32 requestInfo = src->requestInfo;
+      Uint32 batch_size_bytes = src->batch_size_bytes;
+      Uint32 batch_size_rows = src->batch_size_rows;
+      
+#ifdef VM_TRACE
+      Uint32 savePointId = src->savePointId;
+      Uint32 tableId = src->tableId;
+      Uint32 schemaVersion = src->schemaVersion;
+      Uint32 transId1 = src->transId1;
+      Uint32 transId2 = src->transId2;
+#endif
+      ndbassert(ScanFragReq::getLockMode(requestInfo) == 0);
+      ndbassert(ScanFragReq::getHoldLockFlag(requestInfo) == 0);
+      ndbassert(ScanFragReq::getKeyinfoFlag(requestInfo) == 0);
+      ndbassert(ScanFragReq::getReadCommittedFlag(requestInfo) == 1);
+      ndbassert(ScanFragReq::getLcpScanFlag(requestInfo) == 0);
+      //ScanFragReq::getAttrLen(requestInfo); // ignore
+      ndbassert(ScanFragReq::getReorgFlag(requestInfo) == 0);
+      
+      Uint32 tupScanFlag = ScanFragReq::getTupScanFlag(requestInfo);
+      Uint32 rangeScanFlag = ScanFragReq::getRangeScanFlag(requestInfo);
+      Uint32 descendingFlag = ScanFragReq::getDescendingFlag(requestInfo);
+      Uint32 scanPrio = ScanFragReq::getScanPrio(requestInfo);
+      
+      Uint32 dst_requestInfo = dst->requestInfo;
+      
+      ScanFragReq::setTupScanFlag(dst_requestInfo,tupScanFlag);
+      ScanFragReq::setRangeScanFlag(dst_requestInfo,rangeScanFlag);
+      ScanFragReq::setDescendingFlag(dst_requestInfo,descendingFlag);
+      ScanFragReq::setScanPrio(dst_requestInfo,scanPrio);
+
+      /**
+       * 'NoDiskFlag' should agree with information in treeNode
+       */
+      ndbassert(ScanFragReq::getNoDiskFlag(requestInfo) ==
+                ScanFragReq::getNoDiskFlag(dst_requestInfo));
+
+      dst->fragmentNoKeyLen = fragId;
+      dst->requestInfo = dst_requestInfo;
+      dst->batch_size_bytes = batch_size_bytes;
+      dst->batch_size_rows = batch_size_rows;
+      
+#ifdef VM_TRACE
+      ndbassert(dst->savePointId == savePointId);
+      ndbassert(dst->tableId == tableId);
+      ndbassert(dst->schemaVersion == schemaVersion);
+      ndbassert(dst->transId1 == transId1);
+      ndbassert(dst->transId2 == transId2);
+#endif
+      
+      treeNodePtr.p->m_send.m_keyInfoPtrI = ctx.m_keyPtr.i;
+      treeNodePtr.p->m_bits |= TreeNode::T_ONE_SHOT;
+
+      if (rangeScanFlag)
+      {
+        c_Counters.incr_counter(CI_RANGE_SCANS_RECEIVED, 1);
+      }
+      else
+      {
+        c_Counters.incr_counter(CI_TABLE_SCANS_RECEIVED, 1);
+      }
+    }
+    else
+    {
+      ndbrequire(false);
+    }
 
     return 0;
   } while (0);
@@ -1215,1343 +3444,1394 @@ Dbspj::lookup_build(Build_context& ctx,
 }
 
 void
-Dbspj::lookup_start(Signal* signal,
-		    Ptr<Request> requestPtr,
-		    Ptr<TreeNode> treeNodePtr,
-		    SegmentedSectionPtr keyInfo)
+Dbspj::scanFrag_start(Signal* signal,
+		      Ptr<Request> requestPtr,
+		      Ptr<TreeNode> treeNodePtr)
+{      
+  scanFrag_send(signal, requestPtr, treeNodePtr);
+}
+
+void
+Dbspj::scanFrag_send(Signal* signal,
+		     Ptr<Request> requestPtr,
+		     Ptr<TreeNode> treeNodePtr)
 {
-  const LqhKeyReq* src = reinterpret_cast<const LqhKeyReq*>(signal->getDataPtr());
+  jam();
 
-#if NOT_YET
-  Uint32 instanceNo = blockToInstance(signal->header.theReceiversBlockNumber);
-  treeNodePtr.p->m_send.m_ref = numberToRef(DBLQH, instanceNo, getOwnNodeId());
-#else
-  treeNodePtr.p->m_send.m_ref = 
-    numberToRef(DBLQH, getInstanceKey(src->tableSchemaVersion & 0xFFFF,
-                                      src->fragmentData & 0xFFFF),
-                getOwnNodeId());
-#endif
-
-  Uint32 hashValue = src->hashValue;
-  Uint32 fragId = src->fragmentData;
-  Uint32 requestInfo = src->requestInfo;
-  Uint32 attrLen = src->attrLen; // fragdist-key is in here
-  Uint32 interpretedFlag = LqhKeyReq::getInterpretedFlag(requestInfo);
-
-  /**
-   * assertions
-   */
-  ndbassert(LqhKeyReq::getAttrLen(attrLen) == 0);         // Only long
-  ndbassert(LqhKeyReq::getScanTakeOverFlag(attrLen) == 0);// Not supported
-  ndbassert(LqhKeyReq::getReorgFlag(attrLen) == 0);       // Not supported
-  ndbassert(LqhKeyReq::getOperation(requestInfo) == ZREAD);
-  ndbassert(LqhKeyReq::getKeyLen(requestInfo) == 0);      // Only long
-  ndbassert(LqhKeyReq::getMarkerFlag(requestInfo) == 0);  // Only read
-  ndbassert(LqhKeyReq::getAIInLqhKeyReq(requestInfo) == 0);
-  ndbassert(LqhKeyReq::getSeqNoReplica(requestInfo) == 0);
-  ndbassert(LqhKeyReq::getLastReplicaNo(requestInfo) == 0);
-  ndbassert(LqhKeyReq::getApplicationAddressFlag(requestInfo) != 0);
-  ndbassert(LqhKeyReq::getSameClientAndTcFlag(requestInfo) == 0);
+  requestPtr.p->m_outstanding++;
+  requestPtr.p->m_cnt_active ++;
+  treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
+
+  ScanFragReq* req = reinterpret_cast<ScanFragReq*>(signal->getDataPtrSend());
+
+  memcpy(req, treeNodePtr.p->m_scanfrag_data.m_scanFragReq,
+	 sizeof(treeNodePtr.p->m_scanfrag_data.m_scanFragReq));
+  req->variableData[0] = requestPtr.p->m_rootResultData;
+  req->variableData[1] = treeNodePtr.p->m_send.m_correlation;
+
+  SectionHandle handle(this);
+
+  Uint32 ref = treeNodePtr.p->m_send.m_ref;
+  Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI;
+  Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
 
-#if TODO
   /**
-   * Handle various lock-modes
+   * ScanFrag may only be used as root-node, i.e T_ONE_SHOT
    */
-  static Uint8 getDirtyFlag(const UintR & requestInfo);
-  static Uint8 getSimpleFlag(const UintR & requestInfo);
-#endif
+  ndbrequire(treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT);
 
-  LqhKeyReq * dst = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
-  Uint32 dst_requestInfo = dst->requestInfo;
   /**
-   * 'InterpretedFlag' and 'NoDiskFlag' should agree with information in treeNode
+   * Pass sections to send
    */
-  ndbassert(LqhKeyReq::getInterpretedFlag(requestInfo) ==
-            LqhKeyReq::getInterpretedFlag(dst_requestInfo));
-  ndbassert(LqhKeyReq::getNoDiskFlag(requestInfo) ==
-            LqhKeyReq::getNoDiskFlag(dst_requestInfo));
+  treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
+  treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
 
-  dst->hashValue = hashValue;
-  dst->requestInfo = dst_requestInfo;
-  dst->fragmentData = fragId;
-  dst->attrLen = attrLen; // fragdist is in here
+  getSection(handle.m_ptr[0], attrInfoPtrI);
+  handle.m_cnt = 1;
 
-  treeNodePtr.p->m_send.m_keyInfoPtrI = keyInfo.i;
+  if (keyInfoPtrI != RNIL)
+  {
+    jam();
+    getSection(handle.m_ptr[1], keyInfoPtrI);
+    handle.m_cnt = 2;
+  }
 
-  lookup_send(signal, requestPtr, treeNodePtr);
+#ifdef DEBUG_SCAN_FRAGREQ
+  ndbout_c("SCAN_FRAGREQ to %x", ref);
+  printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
+                    NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
+                    DBLQH);
+  printf("ATTRINFO: ");
+  print(handle.m_ptr[0], stdout);
+  if (handle.m_cnt > 1)
+  {
+    printf("KEYINFO: ");
+    print(handle.m_ptr[1], stdout);
+  }
+#endif
+
+  if (ScanFragReq::getRangeScanFlag(req->requestInfo))
+  {
+    c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1);
+  }
+  else
+  {
+    c_Counters.incr_counter(CI_LOCAL_TABLE_SCANS_SENT, 1);
+  }
+
+  ndbrequire(refToNode(ref) == getOwnNodeId());
+  sendSignal(ref, GSN_SCAN_FRAGREQ, signal,
+	     NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
+             JBB, &handle);
+
+  treeNodePtr.p->m_scanfrag_data.m_rows_received = 0;
+  treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
 }
 
 void
-Dbspj::lookup_send(Signal* signal,
-		   Ptr<Request> requestPtr,
-		   Ptr<TreeNode> treeNodePtr)
+Dbspj::scanFrag_execTRANSID_AI(Signal* signal,
+			       Ptr<Request> requestPtr,
+			       Ptr<TreeNode> treeNodePtr,
+			       const RowPtr & rowRef)
 {
   jam();
+  treeNodePtr.p->m_scanfrag_data.m_rows_received++;
+
+  LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+  Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
+  Dependency_map::ConstDataBufferIterator it;
 
-  if (treeNodePtr.p->m_state == TreeNode::TN_INACTIVE)
+  {
+    for (list.first(it); !it.isNull(); list.next(it))
+    {
+      jam();
+      Ptr<TreeNode> childPtr;
+      m_treenode_pool.getPtr(childPtr, * it.data);
+      ndbrequire(childPtr.p->m_info != 0&&childPtr.p->m_info->m_parent_row!=0);
+      (this->*(childPtr.p->m_info->m_parent_row))(signal,
+                                                   requestPtr, childPtr,rowRef);
+    }
+  }
+
+  if (treeNodePtr.p->m_scanfrag_data.m_rows_received == 
+      treeNodePtr.p->m_scanfrag_data.m_rows_expecting)
   {
     jam();
-    treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
-    requestPtr.p->m_cnt_active++;
+
+    if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
+    {
+      jam();
+      reportBatchComplete(signal, requestPtr, treeNodePtr);
+    }
+    
+    checkBatchComplete(signal, requestPtr, 1);
+    return;
   }
+}
 
-  LqhKeyReq* req = reinterpret_cast<LqhKeyReq*>(signal->getDataPtrSend());
+void
+Dbspj::scanFrag_execSCAN_FRAGREF(Signal* signal,
+                                 Ptr<Request> requestPtr,
+                                 Ptr<TreeNode> treeNodePtr)
+{
+  const ScanFragRef* rep = 
+    reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());
+  Uint32 errCode = rep->errorCode;
 
-  memcpy(req, treeNodePtr.p->m_lookup_data.m_lqhKeyReq,
-	 sizeof(treeNodePtr.p->m_lookup_data.m_lqhKeyReq));
-  req->variableData[2] = requestPtr.p->m_rootResultData;
-  req->variableData[3] = treeNodePtr.p->m_send.m_correlation;
+  DEBUG("scanFrag_execSCAN_FRAGREF, rep->senderData:" << rep->senderData
+         << ", requestPtr.p->m_senderData:" << requestPtr.p->m_senderData);
 
-  if (!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()))
-  {
-    // Non-LEAF want reply to SPJ instead of ApiClient.
-    LqhKeyReq::setNormalProtocolFlag(req->requestInfo, 1);
-    req->variableData[0] = reference();
-    req->variableData[1] = treeNodePtr.i;
-  }
+  ndbrequire(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
+  ndbrequire(requestPtr.p->m_cnt_active);
+  requestPtr.p->m_cnt_active --;
+  ndbrequire(requestPtr.p->m_outstanding);
+  requestPtr.p->m_outstanding --;
+  treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
 
-  SectionHandle handle(this);
+  abort(signal, requestPtr, errCode);
+}
 
-  Uint32 ref = treeNodePtr.p->m_send.m_ref;
-  Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI;
-  Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
 
-  if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
+void
+Dbspj::scanFrag_execSCAN_FRAGCONF(Signal* signal,
+                                  Ptr<Request> requestPtr,
+                                  Ptr<TreeNode> treeNodePtr)
+{
+  const ScanFragConf * conf = 
+    reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());
+  Uint32 rows = conf->completedOps;
+  Uint32 done = conf->fragmentCompleted;
+  
+  ndbrequire(done <= 2); // 0, 1, 2 (=ZSCAN_FRAG_CLOSED)
+
+  ndbassert(treeNodePtr.p->m_scanfrag_data.m_rows_expecting == ~Uint32(0));
+  treeNodePtr.p->m_scanfrag_data.m_rows_expecting = rows;
+  if (treeNodePtr.p->isLeaf())
   {
-    jam();
     /**
-     * Pass sections to send
+     * If this is a leaf node, then no rows will be sent to the SPJ block,
+     * as there are no child operations to instantiate.
      */
-    treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
-    treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
+    treeNodePtr.p->m_scanfrag_data.m_rows_received = rows;
   }
-  else
+
+  requestPtr.p->m_rows += rows;
+  if (done)
   {
-    if ((treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED) == 0)
-    {
-      jam();
-      Uint32 tmp = RNIL;
-      ndbrequire(dupSection(tmp, keyInfoPtrI)); // TODO handle error
-      keyInfoPtrI = tmp;
-    }
-    else
+    ndbrequire(requestPtr.p->m_cnt_active);
+    requestPtr.p->m_cnt_active --;
+    treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
+  }
+
+  if (treeNodePtr.p->m_scanfrag_data.m_rows_expecting ==
+      treeNodePtr.p->m_scanfrag_data.m_rows_received)
+  {
+    jam();
+
+    if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
     {
       jam();
-      treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
+      reportBatchComplete(signal, requestPtr, treeNodePtr);
     }
 
-    if ((treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED) == 0)
+    checkBatchComplete(signal, requestPtr, 1);
+    return;
+  }
+
+  if (unlikely((requestPtr.p->m_state & Request::RS_ABORTING) != 0))
+  {
+    jam();
+    /**
+     * We should have sent SCAN_NEXTREQ(close=true) 
+     *   and will get a done=true response...
+     *
+     * If 
+     *   done=true, no more response from LQH will arrive
+     *              call checkBatchComplete is not done...
+     *
+     *   done=false, the "close=true" signal is still being processed
+     */
+    if (done)
     {
       jam();
-      Uint32 tmp = RNIL;
-      ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error
-      attrInfoPtrI = tmp;
+      if (! (treeNodePtr.p->m_scanfrag_data.m_rows_expecting ==
+             treeNodePtr.p->m_scanfrag_data.m_rows_received))
+      {
+        jam();
+        checkBatchComplete(signal, requestPtr, 1);
+      }
+      return;
     }
     else
     {
       jam();
-      treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
+      /**
+       * resetting m_rows_expecting to ~0
+       *   as another SCAN_FRAGCONF should arrive
+       */
+      treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
     }
   }
+}
 
-  getSection(handle.m_ptr[0], keyInfoPtrI);
-  getSection(handle.m_ptr[1], attrInfoPtrI);
-  handle.m_cnt = 2;
-
-#if defined DEBUG_LQHKEYREQ
-  ndbout_c("LQHKEYREQ to %x", ref);
-  printLQHKEYREQ(stdout, signal->getDataPtrSend(),
-		 NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
-                 DBLQH);
-  printf("KEYINFO: ");
-  print(handle.m_ptr[0], stdout);
-  printf("ATTRINFO: ");
-  print(handle.m_ptr[1], stdout);
-#endif
-  
-  if (refToNode(ref) == getOwnNodeId())
-  {
-    c_Counters.incr_counter(CI_LOCAL_READS_SENT, 1);
-  }
-  else
-  {
-    c_Counters.incr_counter(CI_REMOTE_READS_SENT, 1);
-  }
+/**
+ * A SCAN_FRAG tree node may only be the root of the query tree (see module
+ * header), so it can never be triggered by a parent row -- crash if it is.
+ */
+void
+Dbspj::scanFrag_parent_row(Signal* signal,
+                            Ptr<Request> requestPtr,
+                            Ptr<TreeNode> treeNodePtr,
+                            const RowPtr & rowRef)
+{
+  jam();
+  ndbrequire(false); // unreachable: scanFrag has no parent
+}
 
-  sendSignal(ref, GSN_LQHKEYREQ, signal,
-	     NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
-             JBB, &handle);
+/**
+ * A SCAN_FRAG tree node may only be the root of the query tree (see module
+ * header), so it has no parent whose batch could complete -- crash if called.
+ */
+void
+Dbspj::scanFrag_parent_batch_complete(Signal* signal,
+                                      Ptr<Request> requestPtr,
+                                      Ptr<TreeNode> treeNodePtr)
+{
+  jam();
+  ndbrequire(false); // unreachable: scanFrag has no parent
+}
 
-  if (requestPtr.p->isLookup() && treeNodePtr.p->isLeaf())
-  {
-    jam();
-    Uint32 resultRef = treeNodePtr.p->m_lookup_data.m_api_resultRef;
-    Uint32 resultData = treeNodePtr.p->m_lookup_data.m_api_resultData;
-    TcKeyConf* conf = (TcKeyConf*)signal->getDataPtr();
-    conf->apiConnectPtr = RNIL;
-    conf->confInfo = 0;
-    conf->gci_hi = 0;
-    TcKeyConf::setNoOfOperations(conf->confInfo, 1);
-    conf->transId1 = requestPtr.p->m_transId[0];
-    conf->transId2 = requestPtr.p->m_transId[1];
-    conf->operations[0].apiOperationPtr = resultData;
-    conf->operations[0].attrInfoLen = TcKeyConf::DirtyReadBit |(refToNode(ref));
-    sendSignal(resultRef, GSN_TCKEYCONF, signal,
-               TcKeyConf::StaticLength + 2, JBB);
-  }
+void
+Dbspj::scanFrag_execSCAN_NEXTREQ(Signal* signal, 
+                                 Ptr<Request> requestPtr,
+                                 Ptr<TreeNode> treeNodePtr)
+{
+  jamEntry();
 
-  Uint32 add = 2;
-  if (treeNodePtr.p->isLeaf())
-  {
-    jam();
-    /** Lookup queries leaf nodes should not reply to SPJ */ 
-    add = requestPtr.p->isLookup() ? 0 : 1;
-  }
-  treeNodePtr.p->m_lookup_data.m_outstanding += add;
+  const ScanFragReq * org =
+    (ScanFragReq*)treeNodePtr.p->m_scanfrag_data.m_scanFragReq;
 
-  const Ptr<TreeNode> root = getRoot(requestPtr.p->m_nodes);
-  (this->*(root.p->m_info->m_count_descendant_signal))(NULL,
-                                                       requestPtr,
-                                                       treeNodePtr,
-                                                       root,
-                                                       GSN_LQHKEYREQ);
+  ScanFragNextReq* req = 
+    reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
+  req->senderData = treeNodePtr.i;
+  req->closeFlag = 0;
+  req->transId1 = requestPtr.p->m_transId[0];
+  req->transId2 = requestPtr.p->m_transId[1];
+  req->batch_size_rows = org->batch_size_rows;
+  req->batch_size_bytes = org->batch_size_bytes;
 
-  /** Lookup leaf-request may finish immediately - LQH reply directly to API */ 
-  if (treeNodePtr.p->m_lookup_data.m_outstanding == 0)
-  {
-    jam();
-    ndbrequire(requestPtr.p->isLookup());
-    ndbrequire(treeNodePtr.p->isLeaf());
-    nodeFinished(signal, requestPtr, treeNodePtr);
-  }
-}
+  DEBUG("scanFrag_execSCAN_NEXTREQ to: " << treeNodePtr.p->m_send.m_ref
+        << ", senderData: " << req->senderData);
+  
+  sendSignal(treeNodePtr.p->m_send.m_ref, 
+             GSN_SCAN_NEXTREQ, 
+             signal, 
+             ScanFragNextReq::SignalLength, 
+             JBB);
+  
+  treeNodePtr.p->m_scanfrag_data.m_rows_received = 0;
+  treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
+  requestPtr.p->m_outstanding++;
+}//Dbspj::scanFrag_execSCAN_NEXTREQ()
 
 void
-Dbspj::lookup_execTRANSID_AI(Signal* signal,
-			     Ptr<Request> requestPtr,
-			     Ptr<TreeNode> treeNodePtr,
-			     const RowRef & rowRef)
+Dbspj::scanFrag_abort(Signal* signal, 
+                      Ptr<Request> requestPtr,
+                      Ptr<TreeNode> treeNodePtr)
 {
   jam();
-
+  
+  if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE)
   {
-    LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
-    Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
-    Dependency_map::ConstDataBufferIterator it;
-    for (list.first(it); !it.isNull(); list.next(it))
+    jam();
+
+    ScanFragNextReq* req = 
+      reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
+    req->senderData = treeNodePtr.i;
+    req->closeFlag = ZTRUE;
+    req->transId1 = requestPtr.p->m_transId[0];
+    req->transId2 = requestPtr.p->m_transId[1];
+    req->batch_size_rows = 0;
+    req->batch_size_bytes = 0;
+
+    sendSignal(treeNodePtr.p->m_send.m_ref, 
+               GSN_SCAN_NEXTREQ, 
+               signal, 
+               ScanFragNextReq::SignalLength, 
+               JBB);
+
+    if (treeNodePtr.p->m_scanfrag_data.m_rows_expecting != ~Uint32(0))
     {
       jam();
-      Ptr<TreeNode> childPtr;
-      m_treenode_pool.getPtr(childPtr, * it.data);
-      ndbrequire(childPtr.p->m_info != 0&&childPtr.p->m_info->m_start_child!=0);
-      (this->*(childPtr.p->m_info->m_start_child))(signal,
-                                                   requestPtr, childPtr,rowRef);
+      // We were idle at the time..
+      requestPtr.p->m_outstanding++;
+      treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
     }
   }
-  ndbrequire(!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()));
-  ndbrequire(treeNodePtr.p->m_lookup_data.m_outstanding);
-  treeNodePtr.p->m_lookup_data.m_outstanding --;
-
-  const Ptr<TreeNode> root = getRoot(requestPtr.p->m_nodes);
-  (this->*(root.p->m_info->m_count_descendant_signal))(signal,
-                                                       requestPtr,
-                                                       treeNodePtr,
-                                                       root,
-                                                       GSN_TRANSID_AI);
-  if (treeNodePtr.p->m_lookup_data.m_outstanding == 0)
-  {
-    jam();
-    nodeFinished(signal, requestPtr, treeNodePtr);
-  }
 }
 
+
 void
-Dbspj::lookup_execLQHKEYREF(Signal* signal,
-                            Ptr<Request> requestPtr,
-                            Ptr<TreeNode> treeNodePtr)
+Dbspj::scanFrag_cleanup(Ptr<Request> requestPtr,
+                        Ptr<TreeNode> treeNodePtr)
 {
-  if (requestPtr.p->isLookup())
-  {
-    /* CONF/REF not requested for lookup-Leaf: */
-    ndbrequire(!treeNodePtr.p->isLeaf());
+  cleanup_common(requestPtr, treeNodePtr);
+}
 
-    /**
-     * Scan-request does not need to
-     *   send TCKEYREF...
-     */
-    const LqhKeyRef * rep = (LqhKeyRef*)signal->getDataPtr();
-    Uint32 errCode = rep->errorCode;
+/**
+ * END - MODULE SCAN FRAG
+ */
 
-    /**
-     * Return back to api...
-     *   NOTE: assume that signal is tampered with
-     */
-    Uint32 resultRef = treeNodePtr.p->m_lookup_data.m_api_resultRef;
-    Uint32 resultData = treeNodePtr.p->m_lookup_data.m_api_resultData;
-    Uint32 transId[2] = { requestPtr.p->m_transId[0],
-                          requestPtr.p->m_transId[1] };
-    TcKeyRef* ref = (TcKeyRef*)signal->getDataPtr();
-    ref->connectPtr = resultData;
-    ref->transId[0] = transId[0];
-    ref->transId[1] = transId[1];
-    ref->errorCode = errCode;
-    ref->errorData = 0;
+/**
+ * MODULE SCAN INDEX
+ *
+ * NOTE: This may not be root-node
+ */
+const Dbspj::OpInfo
+Dbspj::g_ScanIndexOpInfo =
+{
+  &Dbspj::scanIndex_build,
+  &Dbspj::scanIndex_prepare,
+  0, // start
+  &Dbspj::scanIndex_execTRANSID_AI,
+  0, // execLQHKEYREF
+  0, // execLQHKEYCONF
+  &Dbspj::scanIndex_execSCAN_FRAGREF,
+  &Dbspj::scanIndex_execSCAN_FRAGCONF,
+  &Dbspj::scanIndex_parent_row,
+  &Dbspj::scanIndex_parent_batch_complete,
+  &Dbspj::scanIndex_execSCAN_NEXTREQ,
+  &Dbspj::scanIndex_complete,
+  &Dbspj::scanIndex_abort,
+  &Dbspj::scanIndex_execNODE_FAILREP,
+  &Dbspj::scanIndex_cleanup
+};
 
-    DEBUG("lookup_execLQHKEYREF, errorCode:" << errCode);
+Uint32
+Dbspj::scanIndex_build(Build_context& ctx,
+                       Ptr<Request> requestPtr,
+                       const QueryNode* qn,
+                       const QueryNodeParameters* qp)
+{
+  Uint32 err = 0;
+  Ptr<TreeNode> treeNodePtr;
+  const QN_ScanIndexNode * node = (const QN_ScanIndexNode*)qn;
+  const QN_ScanIndexParameters * param = (const QN_ScanIndexParameters*)qp;
+  
+  do
+  {
+    err = createNode(ctx, requestPtr, treeNodePtr);
+    if (unlikely(err != 0))
+      break;
+    
+    Uint32 batchSize = param->batchSize;
 
-    sendSignal(resultRef, GSN_TCKEYREF, signal,
-               TcKeyRef::SignalLength, JBB);
+    requestPtr.p->m_bits |= Request::RT_SCAN;
+    requestPtr.p->m_bits |= Request::RT_NEED_PREPARE;
+    requestPtr.p->m_bits |= Request::RT_NEED_COMPLETE;
+    treeNodePtr.p->m_info = &g_ScanIndexOpInfo;
+    treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
+    treeNodePtr.p->m_bits |= TreeNode::T_NEED_REPORT_BATCH_COMPLETED;
+    treeNodePtr.p->m_batch_size = batchSize & 0xFFFF;
 
-    if (treeNodePtr.p->m_bits & TreeNode::T_UNIQUE_INDEX_LOOKUP)
+    ScanFragReq*dst=(ScanFragReq*)treeNodePtr.p->m_scanindex_data.m_scanFragReq;
+    dst->senderData = treeNodePtr.i;
+    dst->resultRef = reference();
+    dst->resultData = treeNodePtr.i;
+    dst->savePointId = ctx.m_savepointId;
+    dst->batch_size_rows  = batchSize & 0xFFFF;
+    dst->batch_size_bytes = batchSize >> 16;
+    
+    Uint32 transId1 = requestPtr.p->m_transId[0];
+    Uint32 transId2 = requestPtr.p->m_transId[1];
+    dst->transId1 = transId1;
+    dst->transId2 = transId2;
+    
+    Uint32 treeBits = node->requestInfo;
+    Uint32 paramBits = param->requestInfo;
+    Uint32 requestInfo = 0;
+    ScanFragReq::setRangeScanFlag(requestInfo, 1);
+    ScanFragReq::setReadCommittedFlag(requestInfo, 1);
+    ScanFragReq::setScanPrio(requestInfo, ctx.m_scanPrio);
+    ScanFragReq::setAnyValueFlag(requestInfo, 1);
+    ScanFragReq::setNoDiskFlag(requestInfo, 
+                               (treeBits & DABits::NI_LINKED_DISK) == 0 &&
+                               (paramBits & DABits::PI_DISK_ATTR) == 0);
+    dst->requestInfo = requestInfo;
+
+    err = DbspjErr::InvalidTreeNodeSpecification;
+    DEBUG("scanIndex_build: len=" << node->len);
+    if (unlikely(node->len < QN_ScanIndexNode::NodeSize))
+      break;
+
+    dst->tableId = node->tableId;
+    dst->schemaVersion = node->tableVersion;
+    
+    err = DbspjErr::InvalidTreeParametersSpecification;
+    DEBUG("param len: " << param->len);
+    if (unlikely(param->len < QN_ScanIndexParameters::NodeSize))
     {
-      /**
-       * If this is a "leaf" unique index lookup
-       *   emit extra TCKEYCONF as would have been done with ordinary
-       *   operation
-       */
-      LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
-      Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
-      Dependency_map::ConstDataBufferIterator it;
-      ndbrequire(list.first(it));
-      ndbrequire(list.getSize() == 1); // should only be 1 child
-      Ptr<TreeNode> childPtr;
-      m_treenode_pool.getPtr(childPtr, * it.data);
-      if (childPtr.p->m_bits & TreeNode::T_LEAF)
-      {
-        jam();
-        Uint32 resultRef = childPtr.p->m_lookup_data.m_api_resultRef;
-        Uint32 resultData = childPtr.p->m_lookup_data.m_api_resultData;
-        TcKeyConf* conf = (TcKeyConf*)signal->getDataPtr();
-        conf->apiConnectPtr = RNIL;
-        conf->confInfo = 0;
-        conf->gci_hi = 0;
-        TcKeyConf::setNoOfOperations(conf->confInfo, 1);
-        conf->transId1 = requestPtr.p->m_transId[0];
-        conf->transId2 = requestPtr.p->m_transId[1];
-        conf->operations[0].apiOperationPtr = resultData;
-        conf->operations[0].attrInfoLen =
-          TcKeyConf::DirtyReadBit |getOwnNodeId();
-        sendSignal(resultRef, GSN_TCKEYCONF, signal,
-                   TcKeyConf::StaticLength + 2, JBB);
-      }
+      jam();
+      DEBUG_CRASH();
+      break;
     }
-  }
+    
+    ctx.m_resultData = param->resultData;
+
+    /**
+     * Parse stuff
+     */
+    struct DABuffer nodeDA, paramDA;
+    nodeDA.ptr = node->optional;
+    nodeDA.end = nodeDA.ptr + (node->len - QN_ScanIndexNode::NodeSize);
+    paramDA.ptr = param->optional;
+    paramDA.end = paramDA.ptr + (param->len - QN_ScanIndexParameters::NodeSize);
+    
+    err = parseScanIndex(ctx, requestPtr, treeNodePtr,
+                         nodeDA, treeBits, paramDA, paramBits);
 
-  Uint32 cnt = 2;
-  if (treeNodePtr.p->isLeaf())  // Can't be a lookup-Leaf, asserted above
-    cnt = 1;
+    if (unlikely(err != 0))
+    {
+      jam();
+      DEBUG_CRASH();
+      break;
+    }
+    
+    /**
+     * Since we have T_NEED_REPORT_BATCH_COMPLETED set, we set
+     *   this on all our parents...
+     */
+    Ptr<TreeNode> nodePtr;
+    nodePtr.i = treeNodePtr.p->m_parentPtrI;
+    while (nodePtr.i != RNIL)
+    {
+      jam();
+      m_treenode_pool.getPtr(nodePtr);
+      nodePtr.p->m_bits |= TreeNode::T_REPORT_BATCH_COMPLETE;
+      nodePtr.p->m_bits |= TreeNode::T_NEED_REPORT_BATCH_COMPLETED;
+      nodePtr.i = nodePtr.p->m_parentPtrI;
+    }
 
-  ndbrequire(treeNodePtr.p->m_lookup_data.m_outstanding >= cnt);
-  treeNodePtr.p->m_lookup_data.m_outstanding -= cnt;
+    ctx.m_scan_cnt++;
+    /**
+     * In the scenario with only 1 scan in tree,
+     *   register cursor here, so we don't need to search for it after build
+     * If m_scan_cnt > 1,
+     *   then this list will simply be cleared after build
+     */
+    registerCursor(requestPtr, treeNodePtr);
 
-  const Ptr<TreeNode> root = getRoot(requestPtr.p->m_nodes);
-  (this->*(root.p->m_info->m_count_descendant_signal))(signal,
-                                                       requestPtr,
-                                                       treeNodePtr,
-                                                       root,
-                                                       GSN_LQHKEYREF);
-  if (treeNodePtr.p->m_lookup_data.m_outstanding == 0)
-  {
-    jam();
-    nodeFinished(signal, requestPtr, treeNodePtr);
-  }
+    return 0;
+  } while (0);
+  
+  return err;
 }
 
-void
-Dbspj::lookup_execLQHKEYCONF(Signal* signal,
-                             Ptr<Request> requestPtr,
-                             Ptr<TreeNode> treeNodePtr)
+Uint32
+Dbspj::parseScanIndex(Build_context& ctx,
+                      Ptr<Request> requestPtr,
+                      Ptr<TreeNode> treeNodePtr,
+                      DABuffer tree, Uint32 treeBits,
+                      DABuffer param, Uint32 paramBits)
 {
-  ndbrequire(!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()));
-  ndbrequire(treeNodePtr.p->m_lookup_data.m_outstanding);
-  treeNodePtr.p->m_lookup_data.m_outstanding --;
+  Uint32 err = 0;
 
-  const Ptr<TreeNode> root = getRoot(requestPtr.p->m_nodes);
-  (this->*(root.p->m_info->m_count_descendant_signal))(signal,
-                                                       requestPtr,
-                                                       treeNodePtr,
-                                                       root,
-                                                       GSN_LQHKEYCONF);
+  typedef QN_ScanIndexNode Node;
+  typedef QN_ScanIndexParameters Params;
 
-  if (treeNodePtr.p->m_lookup_data.m_outstanding == 0)
+  do
   {
     jam();
-    nodeFinished(signal, requestPtr, treeNodePtr);
-  }
-}
-
-void
-Dbspj::lookup_start_child(Signal* signal,
-                          Ptr<Request> requestPtr,
-                          Ptr<TreeNode> treeNodePtr,
-                          const RowRef & rowRef)
-{
-  /**
-   * Here we need to...
-   *   1) construct a key
-   *   2) compute hash     (normally TC)
-   *   3) get node for row (normally TC)
-   */
-  Uint32 err;
-  const LqhKeyReq* src = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
-  const Uint32 tableId = LqhKeyReq::getTableId(src->tableSchemaVersion);
-  const Uint32 corrVal = rowRef.m_src_correlation;
 
-  DEBUG("::lookup_start_child");
+    ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
+    data.m_fragments.init();
+    data.m_frags_complete = 0;
 
-  do
-  {
-    Uint32 ptrI = RNIL;
-    if (treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED)
+    if (treeBits & Node::SI_PRUNE_PATTERN)
     {
-      jam();
-      DEBUG("start_child w/ T_KEYINFO_CONSTRUCTED");
-      /**
-       * Get key-pattern
-       */
+      Uint32 len_cnt = * tree.ptr ++;
+      Uint32 len = len_cnt & 0xFFFF; // length of pattern in words
+      Uint32 cnt = len_cnt >> 16;    // no of parameters
+      
       LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
-      Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
+      ndbrequire((cnt==0) == ((treeBits & Node::SI_PRUNE_PARAMS) ==0));
+      ndbrequire((cnt==0) == ((paramBits & Params::SIP_PRUNE_PARAMS)==0));
+      
+      if (treeBits & Node::SI_PRUNE_LINKED)
+      {
+        jam();
 
-      err = expand(ptrI, pattern, rowRef.m_row_data.m_section);
-      if (unlikely(err != 0))
-        break;
+        data.m_prunePattern.init();
+        Local_pattern_store pattern(pool, data.m_prunePattern);
 
-      if (ptrI == RNIL)
+        /**
+         * Expand pattern into a new pattern (with linked values)
+         */
+        err = expand(pattern, tree, len, param, cnt);
+        treeNodePtr.p->m_bits |= TreeNode::T_PRUNE_PATTERN;
+        c_Counters.incr_counter(CI_PRUNNED_RANGE_SCANS_RECEIVED, 1);
+      }
+      else
       {
         jam();
         /**
-         * We constructed a null-key...construct a zero-length key (even if we don't support it *now*)
-         *
-         *   (we actually did prior to joining mysql where null was treated as any other
-         *   value in a key). But mysql treats null in unique key as *wildcard*
-         *   which we don't support so well...and do nasty tricks in handler
-         *
-         * NOTE: should be *after* check for error
+         * Expand pattern directly into a key section.
+         *   This means a "fixed" pruning from here on
+         *   i.e guaranteed single partition
          */
-        err = createEmptySection(ptrI);
-        if (unlikely(err != 0))
-          break;
-      }
+        Uint32 prunePtrI = RNIL;
+        err = expand(prunePtrI, tree, len, param, cnt);
+        data.m_constPrunePtrI = prunePtrI;
 
-      treeNodePtr.p->m_send.m_keyInfoPtrI = ptrI;
+        /**
+         * We may not compute the partition for the hash-key here
+         *   as we have not yet opened a read-view
+         */
+        treeNodePtr.p->m_bits |= TreeNode::T_CONST_PRUNE;
+        c_Counters.incr_counter(CI_CONST_PRUNNED_RANGE_SCANS_RECEIVED, 1);
+      }
     }
 
-    BuildKeyReq tmp;
-    err = computeHash(signal, tmp, tableId, ptrI);
-    if (unlikely(err != 0))
-      break;
-
-    err = getNodes(signal, tmp, tableId);
-    if (unlikely(err != 0))
-      break;
-
-    Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
-    if (treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED)
+    if ((treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE) == 0 &&
+        ((treeBits & Node::SI_PARALLEL) || 
+         ((paramBits & Params::SIP_PARALLEL))))
     {
       jam();
-      Uint32 tmp = RNIL;
-      ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error
-
-      Uint32 org_size;
-      {
-        SegmentedSectionPtr ptr;
-        getSection(ptr, tmp);
-        org_size = ptr.sz;
-      }
-
-      LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
-      Local_pattern_store pattern(pool, treeNodePtr.p->m_attrParamPattern);
-      err = expand(tmp, pattern, rowRef.m_row_data.m_section);
-      if (unlikely(err != 0))
-        break;
-
-      /**
-       * Update size of subsrouting section, which contains arguments
-       */
-      SegmentedSectionPtr ptr;
-      getSection(ptr, tmp);
-      Uint32 new_size = ptr.sz;
-      Uint32 * sectionptrs = ptr.p->theData;
-      sectionptrs[4] = new_size - org_size;
-
-      treeNodePtr.p->m_send.m_attrInfoPtrI = tmp;
+      treeNodePtr.p->m_bits |= TreeNode::T_SCAN_PARALLEL;
     }
 
-    /**
-     * Now send...
-     */
+    return parseDA(ctx, requestPtr, treeNodePtr, 
+                   tree, treeBits, param, paramBits);
+  } while(0);
+  
+  DEBUG_CRASH();
+  return err;
+}
 
-    /**
-     * TODO merge better with lookup_start (refactor)
-     */
-    {
-      /* We set the upper half word of m_correlation to the tuple ID
-       * of the parent, such that the API can match this tuple with its 
-       * parent.
-       * Then we re-use the tuple ID of the parent as the 
-       * tuple ID for this tuple also. Since the tuple ID
-       * is unique within this batch and SPJ block for the parent operation,
-       * it must also be unique for this operation. 
-       * This ensures that lookup operations with no user projection will 
-       * work, since such operations will have the same tuple ID as their 
-       * parents. The API will then be able to match a tuple with its 
-       * grandparent, even if it gets no tuple for the parent operation.*/
-      treeNodePtr.p->m_send.m_correlation = 
-        (corrVal << 16) + (corrVal & 0xffff);
+void
+Dbspj::scanIndex_prepare(Signal * signal, 
+                         Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
+{
+  jam();
 
-      treeNodePtr.p->m_send.m_ref = tmp.receiverRef;
-      LqhKeyReq * dst = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
-      dst->hashValue = tmp.hashInfo[0];
-      dst->fragmentData = tmp.fragId;
-      Uint32 attrLen = 0;
-      LqhKeyReq::setDistributionKey(attrLen, tmp.fragDistKey);
-      dst->attrLen = attrLen;
-      lookup_send(signal, requestPtr, treeNodePtr);
+  ScanFragReq*dst=(ScanFragReq*)treeNodePtr.p->m_scanindex_data.m_scanFragReq;
 
-      if (treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED)
-      {
-        jam();
-        // restore
-        treeNodePtr.p->m_send.m_attrInfoPtrI = attrInfoPtrI;
-      }
-    }
-    return;
-  } while (0);
+  DihScanTabReq * req = (DihScanTabReq*)signal->getDataPtrSend();
+  req->senderRef = reference();
+  req->senderData = treeNodePtr.i;
+  req->tableId = dst->tableId;
+  req->schemaTransId = 0;
+  sendSignal(DBDIH_REF, GSN_DIH_SCAN_TAB_REQ, signal,
+             DihScanTabReq::SignalLength, JBB);
 
-  ndbrequire(false);
+  requestPtr.p->m_outstanding++;
 }
 
 void
-Dbspj::lookup_cleanup(Ptr<Request> requestPtr,
-                      Ptr<TreeNode> treeNodePtr)
+Dbspj::execDIH_SCAN_TAB_REF(Signal* signal)
 {
-  cleanup_common(requestPtr, treeNodePtr);
+  jamEntry();
+  ndbrequire(false);
 }
 
-
-Uint32
-Dbspj::handle_special_hash(Uint32 tableId, Uint32 dstHash[4],
-                           const Uint64* src,
-                           Uint32 srcLen,       // Len in #32bit words
-                           const KeyDescriptor* desc)
+void
+Dbspj::execDIH_SCAN_TAB_CONF(Signal* signal)
 {
-  const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS= 
-    (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
-  Uint64 alignedWorkspace[MAX_KEY_SIZE_IN_LONG_WORDS * MAX_XFRM_MULTIPLY];
-  const bool hasVarKeys = desc->noOfVarKeys > 0;
-  const bool hasCharAttr = desc->hasCharAttr;
-  const bool compute_distkey = desc->noOfDistrKeys > 0;
-  
-  const Uint64 *hashInput = 0;
-  Uint32 inputLen = 0;
-  Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX];
-  Uint32 * keyPartLenPtr;
-
-  /* Normalise KeyInfo into workspace if necessary */
-  if (hasCharAttr || (compute_distkey && hasVarKeys))
-  {
-    hashInput = alignedWorkspace;
-    keyPartLenPtr = keyPartLen;
-    inputLen = xfrm_key(tableId, 
-                        (Uint32*)src, 
-                        (Uint32*)alignedWorkspace, 
-                        sizeof(alignedWorkspace) >> 2, 
-                        keyPartLenPtr);
-    if (unlikely(inputLen == 0))
-    {
-      return 290;  // 'Corrupt key in TC, unable to xfrm'
-    }
-  } 
-  else 
-  {
-    /* Keyinfo already suitable for hash */
-    hashInput = src;
-    inputLen = srcLen;
-    keyPartLenPtr = 0;
-  }
-  
-  /* Calculate primary key hash */
-  md5_hash(dstHash, hashInput, inputLen);
-  
-  /* If the distribution key != primary key then we have to
-   * form a distribution key from the primary key and calculate 
-   * a separate distribution hash based on this
-   */
-  if (compute_distkey)
-  {
-    jam();
-    
-    Uint32 distrKeyHash[4];
-    /* Reshuffle primary key columns to get just distribution key */
-    Uint32 len = create_distr_key(tableId, (Uint32*)hashInput, (Uint32*)alignedWorkspace, keyPartLenPtr);
-    /* Calculate distribution key hash */
-    md5_hash(distrKeyHash, alignedWorkspace, len);
-
-    /* Just one word used for distribution */
-    dstHash[1] = distrKeyHash[1];
-  }
-  return 0;
-}
+  jamEntry();
+  DihScanTabConf * conf = (DihScanTabConf*)signal->getDataPtr();
 
-Uint32
-Dbspj::computeHash(Signal* signal,
-		   BuildKeyReq& dst, Uint32 tableId, Uint32 ptrI)
-{
-  /**
-   * Essentially the same code as in Dbtc::hash().
-   * The code for user defined partitioning has been removed though.
-   */
-  SegmentedSectionPtr ptr;
-  getSection(ptr, ptrI);
+  Ptr<TreeNode> treeNodePtr;
+  m_treenode_pool.getPtr(treeNodePtr, conf->senderData);
+  ndbrequire(treeNodePtr.p->m_info == &g_ScanIndexOpInfo);
 
-  /* NOTE:  md5_hash below require 64-bit alignment
-   */
-  const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS=
-    (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
-  Uint64 tmp64[MAX_KEY_SIZE_IN_LONG_WORDS];
-  Uint32 *tmp32 = (Uint32*)tmp64;
-  copy(tmp32, ptr);
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
 
-  const KeyDescriptor* desc = g_key_descriptor_pool.getPtr(tableId);
-  ndbrequire(desc != NULL);
+  Uint32 cookie = conf->scanCookie;
+  Uint32 fragCount = conf->fragmentCount;
+  ScanFragReq * dst = (ScanFragReq*)data.m_scanFragReq;
+   
+  if (conf->reorgFlag)
+  {
+    jam();
+    ScanFragReq::setReorgFlag(dst->requestInfo, 1);
+  }
 
-  bool need_special_hash = desc->hasCharAttr | (desc->noOfDistrKeys > 0);
-  if (need_special_hash)
+  data.m_fragCount = fragCount;
+  data.m_scanCookie = cookie;
+
+  const Uint32 prunemask = TreeNode::T_PRUNE_PATTERN | TreeNode::T_CONST_PRUNE;
+  const bool pruned = (treeNodePtr.p->m_bits & prunemask) != 0;
+
+  Ptr<Request> requestPtr;
+  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+
+  Ptr<ScanIndexFrag> fragPtr;
+  Local_ScanIndexFrag_list list(m_scanindexfrag_pool, data.m_fragments);
+  if (likely(m_scanindexfrag_pool.seize(requestPtr.p->m_arena, fragPtr)))
   {
     jam();
-    return handle_special_hash(tableId, dst.hashInfo, tmp64, ptr.sz, desc);
+    fragPtr.p->init(0);
+    list.addLast(fragPtr);
   }
   else
   {
     jam();
-    md5_hash(dst.hashInfo, tmp64, ptr.sz);
-    return 0;
+    goto error1;
   }
-}
+  data.m_currentFragmentPtrI = fragPtr.i;
 
-Uint32
-Dbspj::getNodes(Signal* signal, BuildKeyReq& dst, Uint32 tableId)
-{
-  Uint32 err;
-  DiGetNodesReq * req = (DiGetNodesReq *)&signal->theData[0];
-  req->tableId = tableId;
-  req->hashValue = dst.hashInfo[1];
-  req->distr_key_indicator = 0; // userDefinedPartitioning not supported!
+  if (treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE)
+  {
+    jam();
 
-#if 1
-  EXECUTE_DIRECT(DBDIH, GSN_DIGETNODESREQ, signal,
-                 DiGetNodesReq::SignalLength);
-#else
-  sendSignal(DBDIH_REF, GSN_DIGETNODESREQ, signal,
-             DiGetNodesReq::SignalLength, JBB);
-  jamEntry();
+    // TODO we need a different variant of computeHash here,
+    // since m_constPrunePtrI does not contain full primary key
+    // but only parts in distribution key
 
-#endif
+    BuildKeyReq tmp;
+    Uint32 tableId = dst->tableId;
+    Uint32 err = computeHash(signal, tmp, tableId, data.m_constPrunePtrI);
+    if (unlikely(err != 0))
+      goto error;
 
-  DiGetNodesConf * conf = (DiGetNodesConf *)&signal->theData[0];
-  err = signal->theData[0];
-  Uint32 Tdata2 = conf->reqinfo;
-  Uint32 nodeId = conf->nodes[0];
-  Uint32 instanceKey = (Tdata2 >> 24) & 127;
+    releaseSection(data.m_constPrunePtrI);
+    data.m_constPrunePtrI = RNIL;
+    
+    err = getNodes(signal, tmp, tableId);
+    if (unlikely(err != 0))
+      goto error;
 
-  DEBUG("HASH to nodeId:" << nodeId << ", instanceKey:" << instanceKey);
+    fragPtr.p->m_fragId = tmp.fragId; 
+    fragPtr.p->m_ref = tmp.receiverRef;
+  }
+  else if (fragCount == 1)
+  {
+    jam();
+    /**
+     * This is roughly equivalent to T_CONST_PRUNE
+     *   pretend that it is const-pruned
+     */
+    if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
+    {
+      jam();
+      LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+      Local_pattern_store pattern(pool, data.m_prunePattern);
+      pattern.release();
+    }
+    data.m_constPrunePtrI = RNIL;
+    Uint32 clear = TreeNode::T_PRUNE_PATTERN | TreeNode::T_SCAN_PARALLEL;
+    treeNodePtr.p->m_bits &= ~clear;
+    treeNodePtr.p->m_bits |= TreeNode::T_CONST_PRUNE;
+  }
+  else
+  {
+    for (Uint32 i = 1; i<fragCount; i++)
+    {
+      jam();
+      Ptr<ScanIndexFrag> fragPtr;
+      if (likely(m_scanindexfrag_pool.seize(requestPtr.p->m_arena, fragPtr)))
+      {
+        jam();
+        fragPtr.p->init(i);
+        list.addLast(fragPtr);
+      }
+      else
+      {
+        goto error1;
+      }
+    }
+  }
 
-  jamEntry();
-  if (unlikely(err != 0))
-    goto error;
+  if (!pruned)
+  {
+    jam();
+    Uint32 tableId = ((ScanFragReq*)data.m_scanFragReq)->tableId;
+    DihScanGetNodesReq * req = (DihScanGetNodesReq*)signal->getDataPtrSend();
+    req->senderRef = reference();
+    req->senderData = treeNodePtr.i;
+    req->tableId = tableId;
+    req->scanCookie = cookie;
 
-  dst.fragId = conf->fragId;
-  dst.fragDistKey = (Tdata2 >> 16) & 255;
-  dst.receiverRef = numberToRef(DBLQH, instanceKey, nodeId);
+    Uint32 cnt = 0;
+    for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
+    {
+      jam();
+      req->fragId = fragPtr.p->m_fragId;
+      sendSignal(DBDIH_REF, GSN_DIH_SCAN_GET_NODES_REQ, signal,
+                 DihScanGetNodesReq::SignalLength, JBB);
+      cnt++;
+    }
+    requestPtr.p->m_outstanding += cnt;
+  }
 
-  return 0;
+  checkPrepareComplete(signal, requestPtr, 1);
+
+  return;
 
+error1:
 error:
-  /**
-   * TODO handle error
-   */
   ndbrequire(false);
-  return err;
 }
 
-/**
- * END - MODULE LOOKUP
- */
-
-/**
- * MODULE SCAN FRAG
- */
-const Dbspj::OpInfo
-Dbspj::g_ScanFragOpInfo =
+void
+Dbspj::execDIH_SCAN_GET_NODES_REF(Signal* signal)
 {
-  &Dbspj::scanFrag_build,
-  &Dbspj::scanFrag_start,
-  &Dbspj::scanFrag_execTRANSID_AI,
-  0, // execLQHKEYREF
-  0, // execLQHKEYCONF
-  &Dbspj::scanFrag_execSCAN_FRAGREF,
-  &Dbspj::scanFrag_execSCAN_FRAGCONF,
-  &Dbspj::scanFrag_start_child,
-  &Dbspj::scanFrag_execSCAN_NEXTREQ,
-  0, // Dbspj::scanFrag_complete
-  0, // Dbspj::scanFrag_abort
-  &Dbspj::scanFrag_cleanup,
-  &Dbspj::scanFrag_count_descendant_signal
-};
+  jamEntry();
+  ndbrequire(false);
+}
 
-Uint32
-Dbspj::scanFrag_build(Build_context& ctx,
-		      Ptr<Request> requestPtr,
-		      const QueryNode* qn,
-		      const QueryNodeParameters* qp)
+void
+Dbspj::execDIH_SCAN_GET_NODES_CONF(Signal* signal)
 {
-  Uint32 err = 0;
-  Ptr<TreeNode> treeNodePtr;
-  const QN_ScanFragNode * node = (const QN_ScanFragNode*)qn;
-  const QN_ScanFragParameters * param = (const QN_ScanFragParameters*)qp;
+  jamEntry();
 
-  do
-  {
-    err = createNode(ctx, requestPtr, treeNodePtr);
-    if (unlikely(err != 0))
-      break;
+  DihScanGetNodesConf * conf = (DihScanGetNodesConf*)signal->getDataPtr();
 
-    requestPtr.p->m_bits |= Request::RT_SCAN;
-    treeNodePtr.p->m_info = &g_ScanFragOpInfo;
-    treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
+  Uint32 senderData = conf->senderData;
+  Uint32 node = conf->nodes[0];
+  Uint32 fragId = conf->fragId;
+  Uint32 instanceKey = conf->instanceKey;
 
-    treeNodePtr.p->m_scanfrag_data.m_scan_state = ScanFragData::SF_IDLE;
-    treeNodePtr.p->m_scanfrag_data.m_scan_status = 0;
-    treeNodePtr.p->m_scanfrag_data.m_pending_close = false;
+  Ptr<TreeNode> treeNodePtr;
+  m_treenode_pool.getPtr(treeNodePtr, senderData);
+  ndbrequire(treeNodePtr.p->m_info == &g_ScanIndexOpInfo);
 
-    ScanFragReq*dst=(ScanFragReq*)treeNodePtr.p->m_scanfrag_data.m_scanFragReq;
-    dst->senderData = treeNodePtr.i;
-    dst->resultRef = reference();
-    dst->resultData = treeNodePtr.i;
-    dst->savePointId = ctx.m_savepointId;
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
 
-    Uint32 transId1 = requestPtr.p->m_transId[0];
-    Uint32 transId2 = requestPtr.p->m_transId[1];
-    dst->transId1 = transId1;
-    dst->transId2 = transId2;
+  Ptr<ScanIndexFrag> fragPtr;
+  Local_ScanIndexFrag_list list(m_scanindexfrag_pool, data.m_fragments);
+  ndbrequire(scanIndex_findFrag(list, fragPtr, fragId) == 0);
 
-    Uint32 treeBits = node->requestInfo;
-    Uint32 paramBits = param->requestInfo;
-    //ndbout_c("Dbspj::scanFrag_build() treeBits=%.8x paramBits=%.8x", 
-    //         treeBits, paramBits);
-    Uint32 requestInfo = 0;
-    ScanFragReq::setReadCommittedFlag(requestInfo, 1);
-    ScanFragReq::setScanPrio(requestInfo, ctx.m_scanPrio);
-    ScanFragReq::setAnyValueFlag(requestInfo, 1);
-    ScanFragReq::setNoDiskFlag(requestInfo, 
-                               (treeBits & DABits::NI_LINKED_DISK) == 0 &&
-                               (paramBits & DABits::PI_DISK_ATTR) == 0);
-    dst->requestInfo = requestInfo;
+  fragPtr.p->m_ref = numberToRef(DBLQH, instanceKey, node);
 
-    err = DbspjErr::InvalidTreeNodeSpecification;
-    DEBUG("scanFrag_build: len=" << node->len);
-    if (unlikely(node->len < QN_ScanFragNode::NodeSize))
-      break;
 
-    dst->tableId = node->tableId;
-    dst->schemaVersion = node->tableVersion;
+  Ptr<Request> requestPtr;
+  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
+  checkPrepareComplete(signal, requestPtr, 1);
+}
 
-    err = DbspjErr::InvalidTreeParametersSpecification;
-    DEBUG("param len: " << param->len);
-    if (unlikely(param->len < QN_ScanFragParameters::NodeSize))
+Uint32
+Dbspj::scanIndex_findFrag(Local_ScanIndexFrag_list & list, 
+                          Ptr<ScanIndexFrag> & fragPtr, Uint32 fragId)
+{
+  for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
+  {
+    jam();
+    if (fragPtr.p->m_fragId == fragId)
     {
       jam();
-      DEBUG_CRASH();
-      break;
+      return 0;
     }
+  }
+  
+  return 99; // TODO
+}
 
-    ctx.m_resultData = param->resultData;
+void
+Dbspj::scanIndex_parent_row(Signal* signal,
+                             Ptr<Request> requestPtr,
+                             Ptr<TreeNode> treeNodePtr,
+                             const RowPtr & rowRef)
+{
+  jam();
 
-    /**
-     * Parse stuff common lookup/scan-frag
-     */
-    struct DABuffer nodeDA, paramDA;
-    nodeDA.ptr = node->optional;
-    nodeDA.end = nodeDA.ptr + (node->len - QN_ScanFragNode::NodeSize);
-    paramDA.ptr = param->optional;
-    paramDA.end = paramDA.ptr + (param->len - QN_ScanFragParameters::NodeSize);
-    err = parseDA(ctx, requestPtr, treeNodePtr,
-                  nodeDA, treeBits, paramDA, paramBits);
-    if (unlikely(err != 0))
+  Uint32 err;
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
+
+  /**
+   * Construct range definition,
+   *   and if prune pattern enabled
+   *   stuff it onto correct scanindexFrag
+   */
+  do
+  {
+    Ptr<ScanIndexFrag> fragPtr;
+    Local_ScanIndexFrag_list list(m_scanindexfrag_pool, data.m_fragments);
+    LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+    if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
+    {
+      jam();
+
+      /**
+       * TODO: Expand into linear memory instead
+       *       of expanding into sections, and then copy
+       *       section into linear
+       */
+      Local_pattern_store pattern(pool, data.m_prunePattern);
+      Uint32 pruneKeyPtrI = RNIL;
+      err = expand(pruneKeyPtrI, pattern, rowRef);
+      if (unlikely(err != 0))
+      {
+        DEBUG_CRASH();
+        break;
+      }
+
+      // TODO we need a different variant of computeHash here,
+      // since pruneKeyPtrI does not contain full primary key
+      // but only parts in distribution key
+
+      BuildKeyReq tmp;
+      ScanFragReq * dst = (ScanFragReq*)data.m_scanFragReq;
+      Uint32 tableId = dst->tableId;
+      err = computeHash(signal, tmp, tableId, pruneKeyPtrI);
+      releaseSection(pruneKeyPtrI); // see ^ TODO
+      if (unlikely(err != 0))
+      {
+        DEBUG_CRASH();
+        break;
+      }
+      
+      err = getNodes(signal, tmp, tableId);
+      if (unlikely(err != 0))
+      {
+        DEBUG_CRASH();
+        break;
+      }
+      
+      err = scanIndex_findFrag(list, fragPtr, tmp.fragId);
+      if (unlikely(err != 0))
+      {
+        DEBUG_CRASH();
+        break;
+      }
+
+      if (fragPtr.p->m_ref == 0)
+      {
+        jam();
+        fragPtr.p->m_ref = tmp.receiverRef;
+      }
+      else
+      {
+        /**
+         * TODO: not 100% sure if this is correct with reorg ongoing...
+         *       but scanning "old" should regardless be safe as we still have
+         *       scanCookie
+         */
+        ndbassert(fragPtr.p->m_ref == tmp.receiverRef);
+      }
+    }
+    else
+    {
+      jam();
+      /**
+       * If const prune, or no-prune, store on first fragment,
+       * and send to 1 or all resp.
+       */
+      list.first(fragPtr);
+    }
+    
+    Uint32 ptrI = fragPtr.p->m_rangePtrI;
+    if (treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED)
+    {
+      jam();
+      Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
+      err = expand(ptrI, pattern, rowRef);
+      if (unlikely(err != 0))
+      {
+        DEBUG_CRASH();
+        break;
+      }
+    }
+    else
     {
       jam();
-      DEBUG_CRASH();
-      break;
+      // Fixed key...fix later...
+      ndbrequire(false);
     }
+    fragPtr.p->m_rangePtrI = ptrI;
+    scanIndex_fixupBound(fragPtr, ptrI, rowRef.m_src_correlation);
 
-    treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
+    if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
+    {
+      jam();
+      /**
+       * Being T_ONE_SHOT means that parent_row will only be called
+       *   once, i.e. the parent batch is complete
+       */
+      scanIndex_parent_batch_complete(signal, requestPtr, treeNodePtr);
+    }
 
-    return 0;
+    return;
   } while (0);
-
-  return err;
+  
+  ndbrequire(false);
 }
 
+    
 void
-Dbspj::scanFrag_start(Signal* signal,
-		      Ptr<Request> requestPtr,
-		      Ptr<TreeNode> treeNodePtr,
-		      SegmentedSectionPtr keyInfo)
+Dbspj::scanIndex_fixupBound(Ptr<ScanIndexFrag> fragPtr,
+                            Uint32 ptrI, Uint32 corrVal)
 {
-  const ScanFragReq* src = reinterpret_cast<const ScanFragReq*>(signal->getDataPtr());
-
-#if NOT_YET
-  Uint32 instanceNo = blockToInstance(signal->header.theReceiversBlockNumber);
-  treeNodePtr.p->m_send.m_ref = numberToRef(DBLQH, instanceNo, getOwnNodeId());
-#else
-  treeNodePtr.p->m_send.m_ref = 
-    numberToRef(DBLQH, getInstanceKey(src->tableId,
-                                      src->fragmentNoKeyLen),
-                getOwnNodeId());
-#endif
-
-  Uint32 fragId = src->fragmentNoKeyLen;
-  Uint32 requestInfo = src->requestInfo;
-  Uint32 batch_size_bytes = src->batch_size_bytes;
-  Uint32 batch_size_rows = src->batch_size_rows;
-
-#ifdef VM_TRACE
-  Uint32 savePointId = src->savePointId;
-  Uint32 tableId = src->tableId;
-  Uint32 schemaVersion = src->schemaVersion;
-  Uint32 transId1 = src->transId1;
-  Uint32 transId2 = src->transId2;
-#endif
-
-  ndbassert(ScanFragReq::getLockMode(requestInfo) == 0);
-  ndbassert(ScanFragReq::getHoldLockFlag(requestInfo) == 0);
-  ndbassert(ScanFragReq::getKeyinfoFlag(requestInfo) == 0);
-  ndbassert(ScanFragReq::getReadCommittedFlag(requestInfo) == 1);
-  ndbassert(ScanFragReq::getLcpScanFlag(requestInfo) == 0);
-  //ScanFragReq::getAttrLen(requestInfo); // ignore
-  ndbassert(ScanFragReq::getReorgFlag(requestInfo) == 0);
-
-  Uint32 tupScanFlag = ScanFragReq::getTupScanFlag(requestInfo);
-  Uint32 rangeScanFlag = ScanFragReq::getRangeScanFlag(requestInfo);
-  Uint32 descendingFlag = ScanFragReq::getDescendingFlag(requestInfo);
-  Uint32 scanPrio = ScanFragReq::getScanPrio(requestInfo);
-
-  if (rangeScanFlag)
-  {
-    c_Counters.incr_counter(CI_RANGE_SCANS_RECEIVED, 1);
-  }
-  else
-  {
-    c_Counters.incr_counter(CI_TABLE_SCANS_RECEIVED, 1);
-  }
-
-  ScanFragReq * dst =(ScanFragReq*)treeNodePtr.p->m_scanfrag_data.m_scanFragReq;
-  Uint32 dst_requestInfo = dst->requestInfo;
   /**
-   * 'NoDiskFlag' should agree with information in treeNode
+   * Index bounds...need special care...
+   *
+   * 1) Set #bound no, bound-size, and renumber attributes
    */
-  ndbassert(ScanFragReq::getNoDiskFlag(requestInfo) ==
-            ScanFragReq::getNoDiskFlag(dst_requestInfo));
-
-  ScanFragReq::setTupScanFlag(dst_requestInfo,tupScanFlag);
-  ScanFragReq::setRangeScanFlag(dst_requestInfo,rangeScanFlag);
-  ScanFragReq::setDescendingFlag(dst_requestInfo,descendingFlag);
-  ScanFragReq::setScanPrio(dst_requestInfo,scanPrio);
+  SectionReader r0(ptrI, getSectionSegmentPool());
+  ndbrequire(r0.step(fragPtr.p->m_range_builder.m_range_size));
+  Uint32 boundsz = r0.getSize() - fragPtr.p->m_range_builder.m_range_size;
+  Uint32 boundno = fragPtr.p->m_range_builder.m_range_cnt + 1;
 
-  dst->fragmentNoKeyLen = fragId;
-  dst->requestInfo = dst_requestInfo;
-  dst->batch_size_bytes = batch_size_bytes;
-  dst->batch_size_rows = batch_size_rows;
+  Uint32 tmp;
+  ndbrequire(r0.peekWord(&tmp));
+  tmp |= (boundsz << 16) | ((corrVal & 0xFFF) << 4);
+  ndbrequire(r0.updateWord(tmp));
+  ndbrequire(r0.step(1));
+
+  Uint32 id = 0;
+  Uint32 len32;
+  do 
+  {
+    ndbrequire(r0.peekWord(&tmp));
+    AttributeHeader ah(tmp);
+    Uint32 len = ah.getByteSize();
+    AttributeHeader::init(&tmp, id++, len);
+    ndbrequire(r0.updateWord(tmp));
+    len32 = (len + 3) >> 2;
+  } while (r0.step(1 + len32));
 
-#ifdef VM_TRACE
-  ndbassert(dst->savePointId == savePointId);
-  ndbassert(dst->tableId == tableId);
-  ndbassert(dst->schemaVersion == schemaVersion);
-  ndbassert(dst->transId1 == transId1);
-  ndbassert(dst->transId2 == transId2);
-#endif
+  fragPtr.p->m_range_builder.m_range_cnt = boundno;
+  fragPtr.p->m_range_builder.m_range_size = r0.getSize();
+}
 
-  treeNodePtr.p->m_send.m_keyInfoPtrI = keyInfo.i;
+void
+Dbspj::scanIndex_parent_batch_complete(Signal* signal,
+                                       Ptr<Request> requestPtr,
+                                       Ptr<TreeNode> treeNodePtr)
+{
+  jam();
 
-  scanFrag_send(signal, requestPtr, treeNodePtr);
+  /**
+   * When parent's batch is complete, we send our batch
+   */
+  scanIndex_send(signal, requestPtr, treeNodePtr);
 }
 
 void
-Dbspj::scanFrag_send(Signal* signal,
-		     Ptr<Request> requestPtr,
-		     Ptr<TreeNode> treeNodePtr)
+Dbspj::scanIndex_send(Signal* signal,
+                      Ptr<Request> requestPtr,
+                      Ptr<TreeNode> treeNodePtr)
 {
   jam();
 
-  ndbrequire(treeNodePtr.p->m_state == TreeNode::TN_INACTIVE);
-  treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
-  requestPtr.p->m_cnt_active++;
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
+  const ScanFragReq * org = (const ScanFragReq*)data.m_scanFragReq;
 
-  ScanFragReq* req = reinterpret_cast<ScanFragReq*>(signal->getDataPtrSend());
+  data.m_rows_received = 0;
+  data.m_rows_expecting = 0;
+  data.m_frags_outstanding = 0;
+
+  Uint32 cnt = 1;
+  Uint32 bs_rows = org->batch_size_rows;
+  Uint32 bs_bytes = org->batch_size_bytes;
+  if (treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL)
+  {
+    jam();
+    ndbrequire(data.m_fragCount > data.m_frags_complete);
+    cnt = data.m_fragCount - data.m_frags_complete;
+    bs_rows /= cnt;
+    bs_bytes /= cnt;
 
-  memcpy(req, treeNodePtr.p->m_scanfrag_data.m_scanFragReq,
-	 sizeof(treeNodePtr.p->m_scanfrag_data.m_scanFragReq));
+    if (bs_rows == 0)
+      bs_rows = 1;
+  }
+
+  /**
+   * keys,
+   * - sliced out to each ScanIndexFrag => release = true
+   * - all kept on first ScanIndexFrag => release = false
+   */
+  Uint32 prunemask = TreeNode::T_PRUNE_PATTERN | TreeNode::T_CONST_PRUNE;
+  bool release = (treeNodePtr.p->m_bits & prunemask) != 0;
+
+  ScanFragReq* req = reinterpret_cast<ScanFragReq*>(signal->getDataPtrSend());
+  memcpy(req, org, sizeof(data.m_scanFragReq));
   req->variableData[0] = requestPtr.p->m_rootResultData;
   req->variableData[1] = treeNodePtr.p->m_send.m_correlation;
+  req->batch_size_bytes = bs_bytes;
+  req->batch_size_rows = bs_rows;
 
-  SectionHandle handle(this);
+  Ptr<ScanIndexFrag> fragPtr;
+  Local_ScanIndexFrag_list list(m_scanindexfrag_pool, data.m_fragments);
 
-  Uint32 ref = treeNodePtr.p->m_send.m_ref;
-  Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI;
-  Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
+  Uint32 keyInfoPtrI;
+  if (release == false)
+  {
+    jam();
+    list.first(fragPtr);
+    keyInfoPtrI = fragPtr.p->m_rangePtrI;
+    if (keyInfoPtrI == RNIL)
+    {
+      jam();
+      return;
+    }
+  }
 
-  if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
+  m_scanindexfrag_pool.getPtr(fragPtr, data.m_currentFragmentPtrI);
+  for (Uint32 i = 0; i < cnt && !fragPtr.isNull(); list.next(fragPtr))
   {
     jam();
+
+    SectionHandle handle(this);
+
+    Uint32 ref = fragPtr.p->m_ref;
+    Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
+
     /**
-     * Pass sections to send
+     * Set data specific to this fragment
      */
-    treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
-    treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
-  }
-  else
-  {
-    if (keyInfoPtrI != RNIL)
+    req->fragmentNoKeyLen = fragPtr.p->m_fragId;
+
+    if (release)
     {
       jam();
-      if ((treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED) == 0)
-      {
-        jam();
-        Uint32 tmp = RNIL;
-        ndbrequire(dupSection(tmp, keyInfoPtrI)); // TODO handle error
-        keyInfoPtrI = tmp;
-      }
-      else
+      keyInfoPtrI = fragPtr.p->m_rangePtrI;
+      if (keyInfoPtrI == RNIL)
       {
         jam();
-        treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
+        fragPtr.p->m_state = 1; // complete
+        continue;
       }
-    }
 
-    if ((treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED) == 0)
-    {
-      jam();
+      /**
+       * Since we will use sendSignal() (which releases sections) and may need
+       *   to send the attrInfo several times, we must work on a copy of it
+       */
       Uint32 tmp = RNIL;
       ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error
       attrInfoPtrI = tmp;
     }
+    
+    getSection(handle.m_ptr[0], attrInfoPtrI);
+    getSection(handle.m_ptr[1], keyInfoPtrI);
+    handle.m_cnt = 2;
+
+#if defined DEBUG_SCAN_FRAGREQ
+    {
+      ndbout_c("SCAN_FRAGREQ to %x", ref);
+      printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
+                        NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
+                        DBLQH);
+      printf("ATTRINFO: ");
+      print(handle.m_ptr[0], stdout);
+      printf("KEYINFO: ");
+      print(handle.m_ptr[1], stdout);
+    }
+#endif
+
+    if (refToNode(ref) == getOwnNodeId())
+    {
+      c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1);
+    }
+    else
+    {
+      c_Counters.incr_counter(CI_REMOTE_RANGE_SCANS_SENT, 1);
+    }
+    
+    if (release)
+    {
+      jam();
+      sendSignal(ref, GSN_SCAN_FRAGREQ, signal,
+                 NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle);
+    }
     else
     {
       jam();
-      treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
+      sendSignalNoRelease(ref, GSN_SCAN_FRAGREQ, signal,
+                          NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle);
     }
-  }
-
-  getSection(handle.m_ptr[0], attrInfoPtrI);
-  handle.m_cnt = 1;
+    handle.clear();
 
-  if (keyInfoPtrI != RNIL)
-  {
-    jam();
-    getSection(handle.m_ptr[1], keyInfoPtrI);
-    handle.m_cnt = 2;
+    i++;
+    fragPtr.p->m_state = 0; // running
+    data.m_frags_outstanding++;
   }
 
-#ifdef DEBUG_SCAN_FRAGREQ
-  ndbout_c("SCAN_FRAGREQ to %x", ref);
-  printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
-                    NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
-                    DBLQH);
-  printf("ATTRINFO: ");
-  print(handle.m_ptr[0], stdout);
-  if (handle.m_cnt > 1)
+  if (fragPtr.i == RNIL)
   {
-    printf("KEYINFO: ");
-    print(handle.m_ptr[1], stdout);
+    jam();
+    list.first(fragPtr);
   }
-#endif
+  data.m_currentFragmentPtrI = fragPtr.i;
 
-  if (ScanFragReq::getRangeScanFlag(req->requestInfo))
-  {
-    c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1);
-  }
-  else
+  if (data.m_frags_outstanding == 0)
   {
-    c_Counters.incr_counter(CI_LOCAL_TABLE_SCANS_SENT, 1);
+    jam();
+    return;
   }
 
-  sendSignal(ref, GSN_SCAN_FRAGREQ, signal,
-	     NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
-             JBB, &handle);
-
-  ndbassert (!treeNodePtr.p->m_scanfrag_data.m_pending_close);
-  treeNodePtr.p->m_scanfrag_data.m_scan_state = ScanFragData::SF_RUNNING;
-  treeNodePtr.p->m_scanfrag_data.m_scan_status = 0;
-  treeNodePtr.p->m_scanfrag_data.m_scan_fragconf_received = false;
-  treeNodePtr.p->m_scanfrag_data.m_rows_received = 0;
-  treeNodePtr.p->m_scanfrag_data.m_rows_expecting = 0;
-  treeNodePtr.p->m_scanfrag_data.m_descendant_keyconfs_received = 0;
-  treeNodePtr.p->m_scanfrag_data.m_descendant_silent_keyconfs_received = 0;
-  treeNodePtr.p->m_scanfrag_data.m_descendant_keyrefs_received = 0;
-  treeNodePtr.p->m_scanfrag_data.m_descendant_keyreqs_sent = 0;
-  treeNodePtr.p->m_scanfrag_data.m_missing_descendant_rows = 0;
-
-  /**
-   * Save position where next-scan-req should continue or close
-   */
-  treeNodePtr.p->m_scanfrag_data.m_scan_state = ScanFragData::SF_RUNNING;
-  requestPtr.p->m_currentNodePtrI = treeNodePtr.i;
-}
-
-/** Return true if scan batch is complete. This happens when all scan 
- * rows and all results for descendant lookups have been received.*/
-static bool isScanComplete(const Dbspj::ScanFragData& scanFragData)
-{
-  return scanFragData.m_scan_fragconf_received &&
-    // All rows for root scan received.
-    scanFragData.m_rows_received == scanFragData.m_rows_expecting &&
-    // All rows for descendant lookups received.
-    scanFragData.m_missing_descendant_rows == 0 &&
-    // All descendant lookup operations are complete.
-    scanFragData.m_descendant_keyreqs_sent == 
-    scanFragData.m_descendant_keyconfs_received + 
-    scanFragData.m_descendant_silent_keyconfs_received + 
-    scanFragData.m_descendant_keyrefs_received;
+  requestPtr.p->m_cnt_active ++;
+  requestPtr.p->m_outstanding++;
+  treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
 }
-
+                      
 void
-Dbspj::scanFrag_execTRANSID_AI(Signal* signal,
+Dbspj::scanIndex_execTRANSID_AI(Signal* signal,
 			       Ptr<Request> requestPtr,
 			       Ptr<TreeNode> treeNodePtr,
-			       const RowRef & rowRef)
+			       const RowPtr & rowRef)
 {
   jam();
-  treeNodePtr.p->m_scanfrag_data.m_rows_received++;
+  
+  LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+  Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
+  Dependency_map::ConstDataBufferIterator it;
 
   {
-    LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
-    Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
-    Dependency_map::ConstDataBufferIterator it;
     for (list.first(it); !it.isNull(); list.next(it))
     {
       jam();
       Ptr<TreeNode> childPtr;
       m_treenode_pool.getPtr(childPtr, * it.data);
-      ndbrequire(childPtr.p->m_info != 0&&childPtr.p->m_info->m_start_child!=0);
-      (this->*(childPtr.p->m_info->m_start_child))(signal,
-                                                   requestPtr, childPtr,rowRef);
+      ndbrequire(childPtr.p->m_info != 0&&childPtr.p->m_info->m_parent_row!=0);
+      (this->*(childPtr.p->m_info->m_parent_row))(signal,
+                                                  requestPtr, childPtr,rowRef);
     }
   }
 
-  if (isScanComplete(treeNodePtr.p->m_scanfrag_data))
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
+  data.m_rows_received++;
+
+  if (data.m_frags_outstanding == 0 && 
+      data.m_rows_received == data.m_rows_expecting)
   {
     jam();
-    scanFrag_batch_complete(signal, requestPtr, treeNodePtr);
+    /**
+     * Finished...
+     */
+    if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
+    {
+      jam();
+      reportBatchComplete(signal, requestPtr, treeNodePtr);
+    }
+    
+    checkBatchComplete(signal, requestPtr, 1);
+    return;
   }
 }
 
 void
-Dbspj::scanFrag_execSCAN_FRAGREF(Signal* signal,
-                                 Ptr<Request> requestPtr,
-                                 Ptr<TreeNode> treeNodePtr)
+Dbspj::scanIndex_execSCAN_FRAGCONF(Signal* signal,
+                                   Ptr<Request> requestPtr,
+                                   Ptr<TreeNode> treeNodePtr)
 {
-  const ScanFragRef* const rep = reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());
-  Uint32 errCode = rep->errorCode;
-
-  /**
-   * Return back to api...
-   *   NOTE: assume that signal is tampered with
-   */
-  ndbassert (rep->transId1 == requestPtr.p->m_transId[0]);
-  ndbassert (rep->transId2 == requestPtr.p->m_transId[1]);
+  jam();
 
-  DEBUG("scanFrag_execSCAN_FRAGREF, rep->senderData:" << rep->senderData
-         << ", requestPtr.p->m_senderData:" << requestPtr.p->m_senderData);
+  const ScanFragConf * conf = (const ScanFragConf*)(signal->getDataPtr());
 
-  ScanFragRef* const ref = reinterpret_cast<ScanFragRef*>(signal->getDataPtrSend());
+  Uint32 rows = conf->completedOps;
+  Uint32 done = conf->fragmentCompleted;
 
-  ref->senderData = requestPtr.p->m_senderData;
-  ref->errorCode = errCode;
-  ref->transId1 = requestPtr.p->m_transId[0];
-  ref->transId2 = requestPtr.p->m_transId[1];
+  requestPtr.p->m_rows += rows;
 
-  sendSignal(requestPtr.p->m_senderRef, GSN_SCAN_FRAGREF, signal,
-	     ScanFragRef::SignalLength, JBB);
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
 
-  treeNodePtr.p->m_scanfrag_data.m_scan_fragconf_received = true;
-//treeNodePtr.p->m_scanfrag_data.m_scan_status = 2;  // (2=ZSCAN_FRAG_CLOSED)
-  ndbassert (isScanComplete(treeNodePtr.p->m_scanfrag_data));
+  if (!treeNodePtr.p->isLeaf())
+  {
+    jam();
+    data.m_rows_expecting += rows;
+  }
+  ndbrequire(data.m_frags_outstanding);
+  data.m_frags_outstanding--;
 
-  /**
-   * SCAN_FRAGREF implies that datanodes closed the cursor.
-   *  -> Pending close is effectively a NOOP, reset it
-   */
-  if (treeNodePtr.p->m_scanfrag_data.m_pending_close)
+  if (done)
   {
     jam();
-    treeNodePtr.p->m_scanfrag_data.m_pending_close = false;
-    DEBUG(" SCAN_FRAGREF, had pending close which can be ignored (is closed)");
+    Ptr<ScanIndexFrag> fragPtr;
+    {
+      Local_ScanIndexFrag_list list(m_scanindexfrag_pool, data.m_fragments);
+      scanIndex_findFrag(list, fragPtr, conf->fragId);
+    }
+    ndbrequire(fragPtr.p->m_state == 0);
+    fragPtr.p->m_state = done;
+    data.m_frags_complete ++;
+    ndbrequire(data.m_frags_complete <= data.m_fragCount);
+
+    if (data.m_frags_complete == data.m_fragCount)
+    {
+      jam();
+      ndbrequire(requestPtr.p->m_cnt_active);
+      requestPtr.p->m_cnt_active --;
+      treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
+      data.m_frags_complete = 0; // reset
+    }
   }
 
-  /**
-   * Cleanup operation on SPJ block, remove all allocated resources.
-   */
+  if (data.m_frags_outstanding == 0 && 
+      data.m_rows_received == data.m_rows_expecting)
   {
     jam();
-    treeNodePtr.p->m_scanfrag_data.m_scan_state = ScanFragData::SF_IDLE;
-    nodeFinished(signal, requestPtr, treeNodePtr);
+    /**
+     * Finished...
+     */
+    if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
+    {
+      jam();
+      reportBatchComplete(signal, requestPtr, treeNodePtr);
+    }
+    
+    checkBatchComplete(signal, requestPtr, 1);
+    return;
   }
 }
 
-
 void
-Dbspj::scanFrag_execSCAN_FRAGCONF(Signal* signal,
+Dbspj::scanIndex_execSCAN_FRAGREF(Signal* signal,
                                   Ptr<Request> requestPtr,
                                   Ptr<TreeNode> treeNodePtr)
 {
-  const ScanFragConf* conf = reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());
-  Uint32 rows = conf->completedOps;
-  Uint32 done = conf->fragmentCompleted;
+  jam();
+  const ScanFragRef * ref = (const ScanFragRef*)(signal->getDataPtr());
 
-  ndbrequire(done <= 2); // 0, 1, 2 (=ZSCAN_FRAG_CLOSED)
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
 
-  treeNodePtr.p->m_scanfrag_data.m_scan_status = done;
-  treeNodePtr.p->m_scanfrag_data.m_rows_expecting = rows;
-  if (treeNodePtr.p->isLeaf())
-  {
-    /**
-     * If this is a leaf node, then no rows will be sent to the SPJ block,
-     * as there are no child operations to instantiate.
-     */
-    treeNodePtr.p->m_scanfrag_data.m_rows_received = rows;
-  }
-  treeNodePtr.p->m_scanfrag_data.m_scan_fragconf_received = true;
-  if (isScanComplete(treeNodePtr.p->m_scanfrag_data))
+  Ptr<ScanIndexFrag> fragPtr;
   {
-    jam();
-    scanFrag_batch_complete(signal, requestPtr, treeNodePtr);
+    Local_ScanIndexFrag_list list(m_scanindexfrag_pool, data.m_fragments);
+    scanIndex_findFrag(list, fragPtr, ref->fragId);
   }
-}
+  ndbrequire(false);
+}  
 
 void
-Dbspj::scanFrag_batch_complete(Signal* signal,
-                               Ptr<Request> requestPtr,
-                               Ptr<TreeNode> treeNodePtr)
+Dbspj::scanIndex_execSCAN_NEXTREQ(Signal* signal, 
+                                  Ptr<Request> requestPtr,
+                                  Ptr<TreeNode> treeNodePtr)
 {
-  DEBUG("scanFrag_batch_complete()");
+  jam();
 
-  if (treeNodePtr.p->m_scanfrag_data.m_pending_close)
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
+
+  data.m_rows_received = 0;
+  data.m_rows_expecting = 0;
+  ndbassert(data.m_frags_outstanding == 0);
+
+  ndbrequire(data.m_fragCount > data.m_frags_complete);
+  Uint32 cnt = data.m_fragCount - data.m_frags_complete;
+  if ((treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL) == 0)
   {
     jam();
-    ndbrequire(treeNodePtr.p->m_scanfrag_data.m_scan_state == ScanFragData::SF_RUNNING);
-    treeNodePtr.p->m_scanfrag_data.m_scan_state = ScanFragData::SF_STARTED;
-
-    DEBUG("scanFrag_batch_complete() - has pending close, ignore this reply, request close");
-
-    ScanFragNextReq* req = reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
+    cnt = 1;
+  }
 
-    /**
-     * SCAN_NEXTREQ(close) was requested while we where waiting for 
-     * datanodes to complete this request. 
-     *   - Send close request to LQH now.
-     *   - Suppress reply to TC/API, will reply later when close is conf'ed
-     */
-    req->closeFlag = ZTRUE;
-    req->senderData = treeNodePtr.i;
-    req->transId1 = requestPtr.p->m_transId[0];
-    req->transId2 = requestPtr.p->m_transId[1];
-    req->batch_size_rows = 0;
-    req->batch_size_bytes = 0;
+  const ScanFragReq * org = (const ScanFragReq*)data.m_scanFragReq;
+  ScanFragNextReq* req = 
+    reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
+  req->senderData = treeNodePtr.i;
+  req->closeFlag = 0;
+  req->transId1 = requestPtr.p->m_transId[0];
+  req->transId2 = requestPtr.p->m_transId[1];
+  req->batch_size_rows = org->batch_size_rows;
+  req->batch_size_bytes = org->batch_size_bytes;
+  
+  Ptr<ScanIndexFrag> fragPtr;
+  m_scanindexfrag_pool.getPtr(fragPtr, data.m_currentFragmentPtrI);
+  Local_ScanIndexFrag_list list(m_scanindexfrag_pool, data.m_fragments);
+  for (Uint32 i = 0; i < cnt && !fragPtr.isNull(); list.next(fragPtr))
+  {
+    jam();
+    if (fragPtr.p->m_state == 0)
+    {
+      jam();
 
-    treeNodePtr.p->m_scanfrag_data.m_pending_close = false;
-    scanFrag_execSCAN_NEXTREQ(signal, requestPtr, treeNodePtr);
-    return;
+      i++;
+      data.m_frags_outstanding++;
+      sendSignal(fragPtr.p->m_ref, GSN_SCAN_NEXTREQ, signal, 
+                 ScanFragNextReq::SignalLength, 
+                 JBB);
+    }
+    else if (fragPtr.p->m_state == Uint16(~0))
+    {
+      jam();
+      /**
+       * not sent...this should only be possible with ! T_SCAN_PARALLEL
+       *   which is not yet implemented
+       */
+      ndbrequire(false);
+    }
+  }
+  
+  if (fragPtr.i == RNIL)
+  {
+    jam();
+    list.first(fragPtr);
   }
-
+  data.m_currentFragmentPtrI = fragPtr.i;
+  
   /**
-   * one batch complete...
-   *   if tree contains several scans...this is harder...
-   *   but for now just reply to TC (and possibly cleanup)
+   * cursor should not have been positioned here...
+   *   unless we actually had something more to send.
+   *   so require that we did actually send something
    */
-  ScanFragConf* conf = reinterpret_cast<ScanFragConf*>(signal->getDataPtrSend());
-  conf->senderData = requestPtr.p->m_senderData;
-  conf->transId1 = requestPtr.p->m_transId[0];
-  conf->transId2 = requestPtr.p->m_transId[1];
-  conf->completedOps = treeNodePtr.p->m_scanfrag_data.m_rows_expecting
-    + treeNodePtr.p->m_scanfrag_data.m_descendant_keyconfs_received;
-  conf->fragmentCompleted = treeNodePtr.p->m_scanfrag_data.m_scan_status;
-  conf->total_len = 0; // Not supported...
+  ndbrequire(data.m_frags_outstanding > 0);
 
-  sendSignal(requestPtr.p->m_senderRef, GSN_SCAN_FRAGCONF, signal,
-	     ScanFragConf::SignalLength, JBB);
+  requestPtr.p->m_cnt_active ++;
+  requestPtr.p->m_outstanding++;
+  ndbassert(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
+}
 
-  if (treeNodePtr.p->m_scanfrag_data.m_scan_status == 2)
-  {
-    jam();
-    ndbrequire(treeNodePtr.p->m_scanfrag_data.m_scan_state == ScanFragData::SF_RUNNING ||
-               treeNodePtr.p->m_scanfrag_data.m_scan_state == ScanFragData::SF_CLOSING);
-    /**
-     * EOF for scan
-     */
-    treeNodePtr.p->m_scanfrag_data.m_scan_state = ScanFragData::SF_IDLE;
-    nodeFinished(signal, requestPtr, treeNodePtr);
-  }
-  else
+void
+Dbspj::scanIndex_complete(Signal* signal,
+                          Ptr<Request> requestPtr,
+                          Ptr<TreeNode> treeNodePtr)
+{
+  jam();
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
+  ScanFragReq*dst=(ScanFragReq*)treeNodePtr.p->m_scanindex_data.m_scanFragReq;
+  if (!data.m_fragments.isEmpty())
   {
     jam();
-    ndbrequire(treeNodePtr.p->m_scanfrag_data.m_scan_state == ScanFragData::SF_RUNNING);
-    /**
-     * Check position where next-scan-req should continue
-     */
-    treeNodePtr.p->m_scanfrag_data.m_scan_state = ScanFragData::SF_STARTED;
-    assert(requestPtr.p->m_currentNodePtrI == treeNodePtr.i);
+    DihScanTabCompleteRep* rep=(DihScanTabCompleteRep*)signal->getDataPtrSend();
+    rep->tableId = dst->tableId;
+    rep->scanCookie = data.m_scanCookie;
+    sendSignal(DBDIH_REF, GSN_DIH_SCAN_TAB_COMPLETE_REP,
+               signal, DihScanTabCompleteRep::SignalLength, JBB);
   }
 }
 
 void
-Dbspj::scanFrag_start_child(Signal* signal,
-                            Ptr<Request> requestPtr,
-                            Ptr<TreeNode> treeNodePtr,
-                            const RowRef & rowRef)
+Dbspj::scanIndex_abort(Signal* signal,
+                       Ptr<Request> requestPtr,
+                       Ptr<TreeNode> treeNodePtr)
 {
   jam();
   ndbrequire(false);
 }
 
-void
-Dbspj::scanFrag_execSCAN_NEXTREQ(Signal* signal, 
-                                 Ptr<Request> requestPtr,
-                                 Ptr<TreeNode> treeNodePtr)
+Uint32
+Dbspj::scanIndex_execNODE_FAILREP(Signal* signal,
+                                  Ptr<Request> requestPtr,
+                                  Ptr<TreeNode> treeNodePtr,
+                                  NdbNodeBitmask nodes)
 {
-  jamEntry();
-  ndbassert (treeNodePtr.p->m_scanfrag_data.m_scan_state == ScanFragData::SF_STARTED);
-
-  ScanFragNextReq* nextReq = reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
-  nextReq->senderData = treeNodePtr.i;
-  ndbassert (nextReq->transId1 == requestPtr.p->m_transId[0]);
-  ndbassert (nextReq->transId2 == requestPtr.p->m_transId[1]);
-
-  DEBUG("scanFrag_execSCAN_NEXTREQ to: " << treeNodePtr.p->m_send.m_ref
-      << ", senderData: " << nextReq->senderData);
-
-  sendSignal(treeNodePtr.p->m_send.m_ref, 
-             GSN_SCAN_NEXTREQ, 
-             signal, 
-             ScanFragNextReq::SignalLength, 
-             JBB);
-
-  treeNodePtr.p->m_scanfrag_data.m_scan_state = (nextReq->closeFlag == ZTRUE)
-    ? ScanFragData::SF_CLOSING 
-    : ScanFragData::SF_RUNNING;
-
-  treeNodePtr.p->m_scanfrag_data.m_scan_status = 0;
-  treeNodePtr.p->m_scanfrag_data.m_scan_fragconf_received = false;
-  treeNodePtr.p->m_scanfrag_data.m_rows_received = 0;
-  treeNodePtr.p->m_scanfrag_data.m_rows_expecting = 0;
-  treeNodePtr.p->m_scanfrag_data.m_descendant_keyconfs_received = 0;
-  treeNodePtr.p->m_scanfrag_data.m_descendant_silent_keyconfs_received = 0;
-  treeNodePtr.p->m_scanfrag_data.m_descendant_keyrefs_received = 0;
-  treeNodePtr.p->m_scanfrag_data.m_descendant_keyreqs_sent = 0;
-  treeNodePtr.p->m_scanfrag_data.m_missing_descendant_rows = 0;
-}//Dbspj::scanFrag_execSCAN_NEXTREQ()
-
+  jam();
+  ndbrequire(false);
+  return 0;
+}
 
 void
-Dbspj::scanFrag_cleanup(Ptr<Request> requestPtr,
-                        Ptr<TreeNode> treeNodePtr)
+Dbspj::scanIndex_cleanup(Ptr<Request> requestPtr,
+                         Ptr<TreeNode> treeNodePtr)
 {
-  cleanup_common(requestPtr, treeNodePtr);
-}
+  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
+  Local_ScanIndexFrag_list list(m_scanindexfrag_pool, data.m_fragments);
+  list.remove();
 
-void
-Dbspj::scanFrag_count_descendant_signal(Signal* signal,
-                                        Ptr<Request> requestPtr,
-                                        Ptr<TreeNode> treeNodePtr,
-                                        Ptr<TreeNode> rootPtr,
-                                        Uint32 globalSignalNo)
-{
-  const bool trace = false;
-
-  switch(globalSignalNo){
-  case GSN_TRANSID_AI:
-    rootPtr.p->m_scanfrag_data.m_missing_descendant_rows--;
-    if (trace)
-    {
-      ndbout << "Dbspj::scanFrag_count_descendant_signal() decremented "
-        "m_scanfrag_data.m_missing_descendant_rows to "<< 
-        rootPtr.p->m_scanfrag_data.m_missing_descendant_rows << endl;
-    }
-    break;
-  case GSN_LQHKEYCONF:
-    jam();
-    if (treeNodePtr.p->m_bits & TreeNode::T_USER_PROJECTION)
-    {
-      rootPtr.p->m_scanfrag_data.m_descendant_keyconfs_received++;
-      if (trace)
-      {
-        ndbout << "Dbspj::scanFrag_count_descendant_signal() incremented "
-          "m_scanfrag_data.m_descendant_keyconfs_received to "<< 
-          rootPtr.p->m_scanfrag_data.m_descendant_keyconfs_received << endl;
-      }
-    }
-    else
-    {
-      /* There is no user projection. Typically, this will be the operation
-       * that retrieves an index tuple as part of an index lookup operation.
-       * (Only the base table tuple will then be sent to the API.)*/
-      rootPtr.p->m_scanfrag_data.m_descendant_silent_keyconfs_received++;
-      if (trace)
-      {
-        ndbout << "Dbspj::scanFrag_count_descendant_signal() incremented "
-          "m_scanfrag_data.m_descendant_silent_keyconfs_received to "
-               << rootPtr.p->m_scanfrag_data.
-          m_descendant_silent_keyconfs_received 
-               << endl;
-      }
-    }
-    // Check if this is a non-leaf.
-    if (treeNodePtr.p->m_dependent_nodes.firstItem!=RNIL)
-    {
-      /* Since this is a non-leaf, the SPJ block should also receive
-       * a TRANSID_AI message for this operation.*/
-      rootPtr.p->m_scanfrag_data.m_missing_descendant_rows++;
-      if (trace)
-      {
-        ndbout << "Dbspj::scanFrag_count_descendant_signal() incremented "
-          "m_scanfrag_data.m_missing_descendant_rows to "<< 
-          rootPtr.p->m_scanfrag_data.m_missing_descendant_rows << endl;
-      }
-    }
-    break;
-  case GSN_LQHKEYREF:
-    jam();
-    rootPtr.p->m_scanfrag_data.m_descendant_keyrefs_received++;
-    if (trace)
-    {
-      ndbout << "Dbspj::scanFrag_count_descendant_signal() incremented "
-        "m_scanfrag_data.m_descendant_keyrefs_received to "<< 
-        rootPtr.p->m_scanfrag_data.m_descendant_keyrefs_received << endl;
-    }
-    break;
-  case GSN_LQHKEYREQ:
-    jam();
-    rootPtr.p->m_scanfrag_data.m_descendant_keyreqs_sent++;
-    if (trace)
-    {
-      ndbout << "Dbspj::scanFrag_count_descendant_signal() incremented "
-        "m_scanfrag_data.m_descendant_keyreqs_sent to "<< 
-        rootPtr.p->m_scanfrag_data.m_descendant_keyreqs_sent << endl;
-    }
-    break;
-  default:
+  if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
+  {
     jam();
-    ndbrequire(false);
+    LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
+    Local_pattern_store pattern(pool, data.m_prunePattern);
+    pattern.release();    
   }
-  if (isScanComplete(rootPtr.p->m_scanfrag_data))
+  else if (treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE)
   {
     jam();
-    ndbrequire(globalSignalNo!=GSN_LQHKEYREQ);
-    scanFrag_batch_complete(signal, requestPtr, rootPtr);
+    if (data.m_constPrunePtrI != RNIL)
+    {
+      jam();
+      releaseSection(data.m_constPrunePtrI);
+      data.m_constPrunePtrI = RNIL;
+    }
   }
+
+  cleanup_common(requestPtr, treeNodePtr);
 }
 
 /**
- * END - MODULE SCAN FRAG
+ * END - MODULE SCAN INDEX
  */
 
 /**
@@ -2566,6 +4846,8 @@ Dbspj::getOpInfo(Uint32 op)
     return &Dbspj::g_LookupOpInfo;
   case QueryNode::QN_SCAN_FRAG:
     return &Dbspj::g_ScanFragOpInfo;
+  case QueryNode::QN_SCAN_INDEX:
+    return &Dbspj::g_ScanIndexOpInfo;
   default:
     return 0;
   }
@@ -2628,17 +4910,19 @@ error:
  *   which can be used to do random access inside the row
  */
 Uint32
-Dbspj::buildRowHeader(RowRef::Header * header, SegmentedSectionPtr ptr)
+Dbspj::buildRowHeader(RowPtr::Header * header, SegmentedSectionPtr ptr)
 {
   Uint32 tmp, len;
-  Uint32 * dst = (Uint32*)header->m_headers;
+  Uint32 * dst = header->m_offset;
   const Uint32 * const save = dst;
   SectionReader r0(ptr, getSectionSegmentPool());
+  Uint32 offset = 0;
   do
   {
+    * dst++ = offset;
     r0.getWord(&tmp);
     len = AttributeHeader::getDataSize(tmp);
-    * dst++ = tmp;
+    offset += 1 + len;
   } while (r0.step(len));
 
   return header->m_len = static_cast<Uint32>(dst - save);
@@ -2649,15 +4933,17 @@ Dbspj::buildRowHeader(RowRef::Header * h
  *   which can be used to do random access inside the row
  */
 Uint32
-Dbspj::buildRowHeader(RowRef::Header * header, const Uint32 *& src, Uint32 len)
+Dbspj::buildRowHeader(RowPtr::Header * header, const Uint32 *& src, Uint32 len)
 {
-  Uint32 * dst = (Uint32*)header->m_headers;
+  Uint32 * dst = header->m_offset;
   const Uint32 * save = dst;
+  Uint32 offset = 0;
   for (Uint32 i = 0; i<len; i++)
   {
+    * dst ++ = offset;
     Uint32 tmp = * src++;
     Uint32 tmp_len = AttributeHeader::getDataSize(tmp);
-    * dst++ = tmp;
+    offset += 1 + tmp_len;
     src += tmp_len;
   }
 
@@ -2680,17 +4966,12 @@ Dbspj::appendToPattern(Local_pattern_sto
 
 Uint32
 Dbspj::appendParamToPattern(Local_pattern_store& dst,
-                          const RowRef::Linear & row, Uint32 col)
+                          const RowPtr::Linear & row, Uint32 col)
 {
   /**
    * TODO handle errors
    */
-  const Uint32 * header = (const Uint32*)row.m_header->m_headers;
-  Uint32 offset = 0;
-  for (Uint32 i = 0; i<col; i++)
-  {
-    offset += 1 + AttributeHeader::getDataSize(* header++);
-  }
+  Uint32 offset = row.m_header->m_offset[col];
   const Uint32 * ptr = row.m_data + offset;
   Uint32 len = AttributeHeader::getDataSize(* ptr ++);
   /* Param COL's converted to DATA when appended to pattern */
@@ -2723,7 +5004,7 @@ err:
 }
 
 void
-Dbspj::getCorrelationData(const RowRef::Section & row, 
+Dbspj::getCorrelationData(const RowPtr::Section & row, 
                           Uint32 col,
                           Uint32& rootStreamId,
                           Uint32& correlationNumber)
@@ -2731,14 +5012,9 @@ Dbspj::getCorrelationData(const RowRef::
   /**
    * TODO handle errors
    */
-  const Uint32 * header = (const Uint32*)row.m_header->m_headers;
   SegmentedSectionPtr ptr(row.m_dataPtr);
   SectionReader reader(ptr, getSectionSegmentPool());
-  Uint32 offset = 0;
-  for (Uint32 i = 0; i<col; i++)
-  {
-    offset += 1 + AttributeHeader::getDataSize(* header++);
-  }
+  Uint32 offset = row.m_header->m_offset[col];
   ndbrequire(reader.step(offset));
   Uint32 tmp;
   ndbrequire(reader.getWord(&tmp));
@@ -2748,20 +5024,32 @@ Dbspj::getCorrelationData(const RowRef::
   ndbrequire(reader.getWord(&correlationNumber));
 }
 
+void
+Dbspj::getCorrelationData(const RowPtr::Linear & row, 
+                          Uint32 col,
+                          Uint32& rootStreamId,
+                          Uint32& correlationNumber)
+{
+  /**
+   * TODO handle errors
+   */
+  Uint32 offset = row.m_header->m_offset[col];
+  Uint32 tmp = row.m_data[offset];
+  Uint32 len = AttributeHeader::getDataSize(tmp);
+  ndbrequire(len == 2);
+  rootStreamId = row.m_data[offset+1];
+  correlationNumber = row.m_data[offset+2];
+}
+
 Uint32
-Dbspj::appendColToSection(Uint32 & dst, const RowRef::Section & row, Uint32 col)
+Dbspj::appendColToSection(Uint32 & dst, const RowPtr::Section & row, Uint32 col)
 {
   /**
    * TODO handle errors
    */
-  const Uint32 * header = (const Uint32*)row.m_header->m_headers;
   SegmentedSectionPtr ptr(row.m_dataPtr);
   SectionReader reader(ptr, getSectionSegmentPool());
-  Uint32 offset = 0;
-  for (Uint32 i = 0; i<col; i++)
-  {
-    offset += 1 + AttributeHeader::getDataSize(* header++);
-  }
+  Uint32 offset = row.m_header->m_offset[col];
   ndbrequire(reader.step(offset));
   Uint32 tmp;
   ndbrequire(reader.getWord(&tmp));
@@ -2770,55 +5058,40 @@ Dbspj::appendColToSection(Uint32 & dst, 
 }
 
 Uint32
-Dbspj::appendColToSection(Uint32 & dst, const RowRef::Linear & row, Uint32 col)
+Dbspj::appendColToSection(Uint32 & dst, const RowPtr::Linear & row, Uint32 col)
 {
   /**
    * TODO handle errors
    */
-  const Uint32 * header = (const Uint32*)row.m_header->m_headers;
-  Uint32 offset = 0;
-  for (Uint32 i = 0; i<col; i++)
-  {
-    offset += 1 + AttributeHeader::getDataSize(* header++);
-  }
+  Uint32 offset = row.m_header->m_offset[col];
   const Uint32 * ptr = row.m_data + offset;
   Uint32 len = AttributeHeader::getDataSize(* ptr ++);
   return appendToSection(dst, ptr, len) ? 0 : DbspjErr::InvalidPattern;
 }
 
 Uint32
-Dbspj::appendAttrinfoToSection(Uint32 & dst, const RowRef::Linear & row, 
+Dbspj::appendAttrinfoToSection(Uint32 & dst, const RowPtr::Linear & row, 
                                Uint32 col)
 {
   /**
    * TODO handle errors
    */
-  const Uint32 * header = (const Uint32*)row.m_header->m_headers;
-  Uint32 offset = 0;
-  for (Uint32 i = 0; i<col; i++)
-  {
-    offset += 1 + AttributeHeader::getDataSize(* header++);
-  }
+  Uint32 offset = row.m_header->m_offset[col];
   const Uint32 * ptr = row.m_data + offset;
   Uint32 len = AttributeHeader::getDataSize(* ptr);
   return appendToSection(dst, ptr, 1 + len) ? 0 : DbspjErr::InvalidPattern;
 }
 
 Uint32
-Dbspj::appendAttrinfoToSection(Uint32 & dst, const RowRef::Section & row, 
+Dbspj::appendAttrinfoToSection(Uint32 & dst, const RowPtr::Section & row, 
                                Uint32 col)
 {
   /**
    * TODO handle errors
    */
-  const Uint32 * header = (const Uint32*)row.m_header->m_headers;
   SegmentedSectionPtr ptr(row.m_dataPtr);
   SectionReader reader(ptr, getSectionSegmentPool());
-  Uint32 offset = 0;
-  for (Uint32 i = 0; i<col; i++)
-  {
-    offset += 1 + AttributeHeader::getDataSize(* header++);
-  }
+  Uint32 offset = row.m_header->m_offset[col];
   ndbrequire(reader.step(offset));
   Uint32 tmp;
   ndbrequire(reader.peekWord(&tmp));
@@ -2831,19 +5104,14 @@ Dbspj::appendAttrinfoToSection(Uint32 & 
  * a fragment id and the composite PK value (all PK columns concatenated)
  */
 Uint32
-Dbspj::appendPkColToSection(Uint32 & dst, const RowRef::Section & row, Uint32 col)
+Dbspj::appendPkColToSection(Uint32 & dst, const RowPtr::Section & row, Uint32 col)
 {
   /**
    * TODO handle errors
    */
-  const Uint32 * header = (const Uint32*)row.m_header->m_headers;
   SegmentedSectionPtr ptr(row.m_dataPtr);
   SectionReader reader(ptr, getSectionSegmentPool());
-  Uint32 offset = 0;
-  for (Uint32 i = 0; i<col; i++)
-  {
-    offset += 1 + AttributeHeader::getDataSize(* header++);
-  }
+  Uint32 offset = row.m_header->m_offset[col];
   ndbrequire(reader.step(offset));
   Uint32 tmp;
   ndbrequire(reader.getWord(&tmp));
@@ -2852,6 +5120,134 @@ Dbspj::appendPkColToSection(Uint32 & dst
   return appendTreeToSection(dst, reader, len-1);
 }
 
+/**
+ * 'PkCol' is the composite NDB$PK column in an unique index consisting of
+ * a fragment id and the composite PK value (all PK columns concatenated)
+ */
+Uint32
+Dbspj::appendPkColToSection(Uint32 & dst, const RowPtr::Linear & row, Uint32 col)
+{
+  Uint32 offset = row.m_header->m_offset[col];
+  Uint32 tmp = row.m_data[offset];
+  Uint32 len = AttributeHeader::getDataSize(tmp);
+  return appendToSection(dst, row.m_data+offset+2, len - 1) ? 0 : /** todo error code */ 1;
+}
+
+Uint32
+Dbspj::appendFromParent(Uint32 & dst, Local_pattern_store& pattern,
+                        Local_pattern_store::ConstDataBufferIterator& it,
+                        Uint32 levels, const RowPtr & rowptr)
+{
+  Ptr<TreeNode> treeNodePtr;
+  m_treenode_pool.getPtr(treeNodePtr, rowptr.m_src_node_ptrI);
+  Uint32 corrVal = rowptr.m_src_correlation;
+  RowPtr targetRow;
+  while (levels--)
+  {
+    jam();
+    if (unlikely(treeNodePtr.p->m_parentPtrI == RNIL))
+    {
+      DEBUG_CRASH();
+      return DbspjErr::InvalidPattern;
+    }
+    m_treenode_pool.getPtr(treeNodePtr, treeNodePtr.p->m_parentPtrI);
+    if (unlikely((treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP) == 0))
+    {
+      DEBUG_CRASH();
+      return DbspjErr::InvalidPattern;
+    }
+    
+    RowRef ref;
+    treeNodePtr.p->m_row_map.copyto(ref);
+    Uint32 allocator = ref.m_allocator;
+    const Uint32 * mapptr;
+    if (allocator == 0)
+    {
+      jam();
+      mapptr = get_row_ptr_stack(ref);
+    }
+    else
+    {
+      jam();
+      mapptr = get_row_ptr_var(ref);
+    }
+    
+    Uint32 pos = corrVal >> 16; // parent corr-val
+    if (unlikely(! (pos < treeNodePtr.p->m_row_map.m_size)))
+    {
+      DEBUG_CRASH();
+      return DbspjErr::InvalidPattern;
+    }
+
+    // load ref to parent row
+    treeNodePtr.p->m_row_map.load(mapptr, pos, ref);
+    
+    const Uint32 * rowptr;
+    if (allocator == 0)
+    {
+      jam();
+      rowptr = get_row_ptr_stack(ref);
+    }
+    else
+    {
+      jam();
+      rowptr = get_row_ptr_var(ref);
+    }
+    setupRowPtr(treeNodePtr, targetRow, ref, rowptr);
+
+    if (levels)
+    {
+      jam();
+      Uint32 dummy;
+      getCorrelationData(targetRow.m_row_data.m_linear,
+                         targetRow.m_row_data.m_linear.m_header->m_len - 1,
+                         dummy,
+                         corrVal);
+    }
+  }
+
+  if (unlikely(it.isNull()))
+  {
+    DEBUG_CRASH();
+    return DbspjErr::InvalidPattern;
+  }
+
+  Uint32 info = *it.data;
+  Uint32 type = QueryPattern::getType(info);
+  Uint32 val = QueryPattern::getLength(info);
+  pattern.next(it);
+  switch(type){
+  case QueryPattern::P_COL:
+    jam();
+    return appendColToSection(dst, targetRow.m_row_data.m_linear, val);
+    break;
+  case QueryPattern::P_UNQ_PK:
+    jam();
+    return appendPkColToSection(dst, targetRow.m_row_data.m_linear, val);
+    break;
+  case QueryPattern::P_ATTRINFO:
+    jam();
+    return appendAttrinfoToSection(dst, targetRow.m_row_data.m_linear, val);
+    break;
+  case QueryPattern::P_DATA:
+    jam();
+    // retrieving DATA from parent...is...an error
+    break;
+  case QueryPattern::P_PARENT:
+    jam();
+    // no point in nesting P_PARENT...an error
+    break;
+  case QueryPattern::P_PARAM:
+  case QueryPattern::P_PARAM_HEADER:
+    jam();
+    // should have been expanded during build
+    break;
+  }
+
+  DEBUG_CRASH();
+  return DbspjErr::InvalidPattern;
+}
+
 Uint32
 Dbspj::appendDataToSection(Uint32 & ptrI,
                            Local_pattern_store& pattern,
@@ -2945,25 +5341,76 @@ Dbspj::createEmptySection(Uint32 & dst)
   return DbspjErr::OutOfSectionMemory;
 }
 
-const Ptr<Dbspj::TreeNode> 
-Dbspj::getRoot(TreeNode_list::Head& head)
+/**
+ * This function takes a pattern and a row and expands it into a section
+ */
+Uint32
+Dbspj::expandS(Uint32 & _dst, Local_pattern_store& pattern,
+               const RowPtr & row)
 {
-  //assert(rootNode->m_magic==TreeNode::MAGIC);
-  Ptr<TreeNode> rootPtr;
-  const Local_TreeNode_list list(m_treenode_pool, head);
-  const bool found = list.first(rootPtr); 
-  ndbassert(found);
-  ndbassert(!rootPtr.isNull());
-  ndbassert(rootPtr.p->m_node_no==0);
-  return rootPtr;
+  Uint32 err;
+  Uint32 dst = _dst;
+  Local_pattern_store::ConstDataBufferIterator it;
+  pattern.first(it);
+  while (!it.isNull())
+  {
+    Uint32 info = *it.data;
+    Uint32 type = QueryPattern::getType(info);
+    Uint32 val = QueryPattern::getLength(info);
+    pattern.next(it);
+    switch(type){
+    case QueryPattern::P_COL:
+      jam();
+      err = appendColToSection(dst, row.m_row_data.m_section, val);
+      break;
+    case QueryPattern::P_UNQ_PK:
+      jam();
+      err = appendPkColToSection(dst, row.m_row_data.m_section, val);
+      break;
+    case QueryPattern::P_ATTRINFO:
+      jam();
+      err = appendAttrinfoToSection(dst, row.m_row_data.m_section, val);
+      break;
+    case QueryPattern::P_DATA:
+      jam();
+      err = appendDataToSection(dst, pattern, it, val);
+      break;
+    case QueryPattern::P_PARENT:
+      jam();
+      // P_PARENT is a prefix to another pattern token
+      // that permits code to access rows from earlier than the immediate parent
+      // val is no of levels to move up the tree
+      err = appendFromParent(dst, pattern, it, val, row);
+      break;
+    // PARAM's converted to DATA by ::expand(pattern...)
+    case QueryPattern::P_PARAM:
+    case QueryPattern::P_PARAM_HEADER:
+    default:
+      jam();
+      err = DbspjErr::InvalidPattern;
+      DEBUG_CRASH();
+    }
+    if (unlikely(err != 0))
+    {
+      jam();
+      DEBUG_CRASH();
+      goto error;
+    }
+  }
+
+  _dst = dst;
+  return 0;
+error:
+  jam();
+  return err;
 }
 
 /**
  * This function takes a pattern and a row and expands it into a section
  */
 Uint32
-Dbspj::expand(Uint32 & _dst, Local_pattern_store& pattern,
-              const RowRef::Section & row)
+Dbspj::expandL(Uint32 & _dst, Local_pattern_store& pattern,
+               const RowPtr & row)
 {
   Uint32 err;
   Uint32 dst = _dst;
@@ -2978,20 +5425,27 @@ Dbspj::expand(Uint32 & _dst, Local_patte
     switch(type){
     case QueryPattern::P_COL:
       jam();
-      err = appendColToSection(dst, row, val);
+      err = appendColToSection(dst, row.m_row_data.m_linear, val);
       break;
     case QueryPattern::P_UNQ_PK:
       jam();
-      err = appendPkColToSection(dst, row, val);
+      err = appendPkColToSection(dst, row.m_row_data.m_linear, val);
       break;
     case QueryPattern::P_ATTRINFO:
       jam();
-      err = appendAttrinfoToSection(dst, row, val);
+      err = appendAttrinfoToSection(dst, row.m_row_data.m_linear, val);
       break;
     case QueryPattern::P_DATA:
       jam();
       err = appendDataToSection(dst, pattern, it, val);
       break;
+    case QueryPattern::P_PARENT:
+      jam();
+      // P_PARENT is a prefix to another pattern token
+      // that permits code to access rows from earlier than the immediate parent
+      // val is no of levels to move up the tree
+      err = appendFromParent(dst, pattern, it, val, row);
+      break;
     // PARAM's converted to DATA by ::expand(pattern...)
     case QueryPattern::P_PARAM:
     case QueryPattern::P_PARAM_HEADER:
@@ -3024,10 +5478,10 @@ Dbspj::expand(Uint32 & ptrI, DABuffer& p
    */
   Uint32 err;
   Uint32 tmp[1+MAX_ATTRIBUTES_IN_TABLE];
-  struct RowRef::Linear row;
+  struct RowPtr::Linear row;
   row.m_data = param.ptr;
-  row.m_header = (RowRef::Header*)tmp;
-  buildRowHeader((RowRef::Header*)tmp, param.ptr, paramCnt);
+  row.m_header = (RowPtr::Header*)tmp;
+  buildRowHeader((RowPtr::Header*)tmp, param.ptr, paramCnt);
 
   Uint32 dst = ptrI;
   const Uint32 * ptr = pattern.ptr;
@@ -3110,10 +5564,10 @@ Dbspj::expand(Local_pattern_store& dst, 
 
   Uint32 err;
   Uint32 tmp[1+MAX_ATTRIBUTES_IN_TABLE];
-  struct RowRef::Linear row;
-  row.m_header = (RowRef::Header*)tmp;
+  struct RowPtr::Linear row;
+  row.m_header = (RowPtr::Header*)tmp;
   row.m_data = param.ptr;
-  buildRowHeader((RowRef::Header*)tmp, param.ptr, paramCnt);
+  buildRowHeader((RowPtr::Header*)tmp, param.ptr, paramCnt);
 
   const Uint32 * end = pattern.ptr + len;
   for (; pattern.ptr < end; )
@@ -3204,6 +5658,7 @@ Dbspj::parseDA(Build_context& ctx,
       }
 
       err = 0;
+      
       for (Uint32 i = 0; i<cnt; i++)
       {
         DEBUG("adding " << dst[i] << " as parent");
@@ -3217,6 +5672,14 @@ Dbspj::parseDA(Build_context& ctx,
           break;
         }
         parentPtr.p->m_bits &= ~(Uint32)TreeNode::T_LEAF;
+
+        if (i == 0)
+        {
+          /**
+           * Save parent (only 0 for now, i.e. only trees)
+           */
+          treeNodePtr.p->m_parentPtrI = parentPtr.i;
+        }
       }
 
       if (unlikely(err != 0))
@@ -3504,17 +5967,18 @@ Dbspj::parseDA(Build_context& ctx,
         /**
          * Insert a flush of this partial result set
          */
-        Uint32 flush[3];
+        Uint32 flush[4];
         flush[0] = AttributeHeader::FLUSH_AI << 16;
         flush[1] = ctx.m_resultRef;
         flush[2] = ctx.m_resultData;
-        if (!appendToSection(attrInfoPtrI, flush, 3))
+        flush[3] = ctx.m_senderRef; // RouteRef
+        if (!appendToSection(attrInfoPtrI, flush, 4))
         {
           DEBUG_CRASH();
           break;
         }
 
-        sum_read += len + 3;
+        sum_read += len + 4;
       }
 
       if (treeBits & DABits::NI_LINKED_ATTR)
@@ -3583,11 +6047,36 @@ Dbspj::parseDA(Build_context& ctx,
       treeNodePtr.p->m_send.m_attrInfoPtrI = attrInfoPtrI;
     } // if (((treeBits & mask) | (paramBits & DABits::PI_ATTR_LIST)) != 0)
 
+#ifdef JONAS_TESTING_ROW_BUFFERING
+    // TODO: test-only!
+    treeNodePtr.p->m_bits |= TreeNode::T_ROW_BUFFER;
+    treeNodePtr.p->m_bits |= TreeNode::T_ROW_BUFFER_MAP;
+    treeNodePtr.p->m_bits |= TreeNode::T_MULTI_SCAN;
+#endif
+
+    if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
+    {
+      jam();
+      requestPtr.p->m_bits |= Request::RT_ROW_BUFFERS;
+
+      if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP)
+      {
+        jam();
+        treeNodePtr.p->m_row_map.init();
+      }
+      else
+      {
+        jam();
+        treeNodePtr.p->m_row_list.init();
+      }
+    }
+
     return 0;
   } while (0);
 
   return err;
 }
+
 /**
  * END - MODULE COMMON PARSE/UNPACK
  */

=== modified file 'storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2010-05-06 08:42:59 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp	2010-05-20 11:18:08 +0000
@@ -7659,7 +7659,21 @@ void Dbtc::timeOutFoundFragLab(Signal* s
 {
   ScanFragRecPtr ptr;
   c_scan_frag_pool.getPtr(ptr, TscanConPtr);
-  DEBUG(TscanConPtr << " timeOutFoundFragLab: scanFragState = "<< ptr.p->scanFragState);
+#ifdef VM_TRACE
+  {
+    ScanRecordPtr scanptr;
+    scanptr.i = ptr.p->scanRec;
+    ptrCheckGuard(scanptr, cscanrecFileSize, scanRecord);
+    ApiConnectRecordPtr TlocalApiConnectptr;
+    TlocalApiConnectptr.i = scanptr.p->scanApiRec;
+    ptrCheckGuard(TlocalApiConnectptr, capiConnectFilesize, apiConnectRecord);
+
+    DEBUG("[ H'" << hex << TlocalApiConnectptr.p->transid[0]
+	<< " H'" << TlocalApiConnectptr.p->transid[1] << "] "
+        << TscanConPtr << " timeOutFoundFragLab: scanFragState = "
+        << ptr.p->scanFragState);
+  }
+#endif
 
   const Uint32 time_out_param= ctimeOutValue;
   const Uint32 old_time_out_param= c_abortRec.oldTimeOutValue;

=== modified file 'storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp'
--- a/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp	2010-05-06 08:42:59 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp	2010-05-20 11:18:08 +0000
@@ -2461,7 +2461,7 @@ private:
   void update_lcp(KeyReqStruct *req_struct, const Uint32* src, Uint32 len);
 
   void flush_read_buffer(KeyReqStruct *, const Uint32* outBuf,
-			 Uint32 resultRef, Uint32 resultData);
+			 Uint32 resultRef, Uint32 resultData, Uint32 routeRef);
 public:
   /**
    * Used by Restore...

=== modified file 'storage/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp'
--- a/storage/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp	2010-03-10 09:36:44 +0000
+++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupRoutines.cpp	2010-05-20 11:18:08 +0000
@@ -2424,8 +2424,9 @@ Dbtup::read_pseudo(const Uint32 * inBuff
     jam();
     Uint32 resultRef = inBuffer[inPos];
     Uint32 resultData = inBuffer[inPos + 1];
-    flush_read_buffer(req_struct, outBuf, resultRef, resultData);
-    return 2;
+    Uint32 routeRef = inBuffer[inPos + 2];
+    flush_read_buffer(req_struct, outBuf, resultRef, resultData, routeRef);
+    return 3;
   }
   case AttributeHeader::READ_ANY_VALUE:
   {
@@ -2614,24 +2615,49 @@ error:  
 void
 Dbtup::flush_read_buffer(KeyReqStruct *req_struct,
 			 const Uint32 * outBuf,
-			 Uint32 resultRef, Uint32 resultData)
+			 Uint32 resultRef,
+                         Uint32 resultData,
+                         Uint32 routeRef)
 {
   Uint32 sig1= req_struct->trans_id1;
   Uint32 sig2= req_struct->trans_id2;
   Uint32 len = (req_struct->out_buf_index >> 2) - 1;
   Signal * signal = req_struct->signal;
 
+  bool connectedToNode= getNodeInfo(refToNode(resultRef)).m_connected;
+
+  LinearSectionPtr ptr[3];
+  ptr[0].p= (Uint32*)outBuf; // Should really remove this
+  ptr[0].sz= len;
+
   TransIdAI * transIdAI=  (TransIdAI *)signal->getDataPtrSend();
   transIdAI->connectPtr= resultData;
   transIdAI->transId[0]= sig1;
   transIdAI->transId[1]= sig2;
 
-  LinearSectionPtr ptr[3];
-  ptr[0].p= (Uint32*)outBuf; // Should really remove this
-  ptr[0].sz= len;
-  sendSignal(resultRef, GSN_TRANSID_AI, signal, 3, JBB, ptr, 1);
+  if (likely(connectedToNode))
+  {
+    sendSignal(resultRef, GSN_TRANSID_AI, signal, 3, JBB, ptr, 1);
+  }
+  else
+  {
+    jam();
+    if (outBuf == signal->theData + 3)
+    {
+      jam();
+      /**
+       * TUP guesses that it can EXECUTE_DIRECT if own-node,
+       *  it then puts outBuf == signal->theData+3
+       */
+      memmove(signal->theData+25, signal->theData+3, 4*len);
+      ptr[0].p = signal->theData+25;
+    }
+    transIdAI->attrData[0] = resultRef;
+    sendSignal(routeRef, GSN_TRANSID_AI_R, signal, 4, JBB, ptr, 1);
+  }
 
   req_struct->out_buf_index = 0; // Reset buffer
+  req_struct->out_buf_bits = 0;
 }
 
 Uint32

=== modified file 'storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2010-05-06 08:42:59 +0000
+++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2010-05-20 11:18:08 +0000
@@ -1876,6 +1876,9 @@ void Ndbcntr::execNODE_FAILREP(Signal* s
   sendSignal(LGMAN_REF, GSN_NODE_FAILREP, signal,
              NodeFailRep::SignalLength, JBB);
 
+  sendSignal(DBSPJ_REF, GSN_NODE_FAILREP, signal,
+             NodeFailRep::SignalLength, JBB);
+
   if (c_stopRec.stopReq.senderRef)
   {
     jam();

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2010-03-16 15:56:18 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp	2010-05-20 11:18:08 +0000
@@ -95,6 +95,7 @@ public:
     WAITING_FOR_FAILCONF1 = 3,
     WAITING_FOR_FAILCONF2 = 4,
     WAITING_FOR_FAILCONF3 = 5,
+    WAITING_FOR_FAILCONF4 = 7,
     WAITING_FOR_NDB_FAILCONF = 6
   };
 

=== modified file 'storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp'
--- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2010-03-21 19:05:55 +0000
+++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp	2010-05-20 11:18:08 +0000
@@ -2783,6 +2783,11 @@ void Qmgr::sendApiFailReq(Signal* signal
     sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
                         RouteOrd::SignalLength,
                         JBA, &handle);
+
+    routeOrd->dstRef = DBSPJ_REF;
+    sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
+                        RouteOrd::SignalLength,
+                        JBA, &handle);
   }
 
   /* Suma always notified */
@@ -2827,6 +2832,11 @@ void Qmgr::execAPI_FAILCONF(Signal* sign
   else if (failedNodePtr.p->failState == WAITING_FOR_FAILCONF3)
   {
     jam();
+    failedNodePtr.p->failState = WAITING_FOR_FAILCONF4;
+  }
+  else if (failedNodePtr.p->failState == WAITING_FOR_FAILCONF4)
+  {
+    jam();
     failedNodePtr.p->failState = NORMAL;
   }
   else
@@ -3738,7 +3748,7 @@ void Qmgr::handleApiCloseComConf(Signal*
          */
         jam();
         sendApiFailReq(signal, nodeId, true); // sumaOnly
-        failedNodePtr.p->failState = WAITING_FOR_FAILCONF3;
+        failedNodePtr.p->failState = WAITING_FOR_FAILCONF4;
       }
       
       if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)

=== modified file 'storage/ndb/src/kernel/blocks/record_types.hpp'
--- a/storage/ndb/src/kernel/blocks/record_types.hpp	2010-01-29 10:50:20 +0000
+++ b/storage/ndb/src/kernel/blocks/record_types.hpp	2010-05-20 11:18:08 +0000
@@ -94,5 +94,6 @@
 #define RT_SPJ_TREENODE            MAKE_TID( 2, RG_QUERY_MEMORY)
 #define RT_SPJ_ARENA_BLOCK         MAKE_TID( 3, RG_QUERY_MEMORY)
 #define RT_SPJ_DATABUFFER          MAKE_TID( 4, RG_QUERY_MEMORY)
+#define RT_SPJ_SCANFRAG            MAKE_TID( 5, RG_QUERY_MEMORY)
 
 #endif

=== modified file 'storage/ndb/src/kernel/vm/DataBuffer2.hpp'
--- a/storage/ndb/src/kernel/vm/DataBuffer2.hpp	2009-05-06 12:15:41 +0000
+++ b/storage/ndb/src/kernel/vm/DataBuffer2.hpp	2010-05-20 11:18:08 +0000
@@ -42,13 +42,14 @@ public:
   /**
    * Head/anchor for data buffer
    */
-  struct Head {
-    Head() ;
-
+  struct HeadPOD
+  {
     Uint32 used;       // Words used
     Uint32 firstItem;  // First segment (or RNIL)
     Uint32 lastItem;   // Last segment (or RNIL)
 
+    void init() { used = 0; firstItem = lastItem = RNIL; }
+
     /**
      * Get size of databuffer, in words
      */
@@ -60,6 +61,18 @@ public:
     static Uint32 getSegmentSize() { return sz;}
   };
 
+  struct Head : public HeadPOD
+  {
+    Head();
+
+    Head& operator=(const HeadPOD& src) {
+      this->used = src.used;
+      this->firstItem = src.firstItem;
+      this->lastItem = src.lastItem;
+      return *this;
+    }
+  };
+
   /** Constructor */
   DataBuffer2(DataBufferPool &);
 
@@ -171,7 +184,7 @@ template<Uint32 sz, typename Pool>
 class LocalDataBuffer2 : public DataBuffer2<sz, Pool> {
 public:
   LocalDataBuffer2(typename DataBuffer2<sz, Pool>::DataBufferPool & thePool,
-                   typename DataBuffer2<sz, Pool>::Head & _src)
+                   typename DataBuffer2<sz, Pool>::HeadPOD & _src)
     : DataBuffer2<sz, Pool>(thePool), src(_src)
   {
     this->head = src;
@@ -181,15 +194,13 @@ public:
     src = this->head;
   }
 private:
-  typename DataBuffer2<sz, Pool>::Head & src;
+  typename DataBuffer2<sz, Pool>::HeadPOD & src;
 };
 
 template<Uint32 sz, typename Pool>
 inline
 DataBuffer2<sz, Pool>::Head::Head(){
-  used = 0;
-  firstItem = RNIL;
-  lastItem = RNIL;
+  this->init();
 }
 
 template<Uint32 sz, typename Pool>
@@ -287,7 +298,7 @@ void DataBuffer2<sz, Pool>::print(FILE* 
 
   Uint32 acc = 0;
   for(; ptr.i != RNIL; ){
-    thePool.getPtr(ptr);
+    ptr.p = (Segment*)thePool.getPtr(ptr.i);
     const Uint32 * rest = ptr.p->data;
     for(Uint32 i = 0; i<sz; i++){
       fprintf(out, " H'%.8x", rest[i]);

=== modified file 'storage/ndb/src/kernel/vm/SLFifoList.hpp'
--- a/storage/ndb/src/kernel/vm/SLFifoList.hpp	2009-05-26 18:53:34 +0000
+++ b/storage/ndb/src/kernel/vm/SLFifoList.hpp	2010-05-20 11:18:08 +0000
@@ -34,19 +34,31 @@ public:
   /**
    * List head
    */
-  struct Head 
+  struct HeadPOD
   {
-    Head();
     Uint32 firstItem;
     Uint32 lastItem;
 
 #ifdef VM_TRACE
     bool in_use;
 #endif
-
+    void init();
     inline bool isEmpty() const { return firstItem == RNIL;}
   };
   
+  struct Head : public HeadPOD
+  {
+    Head() { this->init();}
+
+    Head& operator=(const HeadPOD& src) {
+      this->firstItem = src.firstItem;
+      this->lastItem = src.lastItem;
+#ifdef VM_TRACE
+      this->in_use = src.in_use;
+#endif
+      return *this;
+    }
+  };
   SLFifoListImpl(P & thePool);
   
   bool seizeFirst(Ptr<T> &);
@@ -59,7 +71,8 @@ public:
   void addLast(Ptr<T> &);
   
   void removeFirst(Ptr<T> &);
-  
+  void remove() { head.init(); }
+
   /**
    *  Update i & p value according to <b>i</b>
    */
@@ -114,7 +127,7 @@ template <typename P, typename T, typena
 class LocalSLFifoListImpl : public SLFifoListImpl<P,T,U> 
 {
 public:
-  LocalSLFifoListImpl(P & thePool, typename SLFifoListImpl<P,T,U>::Head &_src)
+  LocalSLFifoListImpl(P & thePool, typename SLFifoListImpl<P,T,U>::HeadPOD&_src)
     : SLFifoListImpl<P,T,U>(thePool), src(_src)
   {
     this->head = src;
@@ -131,7 +144,7 @@ public:
     src = this->head;
   }
 private:
-  typename SLFifoListImpl<P,T,U>::Head & src;
+  typename SLFifoListImpl<P,T,U>::HeadPOD & src;
 };
 
 template <typename P, typename T, typename U>
@@ -143,12 +156,13 @@ SLFifoListImpl<P,T,U>::SLFifoListImpl(P 
 
 template <typename P, typename T, typename U>
 inline
-SLFifoListImpl<P,T,U>::Head::Head()
+void
+SLFifoListImpl<P,T,U>::HeadPOD::init()
 {
-  firstItem = RNIL;
-  lastItem = RNIL;
+  this->firstItem = RNIL;
+  this->lastItem = RNIL;
 #ifdef VM_TRACE
-  in_use = false;
+  this->in_use = false;
 #endif
 }
 

=== modified file 'storage/ndb/src/kernel/vm/SectionReader.cpp'
--- a/storage/ndb/src/kernel/vm/SectionReader.cpp	2009-05-27 15:21:45 +0000
+++ b/storage/ndb/src/kernel/vm/SectionReader.cpp	2010-05-20 11:18:08 +0000
@@ -117,6 +117,17 @@ SectionReader::peekWord(Uint32 * dst) co
 }
 
 bool
+SectionReader::updateWord(Uint32 value) const 
+{
+  if(m_pos < m_len){
+    Uint32 ind = m_pos % SectionSegment::DataLength;
+    m_currentSegment->theData[ind] = value;
+    return true;
+  }
+  return false;
+}
+
+bool
 SectionReader::peekWords(Uint32 * dst, Uint32 len) const {
   if(m_pos + len > m_len)
     return false;

=== modified file 'storage/ndb/src/kernel/vm/SectionReader.hpp'
--- a/storage/ndb/src/kernel/vm/SectionReader.hpp	2009-05-27 15:21:45 +0000
+++ b/storage/ndb/src/kernel/vm/SectionReader.hpp	2010-05-20 11:18:08 +0000
@@ -70,6 +70,11 @@ public:
   PosInfo getPos();
   bool setPos(PosInfo posinfo);
 
+  /**
+   * Update word at current position to <em>value</em>
+   */
+  bool updateWord(Uint32 value) const ;
+
 private:
   Uint32 m_pos;
   Uint32 m_len;

=== modified file 'storage/ndb/src/kernel/vm/SimulatedBlock.hpp'
--- a/storage/ndb/src/kernel/vm/SimulatedBlock.hpp	2010-01-29 10:50:20 +0000
+++ b/storage/ndb/src/kernel/vm/SimulatedBlock.hpp	2010-05-20 11:18:08 +0000
@@ -1233,11 +1233,11 @@ SimulatedBlock::EXECUTE_DIRECT(Uint32 bl
     b = b->getInstance(instanceNo);
   ndbassert(b != 0);
   ndbassert(givenInstanceNo != ZNIL || b->getThreadId() == getThreadId());
+  signal->header.theSendersBlockRef = reference();
 #ifdef VM_TRACE
   if(globalData.testOn){
     signal->header.theVerId_signalNumber = gsn;
     signal->header.theReceiversBlockNumber = numberToBlock(block, instanceNo);
-    signal->header.theSendersBlockRef = reference();
     globalSignalLoggers.executeDirect(signal->header,
 				      0,        // in
 				      &signal->theData[0],

=== modified file 'storage/ndb/src/ndbapi/NdbQueryOperation.cpp'
--- a/storage/ndb/src/ndbapi/NdbQueryOperation.cpp	2010-05-19 13:12:55 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryOperation.cpp	2010-05-20 11:18:08 +0000
@@ -17,14 +17,17 @@
 */
 
 
-#include "NdbQueryOperationImpl.hpp"
 #include <ndb_global.h>
-#include "NdbQueryBuilder.hpp"
+#include <NdbQueryBuilder.hpp>
 #include "NdbQueryBuilderImpl.hpp"
-#include "signaldata/TcKeyReq.hpp"
-#include "signaldata/TcKeyRef.hpp"
-#include "signaldata/ScanTab.hpp"
-#include "signaldata/QueryTree.hpp"
+
+#include "NdbQueryOperationImpl.hpp"
+
+#include <signaldata/TcKeyReq.hpp>
+#include <signaldata/TcKeyRef.hpp>
+#include <signaldata/ScanTab.hpp>
+#include <signaldata/QueryTree.hpp>
+#include <signaldata/DbspjErr.hpp>
 
 #include "AttributeHeader.hpp"
 #include "NdbRecord.hpp"
@@ -153,6 +156,11 @@ public:
     m_outstandingResults += delta;
   }
 
+  void clearOutstandingResults()
+  {
+    m_outstandingResults = 0;
+  }
+
   void setConfReceived()
   { 
     assert(!m_confReceived);
@@ -3806,16 +3814,24 @@ NdbQueryOperationImpl::execTCKEYREF(NdbA
     }
   }
 
-  // Compensate for children results not produced.
-  // (doSend() assumed all child results to be materialized)
-  Uint32 cnt = 0;
-  cnt += 1; // self
-  cnt += getNoOfDescendantOperations();
-  if (getNoOfChildOperations() > 0)
+  if (ref->errorCode != DbspjErr::NodeFailure)
+  {
+    // Compensate for children results not produced.
+    // (doSend() assumed all child results to be materialized)
+    Uint32 cnt = 0;
+    cnt += 1; // self
+    cnt += getNoOfDescendantOperations();
+    if (getNoOfChildOperations() > 0)
+    {
+      cnt += getNoOfLeafOperations();
+    }
+    getQuery().m_rootFrags[0].incrOutstandingResults(- Int32(cnt));
+  }
+  else
   {
-    cnt += getNoOfLeafOperations();
+    // consider frag-batch complete
+    getQuery().m_rootFrags[0].clearOutstandingResults();
   }
-  getQuery().m_rootFrags[0].incrOutstandingResults(- Int32(cnt));
 
   bool ret = false;
   if (getQuery().m_rootFrags[0].isFragBatchComplete()) { 

=== modified file 'storage/ndb/src/ndbapi/NdbTransaction.cpp'
--- a/storage/ndb/src/ndbapi/NdbTransaction.cpp	2010-03-09 11:05:11 +0000
+++ b/storage/ndb/src/ndbapi/NdbTransaction.cpp	2010-05-20 11:18:08 +0000
@@ -2956,6 +2956,21 @@ NdbTransaction::report_node_failure(Uint
     }
     tmp = tmp->next();
   }
+
+  /**
+   * TODO, only abort ones really needing abort
+   */
+  NdbQueryImpl* qtmp = m_firstActiveQuery;
+  while (qtmp != 0)
+  {
+    if (qtmp->getQueryDef().isScanQuery() == false)
+    {
+      count++;
+      qtmp->setErrorCode(4119);
+    }
+    qtmp = qtmp->getNext();
+  }
+
   tNoComp += count;
   theNoOfOpCompleted = tNoComp;
   if(count)

=== modified file 'storage/ndb/src/ndbapi/ndberror.c'
--- a/storage/ndb/src/ndbapi/ndberror.c	2010-05-06 08:42:59 +0000
+++ b/storage/ndb/src/ndbapi/ndberror.c	2010-05-20 11:18:08 +0000
@@ -139,6 +139,7 @@ ErrorBundle ErrorCodes[] = {
     "Transaction was committed but all read information was not "
     "received due to node crash" },
   { 4119, DMEC, NR, "Simple/dirty read failed due to node failure" },
+  { 20016, DMEC, NR, "Query aborted due to node failure" },
   
   /**
    * Node shutdown

=== modified file 'storage/ndb/test/include/HugoQueries.hpp'
--- a/storage/ndb/test/include/HugoQueries.hpp	2010-05-11 20:12:03 +0000
+++ b/storage/ndb/test/include/HugoQueries.hpp	2010-05-20 11:18:08 +0000
@@ -31,9 +31,12 @@ public:
   HugoQueries(const NdbQueryDef & query);
   virtual ~HugoQueries();
 
+  // Rows found for each of the operations
+  Vector<Uint32> m_rows_found;
+
   int runLookupQuery(Ndb*, int records, int batchsize = 1);
   int runScanQuery(Ndb*,
-                   int abort = 0,
+                   int abort = 4,
                    int parallelism = 0,
                    int scan_flags = 0);
 

=== modified file 'storage/ndb/test/ndbapi/testSpj.cpp'
--- a/storage/ndb/test/ndbapi/testSpj.cpp	2010-05-11 20:12:03 +0000
+++ b/storage/ndb/test/ndbapi/testSpj.cpp	2010-05-20 11:18:08 +0000
@@ -88,7 +88,6 @@ runLookupJoin(NDBT_Context* ctx, NDBT_St
     i++;
   }
   g_info << endl;
-  ctx->stopTest();
   return NDBT_OK;
 }
 
@@ -116,7 +115,6 @@ runScanJoin(NDBT_Context* ctx, NDBT_Step
     i++;
   }
   g_info << endl;
-  ctx->stopTest();
   return NDBT_OK;
 }
 
@@ -154,7 +152,6 @@ runJoin(NDBT_Context* ctx, NDBT_Step* st
     addMask(ctx, (1 << stepNo), "Running");
   }
   g_info << endl;
-  ctx->stopTest();
   return NDBT_OK;
 }
 
@@ -183,6 +180,17 @@ runRestarter(NDBT_Context* ctx, NDBT_Ste
   if (loops < restarter.getNumDbNodes())
     loops = restarter.getNumDbNodes();
 
+  NdbSleep_MilliSleep(200);
+  Uint32 running = ctx->getProperty("Running", (Uint32)0);
+  while (running == 0 && !ctx->isTestStopped())
+  {
+    NdbSleep_MilliSleep(100);
+    running = ctx->getProperty("Running", (Uint32)0);
+  }
+
+  if (ctx->isTestStopped())
+    return NDBT_FAILED;
+
   while(i<loops && result != NDBT_FAILED && !ctx->isTestStopped()){
 
     int id = lastId % restarter.getNumDbNodes();
@@ -193,13 +201,11 @@ runRestarter(NDBT_Context* ctx, NDBT_Ste
     int nodeId = restarter.getDbNodeId(id);
     ndbout << "Restart node " << nodeId << endl;
 
-    Uint32 running = ctx->getProperty("Running", (Uint32)0);
     if(restarter.restartOneDbNode(nodeId, false, true, true) != 0){
       g_err << "Failed to restartNextDbNode" << endl;
       result = NDBT_FAILED;
       break;
     }
-    ctx->setProperty("Running", (Uint32)0);
 
     if (restarter.waitNodesNoStart(&nodeId, 1))
     {
@@ -212,19 +218,26 @@ runRestarter(NDBT_Context* ctx, NDBT_Ste
     {
       Uint32 maxwait = 30;
       ndbout_c("running: 0x%.8x", running);
-      for (Uint32 checks = 0; checks < 3; checks++)
+      for (Uint32 checks = 0; checks < 3 && !ctx->isTestStopped(); checks++)
       {
-        for (; maxwait != 0; maxwait--)
+        ctx->setProperty("Running", (Uint32)0);
+        for (; maxwait != 0 && !ctx->isTestStopped(); maxwait--)
         {
-          if (ctx->getProperty("Running", (Uint32)0) == running)
+          if ((ctx->getProperty("Running", (Uint32)0) & running) == running)
             goto ok;
           NdbSleep_SecSleep(1);
         }
+
+        if (ctx->isTestStopped())
+        {
+          g_err << "Test stopped while waiting for progress!" << endl;
+          return NDBT_FAILED;
+        }
+
         g_err << "No progress made!!" << endl;
         return NDBT_FAILED;
     ok:
         g_err << "Progress made!! " << endl;
-        (void)1;
       }
     }
 
@@ -241,6 +254,34 @@ runRestarter(NDBT_Context* ctx, NDBT_Ste
       break;
     }
 
+    if (waitprogress)
+    {
+      Uint32 maxwait = 30;
+      ndbout_c("running: 0x%.8x", running);
+      for (Uint32 checks = 0; checks < 3 && !ctx->isTestStopped(); checks++)
+      {
+        ctx->setProperty("Running", (Uint32)0);
+        for (; maxwait != 0 && !ctx->isTestStopped(); maxwait--)
+        {
+          if ((ctx->getProperty("Running", (Uint32)0) & running) == running)
+            goto ok2;
+          NdbSleep_SecSleep(1);
+        }
+
+        if (ctx->isTestStopped())
+        {
+          g_err << "Test stopped while waiting for progress!" << endl;
+          return NDBT_FAILED;
+        }
+
+        g_err << "No progress made!!" << endl;
+        return NDBT_FAILED;
+    ok2:
+        g_err << "Progress made!! " << endl;
+        ctx->setProperty("Running", (Uint32)0);
+      }
+    }
+
     lastId++;
     i++;
   }
@@ -270,6 +311,8 @@ TESTCASE("NF_Join", ""){
   TC_PROPERTY("UntilStopped", 1);
   TC_PROPERTY("WaitProgress", 20);
   INITIALIZER(runLoadTable);
+  //STEPS(runScanJoin, 6);
+  //STEPS(runLookupJoin, 6);
   STEPS(runJoin, 6);
   STEP(runRestarter);
   FINALIZER(runClearTable);

=== modified file 'storage/ndb/test/run-test/files.cpp'
--- a/storage/ndb/test/run-test/files.cpp	2010-03-11 19:47:09 +0000
+++ b/storage/ndb/test/run-test/files.cpp	2010-05-20 11:18:08 +0000
@@ -319,7 +319,7 @@ setup_files(atrt_config& config, int set
         fprintf(fenv, "#!/bin/sh\n");
         fprintf(fenv, "cd %s\n", proc.m_proc.m_cwd.c_str());
         fprintf(fenv, "[ -f /etc/profile ] && . /etc/profile\n");
-        fprintf(fenv, ". env.sh\n");
+        fprintf(fenv, ". ./env.sh\n");
         fprintf(fenv, "ulimit -Sc unlimited\n");
         fprintf(fenv, "bash -i");
         fflush(fenv);

=== modified file 'storage/ndb/test/src/HugoQueries.cpp'
--- a/storage/ndb/test/src/HugoQueries.cpp	2010-05-11 20:12:03 +0000
+++ b/storage/ndb/test/src/HugoQueries.cpp	2010-05-20 11:18:08 +0000
@@ -1,19 +1,19 @@
 /*
-   Copyright (C) 2003 MySQL AB
-    All rights reserved. Use is subject to license terms.
+  Copyright (C) 2003 MySQL AB
+  All rights reserved. Use is subject to license terms.
 
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; version 2 of the License.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
 */
 
 #include "HugoQueries.hpp"
@@ -116,6 +116,10 @@ HugoQueries::runLookupQuery(Ndb* pNdb,
   int r = 0;
   int retryAttempt = 0;
 
+  m_rows_found.clear();
+  Uint32 zero = 0;
+  m_rows_found.fill(m_query_def->getNoOfOperations() - 1, zero);
+
   if (batch == 0) {
     g_info << "ERROR: Argument batch == 0 in runLookupQuery. Not allowed."
            << endl;
@@ -132,33 +136,36 @@ HugoQueries::runLookupQuery(Ndb* pNdb,
     if (retryAttempt >= m_retryMax)
     {
       g_info << "ERROR: has retried this operation " << retryAttempt
-	     << " times, failing!" << endl;
+             << " times, failing!" << endl;
       return NDBT_FAILED;
     }
 
+    Vector<Uint32> batch_rows_found;
+    batch_rows_found.fill(m_query_def->getNoOfOperations() - 1, zero);
+    Vector<NdbQuery*> queries;
+
     NdbTransaction * pTrans = pNdb->startTransaction();
     if (pTrans == NULL)
     {
       const NdbError err = pNdb->getNdbError();
 
       if (err.status == NdbError::TemporaryError){
-	ERR(err);
-	NdbSleep_MilliSleep(50);
-	retryAttempt++;
-	continue;
+        ERR(err);
+        NdbSleep_MilliSleep(50);
+        retryAttempt++;
+        continue;
       }
       ERR(err);
       return NDBT_FAILED;
     }
 
-    NdbQuery * query = 0;
     for (int b = 0; b<batch; b++)
     {
       char buf[NDB_MAX_TUPLE_SIZE];
       NdbQueryParamValue params[NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY];
       equalForParameters(buf, * m_ops[0].m_calc, params, b + r);
 
-      query = pTrans->createQuery(m_query_def, params);
+      NdbQuery * query = pTrans->createQuery(m_query_def, params);
       if (query == 0)
       {
         const NdbError err = pTrans->getNdbError();
@@ -171,41 +178,49 @@ HugoQueries::runLookupQuery(Ndb* pNdb,
         NdbQueryOperation * pOp = query->getQueryOperation((Uint32)o);
         HugoQueries::getValueForQueryOp(pOp, m_ops[o].m_rows[b]);
       }
+      queries.push_back(query);
     }
 
     int check = pTrans->execute(NoCommit, AbortOnError);
     if (check == -1)
     {
       const NdbError err = pTrans->getNdbError();
+      ERR(err);
       if (err.status == NdbError::TemporaryError){
-	ERR(err);
-	pTrans->close();
-	NdbSleep_MilliSleep(50);
-	retryAttempt++;
-	continue;
+        pTrans->close();
+        NdbSleep_MilliSleep(50);
+        retryAttempt++;
+        continue;
       }
-      ERR(err);
       pTrans->close();
       return NDBT_FAILED;
     }
 
     for (int b = 0; b<batch; b++)
     {
-      for (size_t o = 0; o<m_ops.size(); o++)
+      NdbQuery * query = queries[b];
+      if (query->nextResult() == NdbQuery::NextResult_gotRow)
       {
-        NdbQueryOperation * pOp = query->getQueryOperation((Uint32)o);
-        if (!pOp->isRowNULL())
+        for (size_t o = 0; o<m_ops.size(); o++)
         {
-          if (m_ops[o].m_calc->verifyRowValues(m_ops[o].m_rows[b]) != 0)
+          NdbQueryOperation * pOp = query->getQueryOperation((Uint32)o);
+          if (!pOp->isRowNULL())
           {
-            pTrans->close();
-            return NDBT_FAILED;
+            batch_rows_found[o]++;
+            if (m_ops[o].m_calc->verifyRowValues(m_ops[o].m_rows[b]) != 0)
+            {
+              pTrans->close();
+              return NDBT_FAILED;
+            }
           }
         }
       }
     }
     pTrans->close();
     r += batch;
+
+    for (size_t i = 0; i<batch_rows_found.size(); i++)
+      m_rows_found[i] += batch_rows_found[i];
   }
 
   return NDBT_OK;
@@ -223,18 +238,20 @@ HugoQueries::runScanQuery(Ndb * pNdb,
 
   while (retryAttempt < m_retryMax)
   {
+    m_rows_found.clear();
+    Uint32 zero = 0;
+    m_rows_found.fill(m_query_def->getNoOfOperations() - 1, zero);
+
     NdbTransaction * pTrans = pNdb->startTransaction();
     if (pTrans == NULL)
     {
       const NdbError err = pNdb->getNdbError();
-
+      ERR(err);
       if (err.status == NdbError::TemporaryError){
-	ERR(err);
-	NdbSleep_MilliSleep(50);
-	retryAttempt++;
-	continue;
+        NdbSleep_MilliSleep(50);
+        retryAttempt++;
+        continue;
       }
-      ERR(err);
       return NDBT_FAILED;
     }
 
@@ -260,26 +277,45 @@ HugoQueries::runScanQuery(Ndb * pNdb,
     if (check == -1)
     {
       const NdbError err = pTrans->getNdbError();
+      ERR(err);
       if (err.status == NdbError::TemporaryError){
-	ERR(err);
-	pTrans->close();
-	NdbSleep_MilliSleep(50);
-	retryAttempt++;
-	continue;
+        pTrans->close();
+        NdbSleep_MilliSleep(50);
+        retryAttempt++;
+        continue;
       }
-      ERR(err);
       pTrans->close();
       return NDBT_FAILED;
     }
 
+    int r = rand() % 100;
+    if (r < abort && ((r & 1) == 0))
+    {
+      ndbout_c("Query aborted!");
+      query->close();
+      pTrans->close();
+      m_rows_found.clear();
+      return NDBT_OK;
+    }
+
     NdbQuery::NextResultOutcome res;
     while ((res = query->nextResult()) == NdbQuery::NextResult_gotRow)
     {
+      if (r < abort && ((r & 1) == 1))
+      {
+        ndbout_c("Query aborted 2!");
+        query->close();
+        pTrans->close();
+        m_rows_found.clear();
+      return NDBT_OK;
+      }
+
       for (size_t o = 0; o<m_ops.size(); o++)
       {
         NdbQueryOperation * pOp = query->getQueryOperation((Uint32)o);
         if (!pOp->isRowNULL())
         {
+          m_rows_found[o]++;
           if (m_ops[o].m_calc->verifyRowValues(m_ops[o].m_rows[0]) != 0)
           {
             pTrans->close();
@@ -289,16 +325,17 @@ HugoQueries::runScanQuery(Ndb * pNdb,
       }
     }
 
+    const NdbError err = query->getNdbError();
+    query->close();
     pTrans->close();
     if (res == NdbQuery::NextResult_error)
     {
-      const NdbError err = query->getNdbError();
+      ERR(err);
       if (err.status == NdbError::TemporaryError)
       {
-	ERR(err);
-	NdbSleep_MilliSleep(50);
-	retryAttempt++;
-	continue;
+        NdbSleep_MilliSleep(50);
+        retryAttempt++;
+        continue;
       }
       return NDBT_FAILED;
     }
@@ -314,3 +351,4 @@ HugoQueries::runScanQuery(Ndb * pNdb,
 }
 
 template class Vector<HugoQueries::Op>;
+template class Vector<NdbQuery*>;

=== modified file 'storage/ndb/test/tools/hugoJoin.cpp'
--- a/storage/ndb/test/tools/hugoJoin.cpp	2010-05-12 06:27:44 +0000
+++ b/storage/ndb/test/tools/hugoJoin.cpp	2010-05-20 11:18:08 +0000
@@ -56,7 +56,7 @@ static struct my_option my_long_options[
     0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
   { "loops", 'l', "Loops",
     (uchar**) &_loops, 0,
-    0, GET_INT, REQUIRED_ARG, _loops, -1, 0, 0, 0, 0},
+    0, GET_INT, REQUIRED_ARG, _loops, 0, 0, 0, 0, 0},
   { "verbose", 'v', "verbosity",
     (uchar**) &_verbose, 0,
     0, GET_INT, REQUIRED_ARG, _verbose, 0, 0, 0, 0, 0},
@@ -182,8 +182,9 @@ int main(int argc, char** argv){
     _seed = (unsigned)NdbTick_CurrentMillisecond();
   }
   ndbout << "--seed=" << _seed << endl;
+  srand(_seed);
 
-  for (int i = 0; (_loops == -1) || (i < _loops);)
+  for (int i = 0; (_loops == 0) || (i < _loops);)
   {
     if (_verbose >= 1)
     {
@@ -192,7 +193,7 @@ int main(int argc, char** argv){
     HugoQueryBuilder builder(&MyNdb, tables.getBase(), mask);
     builder.setJoinLevel(_depth);
     const NdbQueryDef * q = builder.createQuery(&MyNdb);
-    for (int j = 0; j < _loops_per_query && ((_loops == -1) || (i < _loops));
+    for (int j = 0; j < _loops_per_query && ((_loops == 0) || (i < _loops));
          i++, j++)
     {
       int res = 0;
@@ -209,6 +210,15 @@ int main(int argc, char** argv){
       {
         return NDBT_ProgramExit(NDBT_FAILED);
       }
+      if (hq.m_rows_found.size() != 0)
+      {
+        printf("\tfound: [ ");
+        for (size_t i = 0; i<hq.m_rows_found.size(); i++)
+        {
+          printf("%u ", (Uint32)hq.m_rows_found[i]);
+        }
+        ndbout_c("]");
+      }
     }
   }
 

=== modified file 'storage/ndb/test/tools/test_spj.cpp'
--- a/storage/ndb/test/tools/test_spj.cpp	2010-05-06 08:42:59 +0000
+++ b/storage/ndb/test/tools/test_spj.cpp	2010-05-20 11:18:08 +0000
@@ -135,11 +135,13 @@ int main(int argc, char** argv)
   NdbTransaction * pTrans = MyNdb.startTransaction();
   NdbScanOperation * pOp = pTrans->scanTable(pTab->getDefaultRecord(), 
                                              NdbOperation::LM_CommittedRead);
+
+  bool scanindexchild = false;
 #if 0
   /**
-   * select STRAIGHT_JOIN *
-   * from t1 join t1 as t2 
-   * where t2.a = t1.b and t1.b <= 100 and t2.b <= 3;
+     select STRAIGHT_JOIN *
+     from t1 join t1 as t2 
+     where t2.a = t1.b and t1.b <= 100 and t2.b <= 3;
    *
    * - ScanFrag
    * PI_ATTR_INTERPRET w/ values inlined
@@ -164,7 +166,7 @@ int main(int argc, char** argv)
     0x00000001, // table version
     0x00000001, // parent list
     0x00000001, // key pattern: #parameters/#len
-    0x00020000, // P_COL col = 0
+    QueryPattern::col(0), // P_COL col = 0
 
     // ScanFragParameters
     0x000c0002, // type/len
@@ -186,8 +188,8 @@ int main(int argc, char** argv)
     0x1000001c, // result data
 
     0x00020004, // #len subroutine / #len interpreted program
-    0x0003301a, // p0: BRANCH_ATTR_OP_COL | LE | OFFSET-JUMP
-    0x00000000, // p0: param ref 0
+    0x0003301a, // p0: BRANCH_ATTR_OP_COL_2 | LE | OFFSET-JUMP
+    0x00010000, // p0: attrid: 1, param ref 0
     0x00000012, // p1: EXIT_OK
     0x03830013, // p2: EXIT_NOK
     0x00000004, // param 0 header
@@ -229,14 +231,14 @@ int main(int argc, char** argv)
     0x00000001, // table version
     0x00000001, // parent list
     0x00000001, // key pattern: #parameters/#len
-    0x00020000, // P_COL col = 0
+    QueryPattern::col(0), // P_COL col = 0
     0x00010004, // attrinfo pattern: #len-pattern / #len interpreted program
     0x0003301a, // p0: BRANCH_ATTR_OP_COL_2 | LE | OFFSET-JUMP
     0x00010000, // p0: attrid: 1 / program param 0
     0x00000012, // p1: EXIT_OK
     0x03830013, // p2: EXIT_NOK
     0x00000001, // attr-param pattern: #parameters
-    0x00060000, // attr-param pattern: P_PARAM_WITH_HEADER col=0
+    QueryPattern::paramHeader(0), // P_PARAM_WITH_HEADER col=0
 
     // ScanFragParameters
     0x000c0002, // type/len
@@ -262,7 +264,7 @@ int main(int argc, char** argv)
     0xfff00002, // read all
     0xffe90000  // read any value
   };
-#else
+#elif 0
   /**
    *
    * select STRAIGHT_JOIN *
@@ -294,14 +296,14 @@ int main(int argc, char** argv)
     0x00000001, // table version
     0x00000001, // parent list
     0x00000001, // key pattern: #parameters/#len
-    0x00020000, // P_COL col = 0
+    QueryPattern::col(0), // P_COL col = 0
     0x00010004, // attrinfo pattern: #len-pattern / #len interpreted program
     0x0003301a, // p0: BRANCH_ATTR_OP_COL_2 | LE | OFFSET-JUMP
     0x00010000, // p0: attrid: 1 / program param 0
     0x00000012, // p1: EXIT_OK
     0x03830013, // p2: EXIT_NOK
     0x00000000, // attr-param pattern: #parameters
-    0x00070000, // attr-param pattern: P_ATTRINFO col=0
+    QueryPattern::attrInfo(0), // attr-param pattern: P_ATTRINFO col=0
 
     // ScanFragParameters
     0x000c0002, // type/len
@@ -325,7 +327,59 @@ int main(int argc, char** argv)
     0xfff00002, // read all
     0xffe90000  // read any value
   };
+#else
+  /**
+     select STRAIGHT_JOIN *
+     from t1 join t1 as t2 on t2.a >= t1.b;
+  */
+
+  scanindexchild = true;
+  Uint32 request[] = {
+    // pos: 0
+    0x000d0002, 
+
+    // pos: 1 ScanFragNode
+    0x00050002, // len-type
+    DABits::NI_LINKED_ATTR, // bits
+    0x0000000c, // table id
+    0x00000001, // table version
+    0x00010001, // #cnt, linked attr
+
+    // pos: 6 ScanIndexNode
+    0x00090003, // type len
+    DABits::NI_HAS_PARENT | DABits::NI_KEY_LINKED, // bits
+    0x0000000b, // table id
+    0x00000001, // table version
+    0x00000001, // parent list
+    0x00000003, // key pattern (cnt/len)
+    QueryPattern::data(1), // P_DATA len = 1
+    0x00000002, // BoundLE
+    QueryPattern::attrInfo(0), // P_ATTRINFO col = 0
+
+    // pos: 15 ScanFragParameters
+    0x00080002, // type len
+    0x00000009, // bits
+    0x10000020, // result data
+    0x00000001, // param/len interpret program
+    0x00000012, // p1 = exit ok
+    0x00000002, // len user projection
+    0xfff00002, // up 1 - read all
+    0xffe90000, // up 2 - read any value
+
+    // pos: 23 ScanIndexParameters
+    0x000a0003, // type/len
+    0x00020009, // bits
+    0xffff0100, // batch size
+    0x10000024, // result data
+    0x00000001, // param/len interpret program
+    0x00000012, // p1 = exit ok
+    0x00000003, // len user projection
+    0xfff00002, // up 1 - read all
+    0xffe90000, // up 2 - read any value
+    0xfffb0000
+  };
 #endif
+
   Uint32 n0 = (request[1] >> 16);
   Uint32 n1 = (request[1 + n0] >> 16);
   request[0] = ((1 + n0 + n1) << 16) | 2;
@@ -333,8 +387,16 @@ int main(int argc, char** argv)
   request[1+2] = pTab->getObjectId();
   request[1+3] = pTab->getObjectVersion();
 
-  request[1 + n0 + 2] = pTab->getObjectId();
-  request[1 + n0 + 3] = pTab->getObjectVersion();
+  if (scanindexchild == false)
+  {
+    request[1 + n0 + 2] = pTab->getObjectId();
+    request[1 + n0 + 3] = pTab->getObjectVersion();
+  }
+  else
+  {
+    request[1 + n0 + 2] = pIdx->getObjectId();
+    request[1 + n0 + 3] = pIdx->getObjectVersion();
+  }
 
   NdbScanFilterImpl::setIsLinkedFlag(pOp);
   NdbScanFilterImpl::set(pOp, request, NDB_ARRAY_SIZE(request));
@@ -344,3 +406,4 @@ int main(int argc, char** argv)
 
   return NDBT_ProgramExit(NDBT_OK);
 }
+


Attachment: [text/bzr-bundle] bzr/jonas@mysql.com-20100520111808-xyu1qnbpw7iv4qwb.bundle
Thread
bzr commit into mysql-5.1-telco-7.0-spj branch (jonas:3165)Jonas Oreland20 May