List:Commits« Previous MessageNext Message »
From:Jan Wedvik Date:September 29 2011 11:43am
Subject:bzr push into mysql-5.1-telco-7.0 branch (jan.wedvik:4559 to 4562)
View as plain text  
 4562 Jan Wedvik	2011-09-29
      This patch will make the SPJ block fetch all rows for some non-root index scans 
      in one batch rather than two.
      This will happen if the first batch reads from a subset of the fragments and 
      receives few rows. The SPJ block will then
      try to read from the remaining fragments before finishing the batch.
      This is especially useful when doing bushy scans. If there are more branches in 
      the bushy scan, then these will have to be
      repeated for each batch of the current scan.

    modified:
      storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp
      storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp
 4561 Jan Wedvik	2011-09-29
      This patch fixes a bug that causes an assert error in NdbQueryOperationDefImpl::printTree() for query trees that are more
      than 31 operations deep.

    modified:
      storage/ndb/src/ndbapi/NdbQueryBuilder.cpp
      storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp
 4560 Jan Wedvik	2011-09-29
      This patch fixes an assert error that happens in printSCANTABCONF() when enabling api signal logging and running
      the SQL script below:
      
      CREATE TABLE t1 (
        pk1 int NOT NULL,
        pk2 int NOT NULL,
        PRIMARY KEY (`pk1`,`pk2`)
      ) ENGINE=ndb partition by key(pk1) partitions 8;
      
      select count(*) from t1 where t1.pk1=0;
      
      The resulting ScanTabConf signal carries OpData for eight fragments in a separate segment, which printSCANTABCONF()
      does not handle.

    modified:
      storage/ndb/src/common/debugger/signaldata/ScanTab.cpp
 4559 Jonas Oreland	2011-09-29
      ndb - fix incorrect NdbRestarter usage

    modified:
      storage/ndb/test/ndbapi/testNdbApi.cpp
      storage/ndb/test/ndbapi/testRestartGci.cpp
=== modified file 'storage/ndb/src/common/debugger/signaldata/ScanTab.cpp'
--- a/storage/ndb/src/common/debugger/signaldata/ScanTab.cpp	2011-06-30 15:59:25 +0000
+++ b/storage/ndb/src/common/debugger/signaldata/ScanTab.cpp	2011-09-29 11:31:28 +0000
@@ -78,9 +78,9 @@ printSCANTABCONF(FILE * output, const Ui
   size_t op_count= requestInfo & (~ScanTabConf::EndOfData);
   if (op_count)
   {
-    fprintf(output, " Operation(s) [api tc rows len]:\n");
     if (len == ScanTabConf::SignalLength + 4 * op_count)
     {
+      fprintf(output, " Operation(s) [api tc rows len]:\n");
       ScanTabConf::OpData * op = (ScanTabConf::OpData*)
         (theData + ScanTabConf::SignalLength);
       for(size_t i = 0; i<op_count; i++)
@@ -91,9 +91,9 @@ printSCANTABCONF(FILE * output, const Ui
         op++;
       }
     }
-    else
+    else if (len == ScanTabConf::SignalLength + 3 * op_count)
     {
-      assert(len == ScanTabConf::SignalLength + 3 * op_count);
+      fprintf(output, " Operation(s) [api tc rows len]:\n");      
       for(size_t i = 0; i<op_count; i++)
       {
         ScanTabConf::OpData * op = (ScanTabConf::OpData*)
@@ -104,6 +104,12 @@ printSCANTABCONF(FILE * output, const Ui
                 ScanTabConf::getLength(op->rows));
       }
     }
+    else
+    {
+      // ScanTabConf::OpData stored in section 0 of signal.
+      assert(len == ScanTabConf::SignalLength);
+      fprintf(output, " Long signal. Cannot print operations.");
+    }
     fprintf(output, "\n");
   }
   return false;

=== modified file 'storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp	2011-08-22 08:35:35 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp	2011-09-29 11:43:27 +0000
@@ -580,6 +580,8 @@ public:
     Uint32 m_fragCount;
     // The number of fragments that we scan in parallel.
     Uint32 m_parallelism;
+    // True if we are still receiving the first batch for this operation.
+    bool   m_firstBatch;
     /**
      * True if this is the first instantiation of this operation. A child
      * operation will be instantiated once for each batch of its parent.
@@ -1229,7 +1231,6 @@ private:
   void scanIndex_execSCAN_FRAGCONF(Signal*, Ptr<Request>, Ptr<TreeNode>, Ptr<ScanFragHandle>);
   void scanIndex_parent_row(Signal*,Ptr<Request>,Ptr<TreeNode>, const RowPtr&);
   void scanIndex_fixupBound(Ptr<ScanFragHandle> fragPtr, Uint32 ptrI, Uint32);
-  void scanIndex_send(Signal*,Ptr<Request>,Ptr<TreeNode>);
   void scanIndex_send(Signal* signal,
                       Ptr<Request> requestPtr,
                       Ptr<TreeNode> treeNodePtr,

=== modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp	2011-09-23 07:43:25 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp	2011-09-29 11:43:27 +0000
@@ -5023,6 +5023,7 @@ Dbspj::scanIndex_parent_batch_complete(S
   const ScanFragReq * org = (const ScanFragReq*)data.m_scanFragReq;
   ndbrequire(org->batch_size_rows > 0);
 
+  data.m_firstBatch = true;
   if (treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL)
   {
     jam();
@@ -5171,6 +5172,9 @@ Dbspj::scanIndex_send(Signal* signal,
                       Uint32 bs_rows,
                       Uint32& batchRange)
 {
+  jam();
+  ndbassert(bs_bytes > 0);
+  ndbassert(bs_rows > 0);
   /**
    * if (m_bits & prunemask):
    * - Range keys sliced out to each ScanFragHandle
@@ -5451,6 +5455,9 @@ Dbspj::scanIndex_execSCAN_FRAGCONF(Signa
 
   if (data.m_frags_outstanding == 0)
   {
+    const bool isFirstBatch = data.m_firstBatch;
+    data.m_firstBatch = false;
+
     const ScanFragReq * const org
       = reinterpret_cast<const ScanFragReq*>(data.m_scanFragReq);
 
@@ -5486,24 +5493,78 @@ Dbspj::scanIndex_execSCAN_FRAGCONF(Signa
     {
       jam();
       ndbrequire((requestPtr.p->m_state & Request::RS_ABORTING) != 0);
-    }
-    else if (! (data.m_rows_received == data.m_rows_expecting))
-    {
-      jam();
+      checkBatchComplete(signal, requestPtr, 1);
       return;
     }
-    else
+
+    if (isFirstBatch && data.m_frags_not_started > 0)
     {
-      if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
+      /**
+       * Check if we can expect to be able to fetch the entire result set by
+       * asking for more fragments within the same batch. This may improve 
+       * performance for bushy scans, as subsequent bushy branches must be
+       * re-executed for each batch of this scan.
+       */
+      
+      /**
+       * Find the maximal correlation value that we may have seen so far.
+       * Correlation value must be unique within batch and smaller than 
+       * org->batch_size_rows.
+       */
+      const Uint32 maxCorrVal = (data.m_totalRows) == 0 ? 0 :
+        org->batch_size_rows / data.m_parallelism * (data.m_parallelism - 1)
+        + data.m_totalRows;
+      
+      // Number of rows that we can still fetch in this batch.
+      const Int32 remainingRows 
+        = static_cast<Int32>(org->batch_size_rows - maxCorrVal);
+      
+      if (remainingRows >= data.m_frags_not_started &&
+          /**
+           * Check that (remaining row capacity)/(remaining fragments) is 
+           * greater or equal to (rows read so far)/(finished fragments).
+           */
+          remainingRows * static_cast<Int32>(data.m_parallelism) >=
+          static_cast<Int32>(data.m_totalRows * data.m_frags_not_started) &&
+          (org->batch_size_bytes - data.m_totalBytes) * data.m_parallelism >=
+          data.m_totalBytes * data.m_frags_not_started)
       {
         jam();
-        reportBatchComplete(signal, requestPtr, treeNodePtr);
+        Uint32 batchRange = maxCorrVal;
+        DEBUG("::scanIndex_execSCAN_FRAGCONF() first batch was not full."
+              " Asking for new batches from " << data.m_frags_not_started <<
+              " fragments with " << 
+              remainingRows / data.m_frags_not_started 
+              <<" rows and " << 
+              (org->batch_size_bytes - data.m_totalBytes)
+              / data.m_frags_not_started 
+              << " bytes.");
+        scanIndex_send(signal,
+                       requestPtr,
+                       treeNodePtr,
+                       data.m_frags_not_started,
+                       (org->batch_size_bytes - data.m_totalBytes)
+                       / data.m_frags_not_started,
+                       remainingRows / data.m_frags_not_started,
+                       batchRange);
+        return;
       }
     }
+    
+    if (data.m_rows_received != data.m_rows_expecting)
+    {
+      jam();
+      return;
+    }
+    
+    if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
+    {
+      jam();
+      reportBatchComplete(signal, requestPtr, treeNodePtr);
+    }
 
     checkBatchComplete(signal, requestPtr, 1);
-    return;
-  }
+  } // if (data.m_frags_outstanding == 0)
 }
 
 void

=== modified file 'storage/ndb/src/ndbapi/NdbQueryBuilder.cpp'
--- a/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp	2011-09-14 10:30:08 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp	2011-09-29 11:35:02 +0000
@@ -343,7 +343,8 @@ NdbQueryDef::destroy() const
 void
 NdbQueryDef::print() const
 {
-  m_impl.getQueryOperation(0U).printTree(0, Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32>());
+  m_impl.getQueryOperation(0U)
+    .printTree(0, NdbQueryOperationDefImpl::SiblingMask());
 }
 
 /*************************************************************************
@@ -1188,7 +1189,8 @@ NdbQueryBuilderImpl::prepare()
   if (doPrintQueryTree)
   {
     ndbout << "Query tree:" << endl;
-    def->getQueryOperation(0U).printTree(0, Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32>());
+    def->getQueryOperation(0U)
+      .printTree(0, NdbQueryOperationDefImpl::SiblingMask());
   }
 
   return def;
@@ -2159,7 +2161,8 @@ NdbQueryOperationDefImpl::appendChildPro
  * that connect the tree nodes.
  */
 static void printMargin(Uint32 depth, 
-                        Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> hasMoreSiblingsMask, 
+                        NdbQueryOperationDefImpl::SiblingMask 
+                        hasMoreSiblingsMask, 
                         bool header)
 {
   if (depth > 0)
@@ -2193,11 +2196,10 @@ static void printMargin(Uint32 depth,
 
 void 
 NdbQueryOperationDefImpl::printTree(Uint32 depth, 
-                                    Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> 
-                                    hasMoreSiblingsMask) const
+                                    SiblingMask hasMoreSiblingsMask) const
 {
   // Print vertical line leading down to this node.
-  Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> firstLineMask = hasMoreSiblingsMask;
+  SiblingMask firstLineMask = hasMoreSiblingsMask;
   firstLineMask.set(depth);
   printMargin(depth, firstLineMask, false);
   ndbout << endl;
@@ -2214,22 +2216,24 @@ NdbQueryOperationDefImpl::printTree(Uint
     printMargin(depth, hasMoreSiblingsMask, false);
     ndbout << " index: " << getIndex()->getName() << endl; 
   }
-  /* For each child but the last one, use a mask with an extra bit set to
-   * indicate that there are more siblings.
-   */
-  hasMoreSiblingsMask.set(depth+1);
+
   for (int childNo = 0; 
-       childNo < static_cast<int>(getNoOfChildOperations()) - 1; 
+       childNo < static_cast<int>(getNoOfChildOperations()); 
        childNo++)
   {
-    getChildOperation(childNo).printTree(depth+1, hasMoreSiblingsMask);
-  }
-  if (getNoOfChildOperations() > 0)
-  {
-    // The last child has no more siblings.
-    hasMoreSiblingsMask.clear(depth+1);
-    getChildOperation(getNoOfChildOperations() - 1)
-      .printTree(depth+1, hasMoreSiblingsMask);
+    if (childNo == 0)
+    {
+      /* For each child but the last one, use a mask with an extra bit set to
+       * indicate that there are more siblings.
+       */
+      hasMoreSiblingsMask.set(depth+1);
+    }
+    if (childNo == static_cast<int>(getNoOfChildOperations()) - 1)
+    {
+      // The last child has no more siblings.
+      hasMoreSiblingsMask.clear(depth+1);
+    }
+    getChildOperation(childNo).printTree(depth+1, hasMoreSiblingsMask); 
   }
 } // NdbQueryOperationDefImpl::printTree()
 

=== modified file 'storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp'
--- a/storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp	2011-09-14 10:30:08 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp	2011-09-29 11:35:02 +0000
@@ -429,6 +429,12 @@ public:
   // Get type of query operation
   virtual NdbQueryOperationDef::Type getType() const = 0;
 
+  /**
+   * Used for telling if parent at depth n has more siblings. (In that case
+   * we need to draw a horizontal line leading to that sibling.)
+   */
+  typedef Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> SiblingMask;
+
   /** Print query tree graph to trace file (using recursion).
    * @param depth Number of ancestor nodes that this node has.
    * @param hasMoreSiblingsMask The n'th bit should be set if the n'th ancestor
@@ -436,7 +442,7 @@ public:
    */
   void printTree(
            Uint32 depth, 
-           Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> hasMoreSiblingsMask) const;
+           SiblingMask hasMoreSiblingsMask) const;
 
 protected:
   // QueryTree building:

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-5.1-telco-7.0 branch (jan.wedvik:4559 to 4562) Jan Wedvik2 Oct