List:Commits« Previous MessageNext Message »
From:Frazer Clement Date:March 9 2009 1:22pm
Subject:bzr commit into mysql-5.1-telco-6.4 branch (frazer:2920) Bug#37934
View as plain text  
#At file:///home/frazer/bzr/mysql-5.1-telco-6.4/

 2920 Frazer Clement	2009-03-09
      Bug#37934 Partition pruning doesn't work for all queries in DBT2
      
      Modifications to 6.3 fix for 6.4 :
       1) Make partition setting mechanisms more strict about
          setting partitionIds for natively partitioned tables
       2) Add DISTR_KEY_RECORD variant to PartitionSpec structure
       3) Add testing of 2)
      
      With this patch, ha_ndbcluster.cc only provides explicit 
      partition ids for UserDefined partitioned tables.
      modified:
        sql/ha_ndbcluster.cc
        storage/ndb/include/ndbapi/Ndb.hpp
        storage/ndb/include/ndbapi/NdbScanOperation.hpp
        storage/ndb/src/ndbapi/Ndb.cpp
        storage/ndb/src/ndbapi/NdbOperationDefine.cpp
        storage/ndb/src/ndbapi/NdbOperationSearch.cpp
        storage/ndb/src/ndbapi/NdbScanOperation.cpp
        storage/ndb/src/ndbapi/ndberror.c
        storage/ndb/test/ndbapi/testPartitioning.cpp

=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc	2009-03-09 13:04:27 +0000
+++ b/sql/ha_ndbcluster.cc	2009-03-09 13:22:09 +0000
@@ -2935,14 +2935,11 @@ int ha_ndbcluster::ordered_index_scan(co
   }
 
   /* Partition pruning */
-  if (m_use_partition_pruning && part_spec != NULL &&
+  if (m_use_partition_pruning && 
+      m_user_defined_partitioning && part_spec != NULL &&
       part_spec->start_part == part_spec->end_part)
   {
-    /* TODO - 6.4
-     * Really should only take this path when the table uses
-     * user-defined partitioning, otherwise we need a key value
-     * to correctly constrain the partition scanned.
-     */
+    /* Explicitly set partition id when pruning User-defined partitioned scan */
     options.partitionId = part_spec->start_part;
     options.optionsPresent |= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
   }
@@ -3014,7 +3011,7 @@ int ha_ndbcluster::full_table_scan(const
   DBUG_ENTER("full_table_scan");  
   DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));
 
-  if (m_use_partition_pruning)
+  if (m_use_partition_pruning && m_user_defined_partitioning)
   {
     part_spec.start_part= 0;
     part_spec.end_part= m_part_info->get_tot_partitions() - 1;
@@ -3036,15 +3033,14 @@ int ha_ndbcluster::full_table_scan(const
        * Only one partition is required to scan, if sorted is required
        * don't need it anymore since output from one ordered partitioned
        * index is always sorted.
+       *
+       * Note : This table scan pruning currently only occurs for 
+       * UserDefined partitioned tables.
+       * It could be extended to occur for natively partitioned tables if
+       * the Partitioning layer can make a key (e.g. start or end key)
+       * available so that we can determine the correct pruning in the 
+       * NDBAPI layer.
        */
-      // TODO - 6.4
-      // This code still allows a table scan of a natively partitioned
-      // table to be pruned by MySQLD.  
-      // It is used as part of ALTER TABLE COALESCE PARTITION in the 
-      // ndb_partition_key test.
-      // If this is required then we need a single value of the
-      // distribution key to attempt pruning.
-      //
       use_set_part_id= TRUE;
       if (!trans)
         if (unlikely(!(trans= get_transaction_part_id(part_spec.start_part,
@@ -3065,9 +3061,7 @@ int ha_ndbcluster::full_table_scan(const
   options.parallel = parallelism;
 
   if (use_set_part_id) {
-    // TODO - 6.4 
-    // Note that we still call this for natively partitioned tables
-    // See comment above about resolving this.
+    assert(m_user_defined_partitioning);
     options.optionsPresent|= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
     options.partitionId = part_spec.start_part;
   };

=== modified file 'storage/ndb/include/ndbapi/Ndb.hpp'
--- a/storage/ndb/include/ndbapi/Ndb.hpp	2009-03-09 13:04:27 +0000
+++ b/storage/ndb/include/ndbapi/Ndb.hpp	2009-03-09 13:22:09 +0000
@@ -1364,6 +1364,10 @@ public:
    *   An array of a table's distribution key values for a 
    *   table with native partitioning.
    *
+   * PS_DISTR_KEY_RECORD
+   *   A row in given NdbRecord format containing a natively 
+   *   partitioned table's distribution key values 
+   *
    */
 
   struct PartitionSpec
@@ -1372,6 +1376,43 @@ public:
     {
       PS_NONE                = 0,
       PS_USER_DEFINED        = 1,
+      PS_DISTR_KEY_PART_PTR  = 2,
+      PS_DISTR_KEY_RECORD    = 3
+    };
+
+    Uint32 type;
+    
+    union
+    {
+      struct {
+        Uint32 partitionId;
+      } UserDefined;
+      
+      struct {
+        const Key_part_ptr* tableKeyParts;
+        void* xfrmbuf;
+        Uint32 xfrmbuflen;
+      } KeyPartPtr;
+
+      struct {
+        const NdbRecord* keyRecord;
+        const char* keyRow;
+        void* xfrmbuf;
+        Uint32 xfrmbuflen;
+      } KeyRecord;
+    };
+  };
+
+#ifndef DOXYGEN_SHOULD_SKIP_DEPRECATED
+  /* First version of PartitionSpec, defined here for 
+   * backwards compatibility reasons
+   */
+  struct PartitionSpec_v1
+  {
+    enum SpecType
+    {
+      PS_NONE                = 0,
+      PS_USER_DEFINED        = 1,
       PS_DISTR_KEY_PART_PTR  = 2
     };
 
@@ -1390,6 +1431,7 @@ public:
       } KeyPartPtr;
     };
   };
+#endif
 
   /**
    * Start a transaction

=== modified file 'storage/ndb/include/ndbapi/NdbScanOperation.hpp'
--- a/storage/ndb/include/ndbapi/NdbScanOperation.hpp	2009-03-09 13:04:27 +0000
+++ b/storage/ndb/include/ndbapi/NdbScanOperation.hpp	2009-03-09 13:22:09 +0000
@@ -473,7 +473,8 @@ protected:
                                ScanOptions& currOptions);
   int handleScanOptions(const ScanOptions *options);
   int validatePartInfoPtr(const Ndb::PartitionSpec*& partInfo,
-                          Uint32 sizeOfPartInfo);
+                          Uint32 sizeOfPartInfo,
+                          Ndb::PartitionSpec& partValue);
   int getPartValueFromInfo(const Ndb::PartitionSpec* partInfo,
                            const NdbTableImpl* table,
                            Uint32* partValue);

=== modified file 'storage/ndb/src/ndbapi/Ndb.cpp'
--- a/storage/ndb/src/ndbapi/Ndb.cpp	2009-03-09 13:04:27 +0000
+++ b/storage/ndb/src/ndbapi/Ndb.cpp	2009-03-09 13:22:09 +0000
@@ -388,6 +388,8 @@ Ndb::computeHash(Uint32 *retval,
   
   if (buf)
   {
+    /* Get 64-bit aligned ptr required for hashing */
+    assert(bufLen != 0);
     UintPtr org = UintPtr(buf);
     UintPtr use = (org + 7) & ~(UintPtr)7;
 
@@ -501,17 +503,42 @@ Ndb::computeHash(Uint32 *retval,
                  void* buf, Uint32 bufLen)
 {
   Uint32 len;
-  char* pos = (char*)buf;
+  char* pos = NULL;
 
   Uint32 parts = keyRec->distkey_index_length;
 
+  if (unlikely(keyRec->flags & NdbRecord::RecHasUserDefinedPartitioning))
   {
+    /* Calculating native hash on keys in user defined 
+     * partitioned table is probably part of a bug
+     */
+    goto euserdeftable;
+  }
+
+  if (buf)
+  {
+    /* Get 64-bit aligned address as required for hashing */
+    assert(bufLen != 0);
     UintPtr org = UintPtr(buf);
     UintPtr use = (org + 7) & ~(UintPtr)7;
 
     buf = (void*)use;
     bufLen -= Uint32(use - org);
   }
+  else
+  {
+    /* We malloc buf here.  Don't have a handy 'Max distr key size'
+     * variable, so let's use the key length, which must include
+     * the Distr key.
+     */
+    buf= malloc(keyRec->m_keyLenInWords << 2);
+    if (unlikely(buf == 0))
+      goto enomem;
+    bufLen= 0; /* So we remember to deallocate */
+    assert((UintPtr(buf) & 7) == 0);
+  }
+  
+  pos= (char*) buf;
 
   for (Uint32 i = 0; i < parts; i++)
   {
@@ -603,8 +630,17 @@ Ndb::computeHash(Uint32 *retval,
     free(buf);
 
   return 0;
+
+euserdeftable:
+  return 4544;
+
+enomem:
+  return 4000;
   
-emalformedstring:  
+emalformedstring:
+  if (bufLen == 0)
+    free(buf);
+
   return 4279;
 }
 

=== modified file 'storage/ndb/src/ndbapi/NdbOperationDefine.cpp'
--- a/storage/ndb/src/ndbapi/NdbOperationDefine.cpp	2008-11-11 12:54:17 +0000
+++ b/storage/ndb/src/ndbapi/NdbOperationDefine.cpp	2009-03-09 13:22:09 +0000
@@ -1242,6 +1242,14 @@ NdbOperation::handleOperationOptions (co
        * takeover operation 
        */
     }
+    /* Only allowed for pk ops on user defined partitioned tables */
+    if (unlikely( ! ((op->m_attribute_record->flags & 
+                      NdbRecord::RecHasUserDefinedPartitioning) &&
+                     (op->m_key_record->table->m_index == NULL))))
+    {
+      /* Explicit partitioning info not allowed for table and operation*/
+      return 4546;
+    }
     op->theDistributionKey=opts->partitionId;
     op->theDistrKeyIndicator_= 1;       
   }

=== modified file 'storage/ndb/src/ndbapi/NdbOperationSearch.cpp'
--- a/storage/ndb/src/ndbapi/NdbOperationSearch.cpp	2009-03-09 13:04:27 +0000
+++ b/storage/ndb/src/ndbapi/NdbOperationSearch.cpp	2009-03-09 13:22:09 +0000
@@ -534,14 +534,21 @@ NdbOperation::setPartitionId(Uint32 valu
     return; // TODO : Consider adding int rc for error
   }
 
-  /* Todo - 6.4
-   * Should only be allowed for user-defined partitioned tables
-   * Add an #ifdef STRICT_SET_PARTITION_ID
-   * Cannot be on at least until Blobs stop setting partition
-   * id for parts and MySQLD stops setting it for Alter table.
-   * operations.
+  /* We only allow setPartitionId() for :
+   *   PrimaryKey ops on a UserDefined partitioned table
+   *   Ordered index scans
+   *   Table scans
+   *
+   * It is not allowed on :
+   *   Primary key access to Natively partitioned tables
+   *   Any unique key access
    */
-
+  assert(((m_type == PrimaryKeyAccess) && 
+          (m_currentTable->getFragmentType() ==
+           NdbDictionary::Object::UserDefined)) ||
+         (m_type == OrderedIndexScan) ||
+         (m_type == TableScan));
+  
   theDistributionKey = value;
   theDistrKeyIndicator_ = 1;
   DBUG_PRINT("info", ("NdbOperation::setPartitionId: %u",

=== modified file 'storage/ndb/src/ndbapi/NdbScanOperation.cpp'
--- a/storage/ndb/src/ndbapi/NdbScanOperation.cpp	2009-03-09 13:04:27 +0000
+++ b/storage/ndb/src/ndbapi/NdbScanOperation.cpp	2009-03-09 13:22:09 +0000
@@ -274,14 +274,17 @@ NdbScanOperation::handleScanOptions(cons
     assert(theBlobList == NULL);
     assert(m_pruneState == SPS_UNKNOWN);
     
-    // TODO - 6.4
-    //      : Ideally we could insist that an explicit partition id
-    //        is only allowed for user-defined partitioned tables.
-    //        However, alter table for partitions currently uses this
-    //        (ndb_partition_key test)
-    //
-    //assert(m_attribute_record->table->m_fragmentType == 
-    //  NdbDictionary::Object::UserDefined);
+    /* Only allowed to set partition id for PK ops on UserDefined
+     * partitioned tables
+     */
+    if(unlikely(! (m_attribute_record->flags & 
+                   NdbRecord::RecHasUserDefinedPartitioning)))
+    {
+      /* Explicit partitioning info not allowed for table and operation*/
+      setErrorCodeAbort(4546);
+      return -1;
+    }
+
     m_pruneState= SPS_FIXED;
     m_pruningKey= options->partitionId;
     
@@ -330,9 +333,11 @@ NdbScanOperation::handleScanOptions(cons
   if (options->optionsPresent & ScanOptions::SO_PART_INFO)
   {
     Uint32 partValue;
+    Ndb::PartitionSpec tmpSpec;
     const Ndb::PartitionSpec* pSpec= options->partitionInfo;
     if (unlikely(validatePartInfoPtr(pSpec,
-                                     options->sizeOfPartInfo) ||
+                                     options->sizeOfPartInfo,
+                                     tmpSpec) ||
                  getPartValueFromInfo(pSpec,
                                       m_currentTable,
                                       &partValue)))
@@ -591,13 +596,16 @@ NdbScanOperation::getPartValueFromInfo(c
                                        const NdbTableImpl* table,
                                        Uint32* partValue)
 {
-  if (partInfo->type == Ndb::PartitionSpec::PS_USER_DEFINED)
+  switch(partInfo->type)
+  {
+  case Ndb::PartitionSpec::PS_USER_DEFINED:
   {
     assert(table->m_fragmentType == NdbDictionary::Object::UserDefined);
     *partValue= partInfo->UserDefined.partitionId;
     return 0;
   }
-  if (partInfo->type == Ndb::PartitionSpec::PS_DISTR_KEY_PART_PTR)
+
+  case Ndb::PartitionSpec::PS_DISTR_KEY_PART_PTR:
   {
     assert(table->m_fragmentType != NdbDictionary::Object::UserDefined);
     Uint32 hashVal;
@@ -607,7 +615,21 @@ NdbScanOperation::getPartValueFromInfo(c
                               partInfo->KeyPartPtr.xfrmbuflen);
     if (ret == 0)
     {
-      *partValue= table->getPartitionId(hashVal);
+      /* We send the hash result here (rather than the partitionId
+       * generated by doing some function on the hash)
+       * Note that KEY and LINEAR KEY native partitioning hash->partitionId
+       * mapping functions are idempotent so that they can be
+       * applied multiple times to their result without changing it.  
+       * DIH will apply them, so there's no need to also do it here in API, 
+       * unless we want to see which physical partition we *think* will 
+       * hold the values.
+       * Only possible advantage is that we could identify some locality
+       * not shown in the hash result.  This is only *safe* for schemes
+       * which cannot change the hash->partitionId mapping function
+       * online.
+       * Can add as an optimisation if necessary.
+       */
+      *partValue= hashVal;
       return 0;
     }
     else
@@ -617,6 +639,31 @@ NdbScanOperation::getPartValueFromInfo(c
     }
   }
   
+  case Ndb::PartitionSpec::PS_DISTR_KEY_RECORD:
+  {
+    assert(table->m_fragmentType != NdbDictionary::Object::UserDefined);
+    Uint32 hashVal;
+    int ret= Ndb::computeHash(&hashVal,
+                              partInfo->KeyRecord.keyRecord,
+                              partInfo->KeyRecord.keyRow,
+                              partInfo->KeyRecord.xfrmbuf, 
+                              partInfo->KeyRecord.xfrmbuflen);
+    if (ret == 0)
+    {
+      /* See comments above about sending hashResult rather than
+       * partitionId
+       */
+      *partValue= hashVal;
+      return 0;
+    }
+    else
+    {
+      setErrorCodeAbort(ret);
+      return -1;
+    }
+  }
+  }
+  
   /* 4542 : Unknown partition information type */
   setErrorCodeAbort(4542);
   return -1;
@@ -731,14 +778,7 @@ NdbIndexScanOperation::getDistKeyFromRan
                              ptrs, tmpshrink, tmplen);
   if (ret == 0)
   {
-    if (result_record->flags & NdbRecord::RecHasUserDefinedPartitioning)
-    {
-      *distKey= result_record->table->getPartitionId(hashValue);
-    }
-    else
-    {
-      *distKey = hashValue;
-    }
+    *distKey = hashValue;
     return 0;
   }
   else
@@ -753,14 +793,37 @@ NdbIndexScanOperation::getDistKeyFromRan
 
 int
 NdbScanOperation::validatePartInfoPtr(const Ndb::PartitionSpec*& partInfo,
-                                      Uint32 sizeOfPartInfo)
+                                      Uint32 sizeOfPartInfo,
+                                      Ndb::PartitionSpec& tmpSpec)
 {  
-  if (unlikely((sizeOfPartInfo != sizeof(Ndb::PartitionSpec)) &&
-               (sizeOfPartInfo != 0)))
+  if (unlikely(sizeOfPartInfo != sizeof(Ndb::PartitionSpec)))
   {
-    /* 4545 : Invalid or Unsupported PartitionInfo structure */
-    setErrorCodeAbort(4545);
-    return -1;
+    if (sizeOfPartInfo == sizeof(Ndb::PartitionSpec_v1))
+    {
+      const Ndb::PartitionSpec_v1* oldPSpec= 
+        (const Ndb::PartitionSpec_v1*) partInfo;
+      
+      /* Let's upgrade to the latest variant */
+      tmpSpec.type= oldPSpec->type;
+      if (tmpSpec.type == Ndb::PartitionSpec_v1::PS_USER_DEFINED)
+      {
+        tmpSpec.UserDefined.partitionId= oldPSpec->UserDefined.partitionId;
+      }
+      else
+      {
+        tmpSpec.KeyPartPtr.tableKeyParts= oldPSpec->KeyPartPtr.tableKeyParts;
+        tmpSpec.KeyPartPtr.xfrmbuf= oldPSpec->KeyPartPtr.xfrmbuf;
+        tmpSpec.KeyPartPtr.xfrmbuflen= oldPSpec->KeyPartPtr.xfrmbuflen;
+      }
+      
+      partInfo= &tmpSpec;
+    }
+    else
+    {
+      /* 4545 : Invalid or Unsupported PartitionInfo structure */
+      setErrorCodeAbort(4545);
+      return -1;
+    }
   }
   
   if (partInfo->type != Ndb::PartitionSpec::PS_NONE)
@@ -842,11 +905,13 @@ NdbIndexScanOperation::setBound(const Nd
                                    NdbDictionary::Object::UserDefined);
 
   /* Validate explicit partitioning info if it's supplied */
+  Ndb::PartitionSpec tmpSpec;
   if (partInfo)
   {
     /* May update the PartInfo ptr */
     if (validatePartInfoPtr(partInfo,
-                            sizeOfPartInfo))
+                            sizeOfPartInfo,
+                            tmpSpec))
       return -1;
   }
 

=== modified file 'storage/ndb/src/ndbapi/ndberror.c'
--- a/storage/ndb/src/ndbapi/ndberror.c	2009-03-09 13:04:27 +0000
+++ b/storage/ndb/src/ndbapi/ndberror.c	2009-03-09 13:22:09 +0000
@@ -611,6 +611,8 @@ ErrorBundle ErrorCodes[] = {
   { 4543, DMEC, AE, "Duplicate partitioning information supplied" },
   { 4544, DMEC, AE, "Wrong partitionInfo type for table" },
   { 4545, DMEC, AE, "Invalid or Unsupported PartitionInfo structure" },
+  { 4546, DMEC, AE, "Explicit partitioning info not allowed for table and operation" },
+
 
   { 4200, DMEC, AE, "Status Error when defining an operation" },
   { 4201, DMEC, AE, "Variable Arrays not yet supported" },

=== modified file 'storage/ndb/test/ndbapi/testPartitioning.cpp'
--- a/storage/ndb/test/ndbapi/testPartitioning.cpp	2009-03-09 13:04:27 +0000
+++ b/storage/ndb/test/ndbapi/testPartitioning.cpp	2009-03-09 13:22:09 +0000
@@ -51,7 +51,7 @@ setNativePartitioning(Ndb* ndb, NdbDicti
   }
 
   /* Use rand to choose one of the native partitioning schemes */
-  const Uint32 rType= rand() % 2;
+  const Uint32 rType= rand() % 3;
   Uint32 fragType= -1;
   switch(rType)
   {
@@ -61,6 +61,9 @@ setNativePartitioning(Ndb* ndb, NdbDicti
   case 1 :
     fragType = NdbDictionary::Object::DistrKeyLin;
     break;
+  case 2:
+    fragType = NdbDictionary::Object::HashMapPartition;
+    break;
   }
 
   ndbout << "Setting fragment type to " << fragType << endl;
@@ -242,6 +245,10 @@ create_dist_table(Ndb* pNdb, 
     {
       setupUDPartitioning(pNdb, tab);
     }
+    else
+    {
+      setNativePartitioning(pNdb, tab, 0, 0);
+    }
     
     NdbDictionary::Column dk;
     dk.setName(DistTabDKeyCol);
@@ -984,6 +991,7 @@ dist_scan_body(Ndb* pNdb, int records, i
       //keyParts[0].ptr= &badPartVal;
       
       Ndb::PartitionSpec pSpec;
+      char* tabRow= NULL;
       
       if (userDefined)
       {
@@ -993,10 +1001,35 @@ dist_scan_body(Ndb* pNdb, int records, i
       }
       else
       {
-        pSpec.type= Ndb::PartitionSpec::PS_DISTR_KEY_PART_PTR;
-        pSpec.KeyPartPtr.tableKeyParts= keyParts;
-        pSpec.KeyPartPtr.xfrmbuf= NULL;
-        pSpec.KeyPartPtr.xfrmbuflen= 0;
+        /* Can set either using an array of Key parts, or a KeyRecord
+         * structure.  Let's test both
+         */
+        if (rand() % 2)
+        {
+          //ndbout << "Using Key Parts to set range partition info" << endl;
+          pSpec.type= Ndb::PartitionSpec::PS_DISTR_KEY_PART_PTR;
+          pSpec.KeyPartPtr.tableKeyParts= keyParts;
+          pSpec.KeyPartPtr.xfrmbuf= NULL;
+          pSpec.KeyPartPtr.xfrmbuflen= 0;
+        }
+        else
+        {
+          //ndbout << "Using KeyRecord to set range partition info" << endl;
+          
+          /* Setup a row in NdbRecord format with the distkey value set */
+          tabRow= (char*)malloc(NdbDictionary::getRecordRowLength(tabRecord));
+          int& dKeyVal= *((int*) NdbDictionary::getValuePtr(tabRecord,
+                                                            tabRow,
+                                                            tab->getColumn(DistTabDKeyCol)->getAttrId()));
+          dKeyVal= partValue;
+          // dKeyVal= partValue + 1; // Test failue case
+          
+          pSpec.type= Ndb::PartitionSpec::PS_DISTR_KEY_RECORD;
+          pSpec.KeyRecord.keyRecord= tabRecord;
+          pSpec.KeyRecord.keyRow= tabRow;
+          pSpec.KeyRecord.xfrmbuf= 0;
+          pSpec.KeyRecord.xfrmbuflen= 0;
+        }
       }
 
       CHECK(op->setBound(idxRecord,
@@ -1004,6 +1037,11 @@ dist_scan_body(Ndb* pNdb, int records, i
                          &pSpec,
                          sizeof(pSpec)),
             op);
+
+      if (tabRow)
+        free(tabRow);
+      tabRow= NULL;
+
     }
   }
 

Thread
bzr commit into mysql-5.1-telco-6.4 branch (frazer:2920) Bug#37934Frazer Clement9 Mar