#At file:///home/frazer/bzr/mysql-5.1-telco-6.4/
2920 Frazer Clement 2009-03-09
Bug#37934 Partition pruning doesn't work for all queries in DBT2
Modifications to 6.3 fix for 6.4 :
1) Make partition setting mechanisms more strict about
setting partitionIds for natively partitioned tables
2) Add DISTR_KEY_RECORD variant to PartitionSpec structure
3) Add testing of 2)
With this patch, ha_ndbcluster.cc only provides explicit
partition ids for UserDefined partitioned tables.
modified:
sql/ha_ndbcluster.cc
storage/ndb/include/ndbapi/Ndb.hpp
storage/ndb/include/ndbapi/NdbScanOperation.hpp
storage/ndb/src/ndbapi/Ndb.cpp
storage/ndb/src/ndbapi/NdbOperationDefine.cpp
storage/ndb/src/ndbapi/NdbOperationSearch.cpp
storage/ndb/src/ndbapi/NdbScanOperation.cpp
storage/ndb/src/ndbapi/ndberror.c
storage/ndb/test/ndbapi/testPartitioning.cpp
=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc 2009-03-09 13:04:27 +0000
+++ b/sql/ha_ndbcluster.cc 2009-03-09 13:22:09 +0000
@@ -2935,14 +2935,11 @@ int ha_ndbcluster::ordered_index_scan(co
}
/* Partition pruning */
- if (m_use_partition_pruning && part_spec != NULL &&
+ if (m_use_partition_pruning &&
+ m_user_defined_partitioning && part_spec != NULL &&
part_spec->start_part == part_spec->end_part)
{
- /* TODO - 6.4
- * Really should only take this path when the table uses
- * user-defined partitioning, otherwise we need a key value
- * to correctly constrain the partition scanned.
- */
+ /* Explicitly set partition id when pruning User-defined partitioned scan */
options.partitionId = part_spec->start_part;
options.optionsPresent |= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
}
@@ -3014,7 +3011,7 @@ int ha_ndbcluster::full_table_scan(const
DBUG_ENTER("full_table_scan");
DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));
- if (m_use_partition_pruning)
+ if (m_use_partition_pruning && m_user_defined_partitioning)
{
part_spec.start_part= 0;
part_spec.end_part= m_part_info->get_tot_partitions() - 1;
@@ -3036,15 +3033,14 @@ int ha_ndbcluster::full_table_scan(const
* Only one partition is required to scan, if sorted is required
* don't need it anymore since output from one ordered partitioned
* index is always sorted.
+ *
+ * Note : This table scan pruning currently only occurs for
+ * UserDefined partitioned tables.
+ * It could be extended to occur for natively partitioned tables if
+ * the Partitioning layer can make a key (e.g. start or end key)
+ * available so that we can determine the correct pruning in the
+ * NDBAPI layer.
*/
- // TODO - 6.4
- // This code still allows a table scan of a natively partitioned
- // table to be pruned by MySQLD.
- // It is used as part of ALTER TABLE COALESCE PARTITION in the
- // ndb_partition_key test.
- // If this is required then we need a single value of the
- // distribution key to attempt pruning.
- //
use_set_part_id= TRUE;
if (!trans)
if (unlikely(!(trans= get_transaction_part_id(part_spec.start_part,
@@ -3065,9 +3061,7 @@ int ha_ndbcluster::full_table_scan(const
options.parallel = parallelism;
if (use_set_part_id) {
- // TODO - 6.4
- // Note that we still call this for natively partitioned tables
- // See comment above about resolving this.
+ assert(m_user_defined_partitioning);
options.optionsPresent|= NdbScanOperation::ScanOptions::SO_PARTITION_ID;
options.partitionId = part_spec.start_part;
};
=== modified file 'storage/ndb/include/ndbapi/Ndb.hpp'
--- a/storage/ndb/include/ndbapi/Ndb.hpp 2009-03-09 13:04:27 +0000
+++ b/storage/ndb/include/ndbapi/Ndb.hpp 2009-03-09 13:22:09 +0000
@@ -1364,6 +1364,10 @@ public:
* An array of a table's distribution key values for a
* table with native partitioning.
*
+ * PS_DISTR_KEY_RECORD
+ * A row in given NdbRecord format containing a natively
+ * partitioned table's distribution key values
+ *
*/
struct PartitionSpec
@@ -1372,6 +1376,43 @@ public:
{
PS_NONE = 0,
PS_USER_DEFINED = 1,
+ PS_DISTR_KEY_PART_PTR = 2,
+ PS_DISTR_KEY_RECORD = 3
+ };
+
+ Uint32 type;
+
+ union
+ {
+ struct {
+ Uint32 partitionId;
+ } UserDefined;
+
+ struct {
+ const Key_part_ptr* tableKeyParts;
+ void* xfrmbuf;
+ Uint32 xfrmbuflen;
+ } KeyPartPtr;
+
+ struct {
+ const NdbRecord* keyRecord;
+ const char* keyRow;
+ void* xfrmbuf;
+ Uint32 xfrmbuflen;
+ } KeyRecord;
+ };
+ };
+
+#ifndef DOXYGEN_SHOULD_SKIP_DEPRECATED
+ /* First version of PartitionSpec, defined here for
+ * backwards compatibility reasons
+ */
+ struct PartitionSpec_v1
+ {
+ enum SpecType
+ {
+ PS_NONE = 0,
+ PS_USER_DEFINED = 1,
PS_DISTR_KEY_PART_PTR = 2
};
@@ -1390,6 +1431,7 @@ public:
} KeyPartPtr;
};
};
+#endif
/**
* Start a transaction
=== modified file 'storage/ndb/include/ndbapi/NdbScanOperation.hpp'
--- a/storage/ndb/include/ndbapi/NdbScanOperation.hpp 2009-03-09 13:04:27 +0000
+++ b/storage/ndb/include/ndbapi/NdbScanOperation.hpp 2009-03-09 13:22:09 +0000
@@ -473,7 +473,8 @@ protected:
ScanOptions& currOptions);
int handleScanOptions(const ScanOptions *options);
int validatePartInfoPtr(const Ndb::PartitionSpec*& partInfo,
- Uint32 sizeOfPartInfo);
+ Uint32 sizeOfPartInfo,
+ Ndb::PartitionSpec& partValue);
int getPartValueFromInfo(const Ndb::PartitionSpec* partInfo,
const NdbTableImpl* table,
Uint32* partValue);
=== modified file 'storage/ndb/src/ndbapi/Ndb.cpp'
--- a/storage/ndb/src/ndbapi/Ndb.cpp 2009-03-09 13:04:27 +0000
+++ b/storage/ndb/src/ndbapi/Ndb.cpp 2009-03-09 13:22:09 +0000
@@ -388,6 +388,8 @@ Ndb::computeHash(Uint32 *retval,
if (buf)
{
+ /* Get 64-bit aligned ptr required for hashing */
+ assert(bufLen != 0);
UintPtr org = UintPtr(buf);
UintPtr use = (org + 7) & ~(UintPtr)7;
@@ -501,17 +503,42 @@ Ndb::computeHash(Uint32 *retval,
void* buf, Uint32 bufLen)
{
Uint32 len;
- char* pos = (char*)buf;
+ char* pos = NULL;
Uint32 parts = keyRec->distkey_index_length;
+ if (unlikely(keyRec->flags & NdbRecord::RecHasUserDefinedPartitioning))
{
+ /* Calculating native hash on keys in user defined
+ * partitioned table is probably part of a bug
+ */
+ goto euserdeftable;
+ }
+
+ if (buf)
+ {
+ /* Get 64-bit aligned address as required for hashing */
+ assert(bufLen != 0);
UintPtr org = UintPtr(buf);
UintPtr use = (org + 7) & ~(UintPtr)7;
buf = (void*)use;
bufLen -= Uint32(use - org);
}
+ else
+ {
+ /* We malloc buf here. Don't have a handy 'Max distr key size'
+ * variable, so let's use the key length, which must include
+ * the Distr key.
+ */
+ buf= malloc(keyRec->m_keyLenInWords << 2);
+ if (unlikely(buf == 0))
+ goto enomem;
+ bufLen= 0; /* So we remember to deallocate */
+ assert((UintPtr(buf) & 7) == 0);
+ }
+
+ pos= (char*) buf;
for (Uint32 i = 0; i < parts; i++)
{
@@ -603,8 +630,17 @@ Ndb::computeHash(Uint32 *retval,
free(buf);
return 0;
+
+euserdeftable:
+ return 4544;
+
+enomem:
+ return 4000;
-emalformedstring:
+emalformedstring:
+ if (bufLen == 0)
+ free(buf);
+
return 4279;
}
=== modified file 'storage/ndb/src/ndbapi/NdbOperationDefine.cpp'
--- a/storage/ndb/src/ndbapi/NdbOperationDefine.cpp 2008-11-11 12:54:17 +0000
+++ b/storage/ndb/src/ndbapi/NdbOperationDefine.cpp 2009-03-09 13:22:09 +0000
@@ -1242,6 +1242,14 @@ NdbOperation::handleOperationOptions (co
* takeover operation
*/
}
+ /* Only allowed for pk ops on user defined partitioned tables */
+ if (unlikely( ! ((op->m_attribute_record->flags &
+ NdbRecord::RecHasUserDefinedPartitioning) &&
+ (op->m_key_record->table->m_index == NULL))))
+ {
+ /* Explicit partitioning info not allowed for table and operation*/
+ return 4546;
+ }
op->theDistributionKey=opts->partitionId;
op->theDistrKeyIndicator_= 1;
}
=== modified file 'storage/ndb/src/ndbapi/NdbOperationSearch.cpp'
--- a/storage/ndb/src/ndbapi/NdbOperationSearch.cpp 2009-03-09 13:04:27 +0000
+++ b/storage/ndb/src/ndbapi/NdbOperationSearch.cpp 2009-03-09 13:22:09 +0000
@@ -534,14 +534,21 @@ NdbOperation::setPartitionId(Uint32 valu
return; // TODO : Consider adding int rc for error
}
- /* Todo - 6.4
- * Should only be allowed for user-defined partitioned tables
- * Add an #ifdef STRICT_SET_PARTITION_ID
- * Cannot be on at least until Blobs stop setting partition
- * id for parts and MySQLD stops setting it for Alter table.
- * operations.
+ /* We only allow setPartitionId() for :
+ * PrimaryKey ops on a UserDefined partitioned table
+ * Ordered index scans
+ * Table scans
+ *
+ * It is not allowed on :
+ * Primary key access to Natively partitioned tables
+ * Any unique key access
*/
-
+ assert(((m_type == PrimaryKeyAccess) &&
+ (m_currentTable->getFragmentType() ==
+ NdbDictionary::Object::UserDefined)) ||
+ (m_type == OrderedIndexScan) ||
+ (m_type == TableScan));
+
theDistributionKey = value;
theDistrKeyIndicator_ = 1;
DBUG_PRINT("info", ("NdbOperation::setPartitionId: %u",
=== modified file 'storage/ndb/src/ndbapi/NdbScanOperation.cpp'
--- a/storage/ndb/src/ndbapi/NdbScanOperation.cpp 2009-03-09 13:04:27 +0000
+++ b/storage/ndb/src/ndbapi/NdbScanOperation.cpp 2009-03-09 13:22:09 +0000
@@ -274,14 +274,17 @@ NdbScanOperation::handleScanOptions(cons
assert(theBlobList == NULL);
assert(m_pruneState == SPS_UNKNOWN);
- // TODO - 6.4
- // : Ideally we could insist that an explicit partition id
- // is only allowed for user-defined partitioned tables.
- // However, alter table for partitions currently uses this
- // (ndb_partition_key test)
- //
- //assert(m_attribute_record->table->m_fragmentType ==
- // NdbDictionary::Object::UserDefined);
+ /* Only allowed to set partition id for PK ops on UserDefined
+ * partitioned tables
+ */
+ if(unlikely(! (m_attribute_record->flags &
+ NdbRecord::RecHasUserDefinedPartitioning)))
+ {
+ /* Explicit partitioning info not allowed for table and operation*/
+ setErrorCodeAbort(4546);
+ return -1;
+ }
+
m_pruneState= SPS_FIXED;
m_pruningKey= options->partitionId;
@@ -330,9 +333,11 @@ NdbScanOperation::handleScanOptions(cons
if (options->optionsPresent & ScanOptions::SO_PART_INFO)
{
Uint32 partValue;
+ Ndb::PartitionSpec tmpSpec;
const Ndb::PartitionSpec* pSpec= options->partitionInfo;
if (unlikely(validatePartInfoPtr(pSpec,
- options->sizeOfPartInfo) ||
+ options->sizeOfPartInfo,
+ tmpSpec) ||
getPartValueFromInfo(pSpec,
m_currentTable,
&partValue)))
@@ -591,13 +596,16 @@ NdbScanOperation::getPartValueFromInfo(c
const NdbTableImpl* table,
Uint32* partValue)
{
- if (partInfo->type == Ndb::PartitionSpec::PS_USER_DEFINED)
+ switch(partInfo->type)
+ {
+ case Ndb::PartitionSpec::PS_USER_DEFINED:
{
assert(table->m_fragmentType == NdbDictionary::Object::UserDefined);
*partValue= partInfo->UserDefined.partitionId;
return 0;
}
- if (partInfo->type == Ndb::PartitionSpec::PS_DISTR_KEY_PART_PTR)
+
+ case Ndb::PartitionSpec::PS_DISTR_KEY_PART_PTR:
{
assert(table->m_fragmentType != NdbDictionary::Object::UserDefined);
Uint32 hashVal;
@@ -607,7 +615,21 @@ NdbScanOperation::getPartValueFromInfo(c
partInfo->KeyPartPtr.xfrmbuflen);
if (ret == 0)
{
- *partValue= table->getPartitionId(hashVal);
+ /* We send the hash result here (rather than the partitionId
+ * generated by doing some function on the hash)
+ * Note that KEY and LINEAR KEY native partitioning hash->partitionId
+ * mapping functions are idempotent so that they can be
+ * applied multiple times to their result without changing it.
+ * DIH will apply them, so there's no need to also do it here in API,
+ * unless we want to see which physical partition we *think* will
+ * hold the values.
+ * Only possible advantage is that we could identify some locality
+ * not shown in the hash result. This is only *safe* for schemes
+ * which cannot change the hash->partitionId mapping function
+ * online.
+ * Can add as an optimisation if necessary.
+ */
+ *partValue= hashVal;
return 0;
}
else
@@ -617,6 +639,31 @@ NdbScanOperation::getPartValueFromInfo(c
}
}
+ case Ndb::PartitionSpec::PS_DISTR_KEY_RECORD:
+ {
+ assert(table->m_fragmentType != NdbDictionary::Object::UserDefined);
+ Uint32 hashVal;
+ int ret= Ndb::computeHash(&hashVal,
+ partInfo->KeyRecord.keyRecord,
+ partInfo->KeyRecord.keyRow,
+ partInfo->KeyRecord.xfrmbuf,
+ partInfo->KeyRecord.xfrmbuflen);
+ if (ret == 0)
+ {
+ /* See comments above about sending hashResult rather than
+ * partitionId
+ */
+ *partValue= hashVal;
+ return 0;
+ }
+ else
+ {
+ setErrorCodeAbort(ret);
+ return -1;
+ }
+ }
+ }
+
/* 4542 : Unknown partition information type */
setErrorCodeAbort(4542);
return -1;
@@ -731,14 +778,7 @@ NdbIndexScanOperation::getDistKeyFromRan
ptrs, tmpshrink, tmplen);
if (ret == 0)
{
- if (result_record->flags & NdbRecord::RecHasUserDefinedPartitioning)
- {
- *distKey= result_record->table->getPartitionId(hashValue);
- }
- else
- {
- *distKey = hashValue;
- }
+ *distKey = hashValue;
return 0;
}
else
@@ -753,14 +793,37 @@ NdbIndexScanOperation::getDistKeyFromRan
int
NdbScanOperation::validatePartInfoPtr(const Ndb::PartitionSpec*& partInfo,
- Uint32 sizeOfPartInfo)
+ Uint32 sizeOfPartInfo,
+ Ndb::PartitionSpec& tmpSpec)
{
- if (unlikely((sizeOfPartInfo != sizeof(Ndb::PartitionSpec)) &&
- (sizeOfPartInfo != 0)))
+ if (unlikely(sizeOfPartInfo != sizeof(Ndb::PartitionSpec)))
{
- /* 4545 : Invalid or Unsupported PartitionInfo structure */
- setErrorCodeAbort(4545);
- return -1;
+ if (sizeOfPartInfo == sizeof(Ndb::PartitionSpec_v1))
+ {
+ const Ndb::PartitionSpec_v1* oldPSpec=
+ (const Ndb::PartitionSpec_v1*) partInfo;
+
+ /* Let's upgrade to the latest variant */
+ tmpSpec.type= oldPSpec->type;
+ if (tmpSpec.type == Ndb::PartitionSpec_v1::PS_USER_DEFINED)
+ {
+ tmpSpec.UserDefined.partitionId= oldPSpec->UserDefined.partitionId;
+ }
+ else
+ {
+ tmpSpec.KeyPartPtr.tableKeyParts= oldPSpec->KeyPartPtr.tableKeyParts;
+ tmpSpec.KeyPartPtr.xfrmbuf= oldPSpec->KeyPartPtr.xfrmbuf;
+ tmpSpec.KeyPartPtr.xfrmbuflen= oldPSpec->KeyPartPtr.xfrmbuflen;
+ }
+
+ partInfo= &tmpSpec;
+ }
+ else
+ {
+ /* 4545 : Invalid or Unsupported PartitionInfo structure */
+ setErrorCodeAbort(4545);
+ return -1;
+ }
}
if (partInfo->type != Ndb::PartitionSpec::PS_NONE)
@@ -842,11 +905,13 @@ NdbIndexScanOperation::setBound(const Nd
NdbDictionary::Object::UserDefined);
/* Validate explicit partitioning info if it's supplied */
+ Ndb::PartitionSpec tmpSpec;
if (partInfo)
{
/* May update the PartInfo ptr */
if (validatePartInfoPtr(partInfo,
- sizeOfPartInfo))
+ sizeOfPartInfo,
+ tmpSpec))
return -1;
}
=== modified file 'storage/ndb/src/ndbapi/ndberror.c'
--- a/storage/ndb/src/ndbapi/ndberror.c 2009-03-09 13:04:27 +0000
+++ b/storage/ndb/src/ndbapi/ndberror.c 2009-03-09 13:22:09 +0000
@@ -611,6 +611,8 @@ ErrorBundle ErrorCodes[] = {
{ 4543, DMEC, AE, "Duplicate partitioning information supplied" },
{ 4544, DMEC, AE, "Wrong partitionInfo type for table" },
{ 4545, DMEC, AE, "Invalid or Unsupported PartitionInfo structure" },
+ { 4546, DMEC, AE, "Explicit partitioning info not allowed for table and operation" },
+
{ 4200, DMEC, AE, "Status Error when defining an operation" },
{ 4201, DMEC, AE, "Variable Arrays not yet supported" },
=== modified file 'storage/ndb/test/ndbapi/testPartitioning.cpp'
--- a/storage/ndb/test/ndbapi/testPartitioning.cpp 2009-03-09 13:04:27 +0000
+++ b/storage/ndb/test/ndbapi/testPartitioning.cpp 2009-03-09 13:22:09 +0000
@@ -51,7 +51,7 @@ setNativePartitioning(Ndb* ndb, NdbDicti
}
/* Use rand to choose one of the native partitioning schemes */
- const Uint32 rType= rand() % 2;
+ const Uint32 rType= rand() % 3;
Uint32 fragType= -1;
switch(rType)
{
@@ -61,6 +61,9 @@ setNativePartitioning(Ndb* ndb, NdbDicti
case 1 :
fragType = NdbDictionary::Object::DistrKeyLin;
break;
+ case 2:
+ fragType = NdbDictionary::Object::HashMapPartition;
+ break;
}
ndbout << "Setting fragment type to " << fragType << endl;
@@ -242,6 +245,10 @@ create_dist_table(Ndb* pNdb,
{
setupUDPartitioning(pNdb, tab);
}
+ else
+ {
+ setNativePartitioning(pNdb, tab, 0, 0);
+ }
NdbDictionary::Column dk;
dk.setName(DistTabDKeyCol);
@@ -984,6 +991,7 @@ dist_scan_body(Ndb* pNdb, int records, i
//keyParts[0].ptr= &badPartVal;
Ndb::PartitionSpec pSpec;
+ char* tabRow= NULL;
if (userDefined)
{
@@ -993,10 +1001,35 @@ dist_scan_body(Ndb* pNdb, int records, i
}
else
{
- pSpec.type= Ndb::PartitionSpec::PS_DISTR_KEY_PART_PTR;
- pSpec.KeyPartPtr.tableKeyParts= keyParts;
- pSpec.KeyPartPtr.xfrmbuf= NULL;
- pSpec.KeyPartPtr.xfrmbuflen= 0;
+ /* Can set either using an array of Key parts, or a KeyRecord
+ * structure. Let's test both
+ */
+ if (rand() % 2)
+ {
+ //ndbout << "Using Key Parts to set range partition info" << endl;
+ pSpec.type= Ndb::PartitionSpec::PS_DISTR_KEY_PART_PTR;
+ pSpec.KeyPartPtr.tableKeyParts= keyParts;
+ pSpec.KeyPartPtr.xfrmbuf= NULL;
+ pSpec.KeyPartPtr.xfrmbuflen= 0;
+ }
+ else
+ {
+ //ndbout << "Using KeyRecord to set range partition info" << endl;
+
+ /* Setup a row in NdbRecord format with the distkey value set */
+ tabRow= (char*)malloc(NdbDictionary::getRecordRowLength(tabRecord));
+ int& dKeyVal= *((int*) NdbDictionary::getValuePtr(tabRecord,
+ tabRow,
+ tab->getColumn(DistTabDKeyCol)->getAttrId()));
+ dKeyVal= partValue;
+ // dKeyVal= partValue + 1; // Test failue case
+
+ pSpec.type= Ndb::PartitionSpec::PS_DISTR_KEY_RECORD;
+ pSpec.KeyRecord.keyRecord= tabRecord;
+ pSpec.KeyRecord.keyRow= tabRow;
+ pSpec.KeyRecord.xfrmbuf= 0;
+ pSpec.KeyRecord.xfrmbuflen= 0;
+ }
}
CHECK(op->setBound(idxRecord,
@@ -1004,6 +1037,11 @@ dist_scan_body(Ndb* pNdb, int records, i
&pSpec,
sizeof(pSpec)),
op);
+
+ if (tabRow)
+ free(tabRow);
+ tabRow= NULL;
+
}
}
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-6.4 branch (frazer:2920) Bug#37934 | Frazer Clement | 9 Mar |