#At file:///net/atum17/export/home2/tmp/jw159207/mysql/repo/push-scan-scan/ based on revid:ole.john.aske@stripped
3292 Jan Wedvik 2010-09-24
This commit fixes a bug that caused results for pushed queries to be sorted
wrongly in some cases. This could happen if the root operation was a sorted
scan (of an ordered index) and there was at least one scan descendant. Then it
might happen that the child batch for fragment 0 contained child tuples of
parent tuples A and B, while fragment 1 only containing tuples that are children
of A *with more such tuples coming in the next batch*. The API would then
emit joined tuples containing parent tuple B before asking for the next batch
of child tuples. Sorting order would then be wrong, since the next batch would
generate more joined tuples containing parent tuple A.
The fix has two element:
1. The API will return an error code if an application tries to build
a query with a sorted root scan *and* another scan.
2. The ndb handler will not try to push such operation trees. This would
otherwise happen if e.g. the query had an "ORDER BY" clause or if (parts of)
a predicate would map to a range scan on the root table. (In the latter case
sorting is not really needed and should ideally be eliminated.)
modified:
mysql-test/suite/ndb/r/ndb_join_pushdown.result
mysql-test/suite/ndb/t/ndb_join_pushdown.test
sql/abstract_query_plan.cc
sql/abstract_query_plan.h
sql/ha_ndbcluster.cc
sql/ha_ndbcluster.h
storage/ndb/src/ndbapi/NdbQueryBuilder.cpp
storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp
storage/ndb/src/ndbapi/NdbQueryOperation.cpp
storage/ndb/src/ndbapi/ndberror.c
=== modified file 'mysql-test/suite/ndb/r/ndb_join_pushdown.result'
--- a/mysql-test/suite/ndb/r/ndb_join_pushdown.result 2010-09-23 11:47:46 +0000
+++ b/mysql-test/suite/ndb/r/ndb_join_pushdown.result 2010-09-24 07:29:43 +0000
@@ -3811,6 +3811,108 @@ pruned
const_pruned
6
drop table t1;
+create table t1 (pk int primary key, a int, b int) engine=ndb;
+create index ix1 on t1(b,a);
+insert into t1 values (0,10,10);
+insert into t1 values (1,10,20);
+insert into t1 values (2,20,20);
+insert into t1 values (3,10,10);
+insert into t1 values (4,10,20);
+insert into t1 values (5,10,20);
+insert into t1 values (6,10,10);
+insert into t1 values (7,10,10);
+insert into t1 values (8,10,20);
+insert into t1 values (9,10,10);
+explain select x1.pk,x1.a,x1.b from t1 as x1
+join t1 as x2 on x1.a=x2.b
+join t1 as x3 on x2.a=x3.b
+order by x1.pk limit 70;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE x1 index NULL PRIMARY 4 NULL 10
+1 SIMPLE x2 ref ix1 ix1 5 test.x1.a 1 Parent of 2 pushed join@1; Using where
+1 SIMPLE x3 ref ix1 ix1 5 test.x2.a 1 Child of pushed join@1; Using where
+select x1.pk,x1.a,x1.b from t1 as x1
+join t1 as x2 on x1.a=x2.b
+join t1 as x3 on x2.a=x3.b
+order by x1.pk limit 70;
+pk a b
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+0 10 10
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+1 10 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+2 20 20
+explain select * from t1 as x1, t1 as x2 where x1.a=x2.b and x1.b = 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE x1 ref ix1 ix1 5 const 10 Using where with pushed condition
+1 SIMPLE x2 ref ix1 ix1 5 test.x1.a 1 Using where
+select * from t1 as x1, t1 as x2 where x1.a=x2.b and x1.b = 3;
+pk a b pk a b
+drop table t1;
create temporary table spj_counts_at_end
select counter_name, sum(val) AS val
from ndbinfo.counters
@@ -3828,10 +3930,10 @@ counter_name spj_counts_at_end.val - spj
CONST_PRUNED_RANGE_SCANS_RECEIVED 6
LOCAL_TABLE_SCANS_SENT 190
PRUNED_RANGE_SCANS_RECEIVED 12
-RANGE_SCANS_RECEIVED 189
+RANGE_SCANS_RECEIVED 195
READS_NOT_FOUND 398
READS_RECEIVED 48
-SCAN_ROWS_RETURNED 42489
+SCAN_ROWS_RETURNED 42579
TABLE_SCANS_RECEIVED 190
select sum(spj_counts_at_end.val - spj_counts_at_startup.val) as 'LOCAL+REMOTE READS_SENT'
from spj_counts_at_end, spj_counts_at_startup
@@ -3843,15 +3945,15 @@ LOCAL+REMOTE READS_SENT
drop table spj_counts_at_startup;
drop table spj_counts_at_end;
scan_count
-1944
+1955
pruned_scan_count
7
sorted_scan_count
-42
+44
pushed_queries_defined
-325
+327
pushed_queries_dropped
11
pushed_queries_executed
-241
+244
set ndb_join_pushdown = @save_ndb_join_pushdown;
=== modified file 'mysql-test/suite/ndb/t/ndb_join_pushdown.test'
--- a/mysql-test/suite/ndb/t/ndb_join_pushdown.test 2010-09-23 09:22:03 +0000
+++ b/mysql-test/suite/ndb/t/ndb_join_pushdown.test 2010-09-24 07:29:43 +0000
@@ -2723,6 +2723,40 @@ select straight_join * from t1 x, t1 y w
drop table t1;
+###
+# Test that sorted scan with sub scan is *not* pushed.
+###
+create table t1 (pk int primary key, a int, b int) engine=ndb;
+create index ix1 on t1(b,a);
+
+insert into t1 values (0,10,10);
+insert into t1 values (1,10,20);
+insert into t1 values (2,20,20);
+insert into t1 values (3,10,10);
+insert into t1 values (4,10,20);
+insert into t1 values (5,10,20);
+insert into t1 values (6,10,10);
+insert into t1 values (7,10,10);
+insert into t1 values (8,10,20);
+insert into t1 values (9,10,10);
+
+
+# Results would be sorted wrongly if pushed.
+explain select x1.pk,x1.a,x1.b from t1 as x1
+ join t1 as x2 on x1.a=x2.b
+ join t1 as x3 on x2.a=x3.b
+ order by x1.pk limit 70;
+select x1.pk,x1.a,x1.b from t1 as x1
+ join t1 as x2 on x1.a=x2.b
+ join t1 as x3 on x2.a=x3.b
+ order by x1.pk limit 70;
+
+# This query should not be pushed, since mysqld requires sorted
+# results for the root scan.
+explain select * from t1 as x1, t1 as x2 where x1.a=x2.b and x1.b = 3;
+select * from t1 as x1, t1 as x2 where x1.a=x2.b and x1.b = 3;
+
+drop table t1;
########################################
# Verify DBSPJ counters for entire test:
=== modified file 'sql/abstract_query_plan.cc'
--- a/sql/abstract_query_plan.cc 2010-09-22 07:44:06 +0000
+++ b/sql/abstract_query_plan.cc 2010-09-24 07:29:43 +0000
@@ -449,6 +449,16 @@ namespace AQP
{}
/**
+ @return True if sorted results are expetced from this operation.
+ */
+ bool Table_access::is_sorted() const
+ {
+ return get_join_tab()->type == JT_NEXT ||
+ (get_join_tab()->type == JT_REF && get_join_tab()->sorted) ||
+ (get_join_tab()->type == JT_REF_OR_NULL && get_join_tab()->sorted);
+ }
+
+ /**
@param plan Iterate over fields within this plan.
@param field_item Iterate over Item_fields equal to this.
*/
=== modified file 'sql/abstract_query_plan.h'
--- a/sql/abstract_query_plan.h 2010-09-22 07:44:06 +0000
+++ b/sql/abstract_query_plan.h 2010-09-24 07:29:43 +0000
@@ -205,6 +205,8 @@ namespace AQP
void dbug_print() const;
+ bool is_sorted() const;
+
private:
/** Backref. to the Join_plan which this Table_access is part of */
=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc 2010-09-23 11:47:46 +0000
+++ b/sql/ha_ndbcluster.cc 2010-09-24 07:29:43 +0000
@@ -1274,6 +1274,13 @@ ha_ndbcluster::make_pushed_join(AQP::Joi
DBUG_PRINT("info", ("Table %d not a pushable access type", join_cnt));
continue;
}
+ if (join_root->is_sorted() && !is_lookup_operation(child_type))
+ {
+ // Sorted scan cannot have scan descendant.
+ DBUG_PRINT("info", ("Table %d is scanned. Therefore it cannot be pushed"
+ " as a descendant of a sorted scan", join_cnt));
+ continue;
+ }
if (!context.field_ref_is_join_pushable(join_tab, join_items, join_parent))
{
DBUG_PRINT("info", ("Table %d not REF-joined, not pushable", join_cnt));
@@ -1596,82 +1603,119 @@ private:
*
* @param type This is the operation type that the server want to execute.
* @param idx Index used whenever relevant for operation type
+ * @param rootSorted True if the root operation is an ordered index scan
+ * with sorted results.
* @return True if the operation may be pushed.
*/
bool
-ha_ndbcluster::check_if_pushable(const NdbQueryOperationTypeWrapper& type, uint idx) const
+ha_ndbcluster::check_if_pushable(const NdbQueryOperationTypeWrapper& type,
+ uint idx,
+ bool rootSorted) const
{
- bool pushable= FALSE;
+ if (m_pushed_join == NULL)
+ {
+ return FALSE;
+ }
+
+ const NdbQueryOperationDef* const root_operation=
+ m_pushed_join->get_query_def().getQueryOperation(0U);
+
+ const NdbQueryOperationTypeWrapper& query_def_type=
+ root_operation->getType();
+
+ if (query_def_type != type)
+ {
+ DBUG_PRINT("info",
+ ("Cannot execute push join. Root operation prepared as %s "
+ "not executable as %s w/ index %d",
+ NdbQueryOperationDef::getTypeName(query_def_type),
+ NdbQueryOperationDef::getTypeName(type), idx));
+ return FALSE;
+ }
- if (m_pushed_join != NULL)
+ if (m_disable_pushed_join)
{
- const NdbQueryOperationDef* const root_operation=
- m_pushed_join->get_query_def().getQueryOperation(0U);
+ DBUG_PRINT("info", ("Push disabled (HA_EXTRA_KEYREAD)"));
+ return FALSE;
+ }
+
+ const NdbDictionary::Index* const expected_index= root_operation->getIndex();
- const NdbQueryOperationTypeWrapper& query_def_type= root_operation->getType();
+ // Check that we still use the same index as when the query was prepared.
+ switch (type)
+ {
+ case NdbQueryOperationDef::PrimaryKeyAccess:
+ DBUG_ASSERT(idx==table->s->primary_key);
+ break;
- if (query_def_type == type)
+ case NdbQueryOperationDef::UniqueIndexAccess:
+ DBUG_ASSERT(idx<MAX_KEY);
+ // DBUG_ASSERT(m_index[idx].unique_index == expected_index);
+ if (m_index[idx].unique_index != expected_index)
{
- if (m_disable_pushed_join)
- {
- DBUG_PRINT("info", ("Push disabled (HA_EXTRA_KEYREAD)"));
- }
- else
- {
- const NdbDictionary::Index* const expected_index= root_operation->getIndex();
- pushable= TRUE;
+ DBUG_PRINT("info", ("Actual index %s differs from expected index %s."
+ "Therefore, join cannot be pushed.",
+ m_index[idx].unique_index->getName(),
+ expected_index->getName()));
+ return FALSE;
+ }
+ break;
+
+ case NdbQueryOperationDef::TableScan:
+ DBUG_ASSERT (idx==MAX_KEY);
+ break;
- switch (type)
+ case NdbQueryOperationDef::OrderedIndexScan:
+ DBUG_ASSERT(idx<MAX_KEY);
+ // DBUG_ASSERT(m_index[idx].index == expected_index);
+ if (m_index[idx].index != expected_index)
+ {
+ DBUG_PRINT("info", ("Actual index %s differs from expected index %s."
+ "Therefore, join cannot be pushed.",
+ m_index[idx].index->getName(),
+ expected_index->getName()));
+ return FALSE;
+ }
+ // Check that we do not have a sorted scan with sub scans.
+ if (rootSorted)
+ {
+ const NdbQueryDef& query_def = m_pushed_join->get_query_def();
+ for (uint i= 1; i < query_def.getNoOfOperations(); i++)
+ {
+ const NdbQueryOperationTypeWrapper& child_type=
+ query_def.getQueryOperation(i)->getType();
+ if (child_type == NdbQueryOperationDef::TableScan ||
+ child_type == NdbQueryOperationDef::OrderedIndexScan)
{
- case NdbQueryOperationDef::PrimaryKeyAccess:
- DBUG_ASSERT(idx==table->s->primary_key);
- break;
- case NdbQueryOperationDef::UniqueIndexAccess:
- DBUG_ASSERT(idx<MAX_KEY);
-// DBUG_ASSERT(m_index[idx].unique_index == expected_index);
- pushable= (m_index[idx].unique_index == expected_index);
- break;
- case NdbQueryOperationDef::TableScan:
- DBUG_ASSERT (idx==MAX_KEY);
- break;
- case NdbQueryOperationDef::OrderedIndexScan:
- DBUG_ASSERT(idx<MAX_KEY);
-// DBUG_ASSERT(m_index[idx].index == expected_index);
- pushable= (m_index[idx].index == expected_index);
- break;
- default:
- DBUG_ASSERT(false);
- break;
- }
-
- if (likely(pushable))
- {
- // There may be referrences to Field values from tables outside the scope of
- // our pushed join which are supplied as paramValues().
- // If any of these are NULL values, join can't be pushed
- for (uint i= 0; i < m_pushed_join->get_field_referrences_count(); i++)
- {
- Field* field= m_pushed_join->get_field_ref(i);
- if (field->is_real_null())
- {
- DBUG_PRINT("info", ("paramValue is NULL, can not execute as pushed join"));
- pushable= false;
- break;
- }
- }
+ DBUG_PRINT("info", ("If the root operation is a sorted scan, then "
+ "there may not be scan children. This join "
+ "cannot be pushed."));
+ return FALSE;
}
}
}
- else
+ break;
+
+ default:
+ DBUG_ASSERT(false);
+ break;
+ }
+
+ // There may be referrences to Field values from tables outside the scope of
+ // our pushed join which are supplied as paramValues().
+ // If any of these are NULL values, join can't be pushed
+ for (uint i= 0; i < m_pushed_join->get_field_referrences_count(); i++)
+ {
+ Field* field= m_pushed_join->get_field_ref(i);
+ if (field->is_real_null())
{
DBUG_PRINT("info",
- ("Cannot execute push join. Root operation prepared as %s "
- "not executable as %s w/ index %d",
- NdbQueryOperationDef::getTypeName(query_def_type),
- NdbQueryOperationDef::getTypeName(type), idx));
+ ("paramValue is NULL, can not execute as pushed join"));
+ return FALSE;
}
}
- return pushable;
+
+ return TRUE;
}
@@ -5112,7 +5156,8 @@ int ha_ndbcluster::ordered_index_scan(co
pbound = &bound;
}
- if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index))
+ if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index,
+ sorted))
{
NdbQueryParamValue paramValues[ndb_pushed_join::MAX_REFERRED_FIELDS];
const int error= create_pushed_join(paramValues);
@@ -13686,7 +13731,9 @@ ha_ndbcluster::read_multi_range_first(KE
}
/* Create the scan operation for the first scan range. */
- if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index))
+ if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
+ active_index,
+ !m_active_query && sorted))
{
if (!m_active_query)
{
=== modified file 'sql/ha_ndbcluster.h'
--- a/sql/ha_ndbcluster.h 2010-09-21 06:33:27 +0000
+++ b/sql/ha_ndbcluster.h 2010-09-24 07:29:43 +0000
@@ -690,7 +690,8 @@ private:
bool check_index_fields_not_null(KEY *key_info);
bool check_if_pushable(const NdbQueryOperationTypeWrapper& type,
- uint idx= MAX_KEY) const;
+ uint idx= MAX_KEY,
+ bool rootSorted= false) const;
bool check_is_pushed() const;
int create_pushed_join(NdbQueryParamValue* paramValues, uint paramOffs= 0);
=== modified file 'storage/ndb/src/ndbapi/NdbQueryBuilder.cpp'
--- a/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2010-09-03 13:04:45 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2010-09-24 07:29:43 +0000
@@ -980,6 +980,25 @@ NdbQueryBuilderImpl::contains(const NdbQ
const NdbQueryDefImpl*
NdbQueryBuilderImpl::prepare()
{
+ /* Check if query is sorted and has multiple scan operations. This
+ * combination is not implemented.
+ */
+ if (m_operations.size() > 0 &&
+ m_operations[0]->isScanOperation() &&
+ m_operations[0]->getOrdering()
+ != NdbQueryOptions::ScanOrdering_unordered &&
+ m_operations[0]->getOrdering() != NdbQueryOptions::ScanOrdering_void)
+ {
+ for (Uint32 i = 1; i<m_operations.size(); i++)
+ {
+ if (m_operations[i]->isScanOperation())
+ {
+ setErrorCode(QRY_MULTIPLE_SCAN_SORTED);
+ return NULL;
+ }
+ }
+ }
+
int error;
NdbQueryDefImpl* def = new NdbQueryDefImpl(m_operations, m_operands, error);
m_operations.clear();
@@ -2186,9 +2205,6 @@ NdbQueryIndexScanOperationDefImpl::appen
appendedPattern |= DABits::NI_KEY_CONSTS;
const NdbConstOperandImpl& constOp = *static_cast<const NdbConstOperandImpl*>(value);
- // No of words needed for storing the constant data.
- const Uint32 wordCount = AttributeHeader::getDataSize(constOp.getSizeInBytes());
-
// Build the AttributeHeader for const value
// (AttributeId is later filled in by SPJ in Dbspj::scanIndex_fixupBound())
AttributeHeader ah(0, constOp.getSizeInBytes());
=== modified file 'storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp'
--- a/storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp 2010-09-03 13:04:45 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryBuilderImpl.hpp 2010-09-24 07:29:43 +0000
@@ -45,6 +45,7 @@
#define QRY_PARAMETER_HAS_WRONG_TYPE 4821
#define QRY_CHAR_PARAMETER_TRUNCATED 4822
#define QRY_MULTIPLE_SCAN_BRANCHES 4823
+#define QRY_MULTIPLE_SCAN_SORTED 4824
#ifdef __cplusplus
#include <Vector.hpp>
=== modified file 'storage/ndb/src/ndbapi/NdbQueryOperation.cpp'
--- a/storage/ndb/src/ndbapi/NdbQueryOperation.cpp 2010-09-23 09:17:21 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryOperation.cpp 2010-09-24 07:29:43 +0000
@@ -4505,6 +4505,22 @@ NdbQueryOperationImpl::setOrdering(NdbQu
return -1;
}
+ /* Check if query is sorted and has multiple scan operations. This
+ * combination is not implemented.
+ */
+ if (ordering != NdbQueryOptions::ScanOrdering_unordered)
+ {
+ for (Uint32 i = 1; i < getQuery().getNoOfOperations(); i++)
+ {
+ if (getQuery().getQueryOperation(i).getQueryOperationDef()
+ .isScanOperation())
+ {
+ getQuery().setErrorCode(QRY_MULTIPLE_SCAN_SORTED);
+ return -1;
+ }
+ }
+ }
+
m_ordering = ordering;
return 0;
} // NdbQueryOperationImpl::setOrdering()
=== modified file 'storage/ndb/src/ndbapi/ndberror.c'
--- a/storage/ndb/src/ndbapi/ndberror.c 2010-08-17 07:32:36 +0000
+++ b/storage/ndb/src/ndbapi/ndberror.c 2010-09-24 07:29:43 +0000
@@ -799,6 +799,8 @@ ErrorBundle ErrorCodes[] = {
"Character Parameter was right truncated" },
{ QRY_MULTIPLE_SCAN_BRANCHES, DMEC, AE,
"Query has scans that are not descendants/ancestors of each other." },
+ { QRY_MULTIPLE_SCAN_SORTED, DMEC, AE,
+ "Query with multiple scans may not be sorted." },
{ NO_CONTACT_WITH_PROCESS, DMEC, AE,
"No contact with the process (dead ?)."},
Attachment: [text/bzr-bundle] bzr/jan.wedvik@sun.com-20100924072943-kl3vv7gpivgwvjud.bundle
| Thread |
|---|
| • bzr commit into mysql-5.1-telco-7.0-spj-scan-vs-scan branch(jan.wedvik:3292) | Jan Wedvik | 24 Sep |