3869 Jonas Oreland 2012-03-28 [merge]
ndb - merge 72-spj into 73
modified:
.bzr-mysql/default.conf
mysql-test/suite/ndb/r/ndb_join_pushdown_default.result
mysql-test/suite/ndb/t/ndb_join_pushdown.inc
sql/abstract_query_plan.cc
sql/abstract_query_plan.h
sql/ha_ndbcluster.cc
sql/ha_ndbcluster.h
sql/ha_ndbcluster_push.cc
sql/ha_ndbcluster_push.h
sql/handler.h
sql/sql_select.cc
storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp
storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp
storage/ndb/src/ndbapi/NdbQueryBuilder.cpp
storage/ndb/src/ndbapi/NdbQueryOperation.cpp
storage/ndb/test/ndbapi/testSpj.cpp
storage/ndb/test/tools/spj_sanity_test.cpp
3868 Martin Zaun 2012-03-26 [merge]
Bug#54854 - merge
=== modified file '.bzr-mysql/default.conf'
--- a/.bzr-mysql/default.conf 2012-02-23 15:41:31 +0000
+++ b/.bzr-mysql/default.conf 2012-03-28 10:41:26 +0000
@@ -1,4 +1,4 @@
[MYSQL]
post_commit_to = "commits@stripped"
post_push_to = "commits@stripped"
-tree_name = "mysql-5.5-cluster-7.2"
+tree_name = "mysql-5.5-cluster-7.3"
=== modified file 'mysql-test/suite/ndb/r/ndb_join_pushdown_default.result'
--- a/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result 2012-02-23 15:41:31 +0000
+++ b/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result 2012-03-22 14:18:01 +0000
@@ -2099,6 +2099,77 @@ count(*)
20000
drop table t1;
drop table tx;
+create table t1 (
+a int not null,
+b int not null,
+c int not null,
+d int not null,
+primary key (`a`,`b`)
+) engine=ndbcluster partition by key(a);
+insert into t1 values
+(1,1,1,1), (2,2,2,2), (3,3,3,3), (4,4,4,4),
+(1,2,5,1), (1,3,1,2), (1,4,2,3),
+(2,1,3,4), (2,3,4,5), (2,4,5,1),
+(3,1,1,2), (3,2,2,3), (3,4,3,4),
+(4,1,4,5), (4,2,5,1), (4,3,1,2);
+explain extended
+select *
+from t1
+join t1 as t2 on t2.a = t1.b and t2.b = t1.c;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 16 100.00 Parent of 2 pushed join@1
+1 SIMPLE t2 eq_ref PRIMARY PRIMARY 8 test.t1.b,test.t1.c 1 100.00 Child of 't1' in pushed join@1
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d` from `test`.`t1` join `test`.`t1` `t2` where ((`test`.`t2`.`b` = `test`.`t1`.`c`) and (`test`.`t2`.`a` = `test`.`t1`.`b`))
+set new=on;
+alter table t1 partition by hash(a);
+explain extended
+select *
+from t1
+join t1 as t2 on t2.a = t1.b and t2.b = t1.c;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 16 100.00
+1 SIMPLE t2 eq_ref PRIMARY PRIMARY 8 test.t1.b,test.t1.c 1 100.00
+Warnings:
+Note 9999 Table 't1' is not pushable: has user defined partioning
+Note 9999 Table 't2' is not pushable: has user defined partioning
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d` from `test`.`t1` join `test`.`t1` `t2` where ((`test`.`t2`.`b` = `test`.`t1`.`c`) and (`test`.`t2`.`a` = `test`.`t1`.`b`))
+alter table t1 partition by list(a) (
+partition p1 values in (1),
+partition p2 values in (2),
+partition p3 values in (3),
+partition p4 values in (4)
+);
+explain extended
+select *
+from t1
+join t1 as t2 on t2.a = t1.b and t2.b = t1.c;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 16 100.00
+1 SIMPLE t2 eq_ref PRIMARY PRIMARY 8 test.t1.b,test.t1.c 1 100.00
+Warnings:
+Note 9999 Table 't1' is not pushable: has user defined partioning
+Note 9999 Table 't2' is not pushable: has user defined partioning
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d` from `test`.`t1` join `test`.`t1` `t2` where ((`test`.`t2`.`b` = `test`.`t1`.`c`) and (`test`.`t2`.`a` = `test`.`t1`.`b`))
+alter table t1 partition by range(a) partitions 4 (
+partition p1 values less than (0),
+partition p2 values less than (2),
+partition p3 values less than (4),
+partition p4 values less than (99999)
+);
+explain extended
+select *
+from t1
+join t1 as t2 on t2.a = t1.b and t2.b = t1.c;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 16 100.00
+1 SIMPLE t2 eq_ref PRIMARY PRIMARY 8 test.t1.b,test.t1.c 1 100.00
+Warnings:
+Note 9999 Table 't1' is not pushable: has user defined partioning
+Note 9999 Table 't2' is not pushable: has user defined partioning
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d` from `test`.`t1` join `test`.`t1` `t2` where ((`test`.`t2`.`b` = `test`.`t1`.`c`) and (`test`.`t2`.`a` = `test`.`t1`.`b`))
+drop table t1;
+set new=default;
create table t1 (a int, b int, primary key(a) using hash) engine = ndb;
insert into t1 values (1, 2);
insert into t1 values (2, 3);
@@ -5071,7 +5142,7 @@ join t1 as x2 on x1.a=x2.b
join t1 as x3 on x2.a=x3.b
order by x1.pk limit 70;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE x1 index NULL PRIMARY 4 NULL 10 100.00 Parent of 3 pushed join@1; Using temporary; Using filesort
+1 SIMPLE x1 index NULL PRIMARY 4 NULL 10 100.00 Parent of 3 pushed join@1
1 SIMPLE x2 ref ix1 ix1 5 test.x1.a 2 100.00 Child of 'x1' in pushed join@1; Using where
1 SIMPLE x3 ref ix1 ix1 5 test.x2.a 2 100.00 Child of 'x2' in pushed join@1; Using where
Warnings:
@@ -5216,10 +5287,9 @@ on t1.pk2 = t2.pk1
where t1.pk1 != 6
order by t1.pk1 DESC;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 6 66.67 Using where with pushed condition: (`test`.`t1`.`pk1` <> 6)
-1 SIMPLE t2 ref PRIMARY PRIMARY 4 test.t1.pk2 1 100.00
+1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 6 66.67 Parent of 2 pushed join@1; Using where with pushed condition: (`test`.`t1`.`pk1` <> 6)
+1 SIMPLE t2 ref PRIMARY PRIMARY 4 test.t1.pk2 1 100.00 Child of 't1' in pushed join@1
Warnings:
-Note 9999 Push of table 't2' as scan-child with ordered indexscan-root 't1' not implemented
Note 1003 select `test`.`t1`.`pk1` AS `pk1`,`test`.`t1`.`pk2` AS `pk2`,`test`.`t2`.`pk1` AS `pk1`,`test`.`t2`.`pk2` AS `pk2` from `test`.`t` `t1` join `test`.`t` `t2` where ((`test`.`t2`.`pk1` = `test`.`t1`.`pk2`) and (`test`.`t1`.`pk1` <> 6)) order by `test`.`t1`.`pk1` desc
select * from t as t1 join t as t2
on t1.pk2 = t2.pk1
@@ -5548,7 +5618,7 @@ counter_name spj_counts_at_end.val - spj
CONST_PRUNED_RANGE_SCANS_RECEIVED 8
LOCAL_TABLE_SCANS_SENT 250
PRUNED_RANGE_SCANS_RECEIVED 27
-RANGE_SCANS_RECEIVED 732
+RANGE_SCANS_RECEIVED 736
READS_RECEIVED 58
TABLE_SCANS_RECEIVED 250
drop table spj_counts_at_startup;
@@ -5560,9 +5630,9 @@ where new.variable_name = old.variable_n
order by new.variable_name;
variable_name new.variable_value - old.variable_value
NDB_PRUNED_SCAN_COUNT 8
-NDB_PUSHED_QUERIES_DEFINED 405
+NDB_PUSHED_QUERIES_DEFINED 408
NDB_PUSHED_QUERIES_DROPPED 8
-NDB_PUSHED_QUERIES_EXECUTED 550
-NDB_SORTED_SCAN_COUNT 10
+NDB_PUSHED_QUERIES_EXECUTED 552
+NDB_SORTED_SCAN_COUNT 11
drop table server_counts_at_startup;
set ndb_join_pushdown = @save_ndb_join_pushdown;
=== modified file 'mysql-test/suite/ndb/t/ndb_join_pushdown.inc'
--- a/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 2012-02-23 15:41:31 +0000
+++ b/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 2012-03-20 09:42:12 +0000
@@ -1055,6 +1055,76 @@ connection ddl;
drop table t1;
drop table tx;
+# Test user defined partition not being pushed
+#
+# Note: User defined partitions are handled
+# by the SQL layer, and as such are unknown
+# to the NDB datanodes.
+#
+
+connection spj;
+create table t1 (
+ a int not null,
+ b int not null,
+ c int not null,
+ d int not null,
+ primary key (`a`,`b`)
+) engine=ndbcluster partition by key(a);
+
+connection spj;
+insert into t1 values
+(1,1,1,1), (2,2,2,2), (3,3,3,3), (4,4,4,4),
+(1,2,5,1), (1,3,1,2), (1,4,2,3),
+(2,1,3,4), (2,3,4,5), (2,4,5,1),
+(3,1,1,2), (3,2,2,3), (3,4,3,4),
+(4,1,4,5), (4,2,5,1), (4,3,1,2);
+
+# Only this query('partition by key') should be pushed
+explain extended
+select *
+from t1
+join t1 as t2 on t2.a = t1.b and t2.b = t1.c;
+
+connection ddl;
+set new=on;
+alter table t1 partition by hash(a);
+
+explain extended
+select *
+from t1
+join t1 as t2 on t2.a = t1.b and t2.b = t1.c;
+
+connection ddl;
+alter table t1 partition by list(a) (
+ partition p1 values in (1),
+ partition p2 values in (2),
+ partition p3 values in (3),
+ partition p4 values in (4)
+);
+
+explain extended
+select *
+from t1
+join t1 as t2 on t2.a = t1.b and t2.b = t1.c;
+
+connection ddl;
+alter table t1 partition by range(a) partitions 4 (
+ partition p1 values less than (0),
+ partition p2 values less than (2),
+ partition p3 values less than (4),
+ partition p4 values less than (99999)
+);
+
+explain extended
+select *
+from t1
+join t1 as t2 on t2.a = t1.b and t2.b = t1.c;
+
+connection ddl;
+drop table t1;
+set new=default;
+
+
# pushed mrr does not yet handle multiple PK operations in same transaction
# Need 6.0 result handling stuff to simplify result handling
# *** join push is currently dissabled for these ****
=== modified file 'sql/abstract_query_plan.cc'
--- a/sql/abstract_query_plan.cc 2011-10-20 19:52:11 +0000
+++ b/sql/abstract_query_plan.cc 2012-03-22 14:18:01 +0000
@@ -63,6 +63,10 @@ namespace AQP
return m_join_tabs + join_tab_no;
}
+ /**
+ * Check if either a GROUP BY or ORDER BY could be
+ * executed without sorting by reading an ordered index.
+ */
void
Join_plan::find_skippabable_group_or_order() const
{
@@ -546,28 +550,6 @@ namespace AQP
{}
/**
- @return True iff ordered index access is *required* from this operation.
- */
- bool Table_access::is_fixed_ordered_index() const
- {
- const JOIN_TAB* const join_tab= get_join_tab();
-
- /* For the QUICK_SELECT_I classes we can disable ordered index usage by
- * setting 'QUICK_SELECT_I::sorted = false'.
- * However, QUICK_SELECT_I::QS_TYPE_RANGE_DESC is special as its
- * internal implementation requires its 'multi-ranges' to be retrieved
- * in (descending) sorted order from the underlying table.
- */
- if (join_tab->select != NULL &&
- join_tab->select->quick != NULL)
- {
- QUICK_SELECT_I *quick= join_tab->select->quick;
- return (quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE_DESC);
- }
- return false;
- }
-
- /**
Check if the results from this operation will joined with results
from the next operation using a join buffer (instead of plain nested loop).
@return True if using a join buffer.
=== modified file 'sql/abstract_query_plan.h'
--- a/sql/abstract_query_plan.h 2011-10-20 19:52:11 +0000
+++ b/sql/abstract_query_plan.h 2012-03-22 14:18:01 +0000
@@ -210,8 +210,6 @@ namespace AQP
void dbug_print() const;
- bool is_fixed_ordered_index() const;
-
bool uses_join_cache() const;
private:
=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc 2012-03-27 04:18:59 +0000
+++ b/sql/ha_ndbcluster.cc 2012-03-28 10:41:26 +0000
@@ -4023,8 +4023,7 @@ int ha_ndbcluster::ordered_index_scan(co
}
#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
- if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index,
- sorted))
+ if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index))
{
const int error= create_pushed_join();
if (unlikely(error))
@@ -13919,8 +13918,7 @@ ha_ndbcluster::read_multi_range_first(KE
#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
/* Create the scan operation for the first scan range. */
if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan,
- active_index,
- !m_active_query && sorted))
+ active_index))
{
if (!m_active_query)
{
@@ -14578,8 +14576,7 @@ ha_ndbcluster::maybe_pushable_join(const
#ifndef NDB_WITHOUT_JOIN_PUSHDOWN
bool
ha_ndbcluster::check_if_pushable(int type, //NdbQueryOperationDef::Type,
- uint idx,
- bool needSorted) const
+ uint idx) const
{
if (m_disable_pushed_join)
{
@@ -14590,8 +14587,7 @@ ha_ndbcluster::check_if_pushable(int typ
&& m_pushed_join_member != NULL
&& m_pushed_join_member->match_definition(
type,
- (idx<MAX_KEY) ? &m_index[idx] : NULL,
- needSorted);
+ (idx<MAX_KEY) ? &m_index[idx] : NULL);
}
int
@@ -14728,46 +14724,6 @@ ha_ndbcluster::test_push_flag(enum ha_pu
}
DBUG_RETURN(false);
- case HA_PUSH_NO_ORDERED_INDEX:
- {
- if (m_pushed_join_operation != PUSHED_ROOT)
- {
- DBUG_RETURN(true);
- }
- const NdbQueryDef& query_def = m_pushed_join_member->get_query_def();
- const NdbQueryOperationDef::Type root_type=
- query_def.getQueryOperation((uint)PUSHED_ROOT)->getType();
-
- /**
- * Primary key/ unique key lookup is always 'ordered' wrt. itself.
- */
- if (root_type == NdbQueryOperationDef::PrimaryKeyAccess ||
- root_type == NdbQueryOperationDef::UniqueIndexAccess)
- {
- DBUG_RETURN(false);
- }
-
- /**
- * Ordered index scan can be provided as an ordered resultset iff
- * it has no child scans.
- */
- if (root_type == NdbQueryOperationDef::OrderedIndexScan)
- {
- for (uint i= 1; i < query_def.getNoOfOperations(); i++)
- {
- const NdbQueryOperationDef::Type child_type=
- query_def.getQueryOperation(i)->getType();
- if (child_type == NdbQueryOperationDef::TableScan ||
- child_type == NdbQueryOperationDef::OrderedIndexScan)
- {
- DBUG_RETURN(true);
- }
- }
- DBUG_RETURN(false);
- }
- DBUG_RETURN(true);
- }
-
default:
DBUG_ASSERT(0);
DBUG_RETURN(false);
=== modified file 'sql/ha_ndbcluster.h'
--- a/sql/ha_ndbcluster.h 2012-02-23 15:41:31 +0000
+++ b/sql/ha_ndbcluster.h 2012-03-22 14:18:01 +0000
@@ -445,8 +445,7 @@ private:
bool check_index_fields_not_null(KEY *key_info) const;
bool check_if_pushable(int type, //NdbQueryOperationDef::Type,
- uint idx= MAX_KEY,
- bool rootSorted= false) const;
+ uint idx= MAX_KEY) const;
bool check_is_pushed() const;
int create_pushed_join(const NdbQueryParamValue* keyFieldParams=NULL,
uint paramCnt= 0);
=== modified file 'sql/ha_ndbcluster_push.cc'
--- a/sql/ha_ndbcluster_push.cc 2011-09-30 11:05:03 +0000
+++ b/sql/ha_ndbcluster_push.cc 2012-03-22 14:18:01 +0000
@@ -153,8 +153,7 @@ ndb_pushed_join::~ndb_pushed_join()
bool ndb_pushed_join::match_definition(
int type, //NdbQueryOperationDef::Type,
- const NDB_INDEX_DATA* idx,
- bool needSorted) const
+ const NDB_INDEX_DATA* idx) const
{
const NdbQueryOperationDef* const root_operation=
m_query_def->getQueryOperation((uint)0);
@@ -195,13 +194,6 @@ bool ndb_pushed_join::match_definition(
case NdbQueryOperationDef::TableScan:
DBUG_ASSERT (idx==NULL && expected_index==NULL);
- if (needSorted)
- {
- DBUG_PRINT("info",
- ("TableScan access can not be provied as sorted result. "
- "Therefore, join cannot be pushed."));
- return FALSE;
- }
break;
case NdbQueryOperationDef::OrderedIndexScan:
@@ -215,13 +207,6 @@ bool ndb_pushed_join::match_definition(
expected_index->getName()));
return FALSE;
}
- if (needSorted && m_query_def->getQueryType() == NdbQueryDef::MultiScanQuery)
- {
- DBUG_PRINT("info",
- ("OrderedIndexScan with scan siblings "
- "can not execute as pushed join."));
- return FALSE;
- }
break;
default:
@@ -622,16 +607,6 @@ ndb_pushed_builder_ctx::is_pushable_as_c
DBUG_RETURN(false);
}
- if (access_type==AQP::AT_ORDERED_INDEX_SCAN && m_join_root->is_fixed_ordered_index())
- {
- // root must be an ordered index scan - Thus it cannot have other scan descendant.
- EXPLAIN_NO_PUSH("Push of table '%s' as scan-child "
- "with ordered indexscan-root '%s' not implemented",
- table->get_table()->alias,
- m_join_root->get_table()->alias);
- DBUG_RETURN(false);
- }
-
if (table->get_no_of_key_fields() > ndb_pushed_join::MAX_LINKED_KEYS)
{
EXPLAIN_NO_PUSH("Can't push table '%s' as child, "
=== modified file 'sql/ha_ndbcluster_push.h'
--- a/sql/ha_ndbcluster_push.h 2011-09-30 11:05:03 +0000
+++ b/sql/ha_ndbcluster_push.h 2012-03-22 14:18:01 +0000
@@ -98,8 +98,7 @@ public:
* of operation specified by the arguments.
*/
bool match_definition(int type, //NdbQueryOperationDef::Type,
- const NDB_INDEX_DATA* idx,
- bool needSorted) const;
+ const NDB_INDEX_DATA* idx) const;
/** Create an executable instance of this defined query. */
NdbQuery* make_query_instance(
=== modified file 'sql/handler.h'
--- a/sql/handler.h 2012-03-21 17:28:20 +0000
+++ b/sql/handler.h 2012-03-23 07:44:14 +0000
@@ -501,11 +501,6 @@ enum ha_push_flag {
within this pushed join
*/
,HA_PUSH_MULTIPLE_DEPENDENCY
-
- /* Handler is unable to return the result in sorted order using an
- ordered index on the parent operation.
- */
- ,HA_PUSH_NO_ORDERED_INDEX
};
#endif
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2012-02-23 15:41:31 +0000
+++ b/sql/sql_select.cc 2012-03-22 14:18:01 +0000
@@ -1780,16 +1780,12 @@ make_pushed_join(THD *thd, JOIN *join)
if (join->const_tables < join->tables &&
join->join_tab[join->const_tables].table->file->number_of_pushed_joins() > 0)
{
- const handler *ha=join->join_tab[join->const_tables].table->file;
-
- if (join->group_list && join->simple_group &&
- (!plan.group_by_filesort_is_skippable() || ha->test_push_flag(HA_PUSH_NO_ORDERED_INDEX)))
+ if (join->group_list && join->simple_group && !plan.group_by_filesort_is_skippable())
{
join->need_tmp= 1;
join->simple_order= join->simple_group= 0;
}
- else if (join->order && join->simple_order &&
- (!plan.order_by_filesort_is_skippable() || ha->test_push_flag(HA_PUSH_NO_ORDERED_INDEX)))
+ else if (join->order && join->simple_order && !plan.order_by_filesort_is_skippable())
{
join->need_tmp= 1;
join->simple_order= join->simple_group= 0;
=== modified file 'storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2011-11-16 08:17:17 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2012-03-20 09:42:12 +0000
@@ -1232,13 +1232,13 @@ private:
void scanIndex_execSCAN_FRAGCONF(Signal*, Ptr<Request>, Ptr<TreeNode>, Ptr<ScanFragHandle>);
void scanIndex_parent_row(Signal*,Ptr<Request>,Ptr<TreeNode>, const RowPtr&);
void scanIndex_fixupBound(Ptr<ScanFragHandle> fragPtr, Uint32 ptrI, Uint32);
- void scanIndex_send(Signal* signal,
- Ptr<Request> requestPtr,
- Ptr<TreeNode> treeNodePtr,
- Uint32 noOfFrags,
- Uint32 bs_bytes,
- Uint32 bs_rows,
- Uint32& batchRange);
+ Uint32 scanIndex_send(Signal* signal,
+ Ptr<Request> requestPtr,
+ Ptr<TreeNode> treeNodePtr,
+ Uint32 noOfFrags,
+ Uint32 bs_bytes,
+ Uint32 bs_rows,
+ Uint32& batchRange);
void scanIndex_batchComplete(Signal* signal);
Uint32 scanIndex_findFrag(Local_ScanFragHandle_list &, Ptr<ScanFragHandle>&,
Uint32 fragId);
=== modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp'
--- a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2012-03-09 15:37:45 +0000
+++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2012-03-20 09:42:12 +0000
@@ -3059,145 +3059,192 @@ Dbspj::lookup_send(Signal* signal,
Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI;
Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
- if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
+ Uint32 err = 0;
+
+ do
{
- jam();
+ if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
+ {
+ jam();
+ /**
+ * Pass sections to send
+ */
+ treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
+ treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
+ }
+ else
+ {
+ if ((treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED) == 0)
+ {
+ jam();
+ Uint32 tmp = RNIL;
+ if (!dupSection(tmp, keyInfoPtrI))
+ {
+ jam();
+ err = DbspjErr::OutOfSectionMemory;
+ break;
+ }
+
+ keyInfoPtrI = tmp;
+ }
+ else
+ {
+ jam();
+ treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
+ }
+
+ if ((treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED) == 0)
+ {
+ jam();
+ Uint32 tmp = RNIL;
+
+ /**
+ * Test execution terminated due to 'OutOfSectionMemory' which
+ * may happen for different treeNodes in the request:
+ * - 17070: Fail on any lookup_send()
+ * - 17071: Fail on lookup_send() if 'isLeaf'
+ * - 17072: Fail on lookup_send() if treeNode not root
+ */
+
+ if (ERROR_INSERTED_CLEAR(17070) ||
+ (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17071)) ||
+ (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17072)))
+ {
+ jam();
+ ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
+ __LINE__, __FILE__);
+ if (keyInfoPtrI != RNIL)
+ releaseSection(keyInfoPtrI);
+ err = DbspjErr::OutOfSectionMemory;
+ break;
+ }
+
+ if (!dupSection(tmp, attrInfoPtrI))
+ {
+ jam();
+ if (keyInfoPtrI != RNIL)
+ releaseSection(keyInfoPtrI);
+ err = DbspjErr::OutOfSectionMemory;
+ break;
+ }
+
+ attrInfoPtrI = tmp;
+ }
+ else
+ {
+ jam();
+ treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
+ }
+ }
+
+ getSection(handle.m_ptr[0], keyInfoPtrI);
+ getSection(handle.m_ptr[1], attrInfoPtrI);
+ handle.m_cnt = 2;
+
/**
- * Pass sections to send
+ * Inject error to test LQHKEYREF handling:
+ * Tampering with tableSchemaVersion such that LQH will
+ * return LQHKEYREF('1227: Invalid schema version')
+ * May happen for different treeNodes in the request:
+ * - 17030: Fail on any lookup_send()
+ * - 17031: Fail on lookup_send() if 'isLeaf'
+ * - 17032: Fail on lookup_send() if treeNode not root
*/
- treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
- treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
- }
- else
- {
- if ((treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED) == 0)
+ if (ERROR_INSERTED_CLEAR(17030) ||
+ (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17031)) ||
+ (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17032)))
{
jam();
- Uint32 tmp = RNIL;
- ndbrequire(dupSection(tmp, keyInfoPtrI)); // TODO handle error
- keyInfoPtrI = tmp;
+ req->tableSchemaVersion += (1 << 16); // Provoke 'Invalid schema version'
+ }
+
+#if defined DEBUG_LQHKEYREQ
+ ndbout_c("LQHKEYREQ to %x", ref);
+ printLQHKEYREQ(stdout, signal->getDataPtrSend(),
+ NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
+ DBLQH);
+ printf("KEYINFO: ");
+ print(handle.m_ptr[0], stdout);
+ printf("ATTRINFO: ");
+ print(handle.m_ptr[1], stdout);
+#endif
+
+ Uint32 Tnode = refToNode(ref);
+ if (Tnode == getOwnNodeId())
+ {
+ c_Counters.incr_counter(CI_LOCAL_READS_SENT, 1);
}
else
{
- jam();
- treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
+ c_Counters.incr_counter(CI_REMOTE_READS_SENT, 1);
}
- if ((treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED) == 0)
+ /**
+ * Test execution terminated due to 'NodeFailure' which
+ * may happen for different treeNodes in the request:
+ * - 17020: Fail on any lookup_send()
+ * - 17021: Fail on lookup_send() if 'isLeaf'
+ * - 17022: Fail on lookup_send() if treeNode not root
+ */
+ if (ERROR_INSERTED_CLEAR(17020) ||
+ (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17021)) ||
+ (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17022)))
{
jam();
- Uint32 tmp = RNIL;
- ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error
- attrInfoPtrI = tmp;
+ releaseSections(handle);
+ err = DbspjErr::NodeFailure;
+ break;
}
- else
+ if (unlikely(!c_alive_nodes.get(Tnode)))
{
jam();
- treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
+ releaseSections(handle);
+ err = DbspjErr::NodeFailure;
+ break;
+ }
+ else if (! (treeNodePtr.p->isLeaf() && requestPtr.p->isLookup()))
+ {
+ jam();
+ ndbassert(Tnode < NDB_ARRAY_SIZE(requestPtr.p->m_lookup_node_data));
+ requestPtr.p->m_outstanding += cnt;
+ requestPtr.p->m_lookup_node_data[Tnode] += cnt;
+ // number wrapped
+ ndbrequire(! (requestPtr.p->m_lookup_node_data[Tnode] == 0));
}
- }
-
- getSection(handle.m_ptr[0], keyInfoPtrI);
- getSection(handle.m_ptr[1], attrInfoPtrI);
- handle.m_cnt = 2;
-
- /**
- * Inject error to test LQHKEYREF handling:
- * Tampering with tableSchemaVersion such that LQH will
- * return LQHKEYREF('1227: Invalid schema version')
- * May happen for different treeNodes in the request:
- * - 17030: Fail on any lookup_send()
- * - 17031: Fail on lookup_send() if 'isLeaf'
- * - 17032: Fail on lookup_send() if treeNode not root
- */
- if (ERROR_INSERTED_CLEAR(17030) ||
- (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17031)) ||
- (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17032)))
- {
- jam();
- req->tableSchemaVersion += (1 << 16); // Provoke 'Invalid schema version'
- }
-#if defined DEBUG_LQHKEYREQ
- ndbout_c("LQHKEYREQ to %x", ref);
- printLQHKEYREQ(stdout, signal->getDataPtrSend(),
- NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
- DBLQH);
- printf("KEYINFO: ");
- print(handle.m_ptr[0], stdout);
- printf("ATTRINFO: ");
- print(handle.m_ptr[1], stdout);
-#endif
+ sendSignal(ref, GSN_LQHKEYREQ, signal,
+ NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
+ JBB, &handle);
- Uint32 Tnode = refToNode(ref);
- if (Tnode == getOwnNodeId())
- {
- c_Counters.incr_counter(CI_LOCAL_READS_SENT, 1);
- }
- else
- {
- c_Counters.incr_counter(CI_REMOTE_READS_SENT, 1);
- }
+ treeNodePtr.p->m_lookup_data.m_outstanding += cnt;
+ if (requestPtr.p->isLookup() && treeNodePtr.p->isLeaf())
+ {
+ jam();
+ /**
+ * Send TCKEYCONF with DirtyReadBit + Tnode,
+ * so that API can discover if Tnode fails while waiting for result
+ */
+ Uint32 resultRef = req->variableData[0];
+ Uint32 resultData = req->variableData[1];
- /**
- * Test execution terminated due to 'NodeFailure' which
- * may happen for different treeNodes in the request:
- * - 17020: Fail on any lookup_send()
- * - 17021: Fail on lookup_send() if 'isLeaf'
- * - 17022: Fail on lookup_send() if treeNode not root
- */
- if (ERROR_INSERTED_CLEAR(17020) ||
- (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17021)) ||
- (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17022)))
- {
- jam();
- releaseSections(handle);
- abort(signal, requestPtr, DbspjErr::NodeFailure);
- return;
- }
- if (unlikely(!c_alive_nodes.get(Tnode)))
- {
- jam();
- releaseSections(handle);
- abort(signal, requestPtr, DbspjErr::NodeFailure);
+ TcKeyConf* conf = (TcKeyConf*)signal->getDataPtrSend();
+ conf->apiConnectPtr = RNIL; // lookup transaction from operations...
+ conf->confInfo = 0;
+ TcKeyConf::setNoOfOperations(conf->confInfo, 1);
+ conf->transId1 = requestPtr.p->m_transId[0];
+ conf->transId2 = requestPtr.p->m_transId[1];
+ conf->operations[0].apiOperationPtr = resultData;
+ conf->operations[0].attrInfoLen = TcKeyConf::DirtyReadBit | Tnode;
+ Uint32 sigLen = TcKeyConf::StaticLength + TcKeyConf::OperationLength;
+ sendTCKEYCONF(signal, sigLen, resultRef, requestPtr.p->m_senderRef);
+ }
return;
}
- else if (! (treeNodePtr.p->isLeaf() && requestPtr.p->isLookup()))
- {
- jam();
- ndbassert(Tnode < NDB_ARRAY_SIZE(requestPtr.p->m_lookup_node_data));
- requestPtr.p->m_outstanding += cnt;
- requestPtr.p->m_lookup_node_data[Tnode] += cnt;
- // number wrapped
- ndbrequire(! (requestPtr.p->m_lookup_node_data[Tnode] == 0));
- }
-
- sendSignal(ref, GSN_LQHKEYREQ, signal,
- NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
- JBB, &handle);
-
- treeNodePtr.p->m_lookup_data.m_outstanding += cnt;
- if (requestPtr.p->isLookup() && treeNodePtr.p->isLeaf())
- {
- jam();
- /**
- * Send TCKEYCONF with DirtyReadBit + Tnode,
- * so that API can discover if Tnode while waiting for result
- */
- Uint32 resultRef = req->variableData[0];
- Uint32 resultData = req->variableData[1];
+ while (0);
- TcKeyConf* conf = (TcKeyConf*)signal->getDataPtrSend();
- conf->apiConnectPtr = RNIL; // lookup transaction from operations...
- conf->confInfo = 0;
- TcKeyConf::setNoOfOperations(conf->confInfo, 1);
- conf->transId1 = requestPtr.p->m_transId[0];
- conf->transId2 = requestPtr.p->m_transId[1];
- conf->operations[0].apiOperationPtr = resultData;
- conf->operations[0].attrInfoLen = TcKeyConf::DirtyReadBit | Tnode;
- Uint32 sigLen = TcKeyConf::StaticLength + TcKeyConf::OperationLength;
- sendTCKEYCONF(signal, sigLen, resultRef, requestPtr.p->m_senderRef);
- }
+ ndbrequire(err);
+ jam();
+ abort(signal, requestPtr, err);
}
void
@@ -3540,7 +3587,32 @@ Dbspj::lookup_parent_row(Signal* signal,
{
jam();
Uint32 tmp = RNIL;
- ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error
+
+ /**
+ * Test execution terminated due to 'OutOfSectionMemory' which
+ * may happen for different treeNodes in the request:
+ * - 17080: Fail on lookup_parent_row
+ * - 17081: Fail on lookup_parent_row: if 'isLeaf'
+ * - 17082: Fail on lookup_parent_row: if treeNode not root
+ */
+
+ if (ERROR_INSERTED_CLEAR(17080) ||
+ (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17081)) ||
+ (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17082)))
+ {
+ jam();
+ ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
+ __LINE__, __FILE__);
+ err = DbspjErr::OutOfSectionMemory;
+ break;
+ }
+
+ if (!dupSection(tmp, attrInfoPtrI))
+ {
+ jam();
+ err = DbspjErr::OutOfSectionMemory;
+ break;
+ }
Uint32 org_size;
{
@@ -4146,9 +4218,6 @@ Dbspj::scanFrag_send(Signal* signal,
{
jam();
- requestPtr.p->m_outstanding++;
- requestPtr.p->m_cnt_active++;
- treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
Ptr<ScanFragHandle> scanFragHandlePtr;
m_scanfraghandle_pool.getPtr(scanFragHandlePtr, treeNodePtr.p->
m_scanfrag_data.m_scanFragHandlePtrI);
@@ -4215,6 +4284,10 @@ Dbspj::scanFrag_send(Signal* signal,
NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
JBB, &handle);
+ requestPtr.p->m_outstanding++;
+ requestPtr.p->m_cnt_active++;
+ treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
+
scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_SCANNING;
treeNodePtr.p->m_scanfrag_data.m_rows_received = 0;
treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
@@ -5122,19 +5195,17 @@ Dbspj::scanIndex_parent_row(Signal* sign
jam();
Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
- /**
- * Test execution terminated due to 'OutOfSectionMemory':
- * - 17060: Fail on scanIndex_parent_row at first call
- * - 17061: Fail on scanIndex_parent_row if 'isLeaf'
- * - 17062: Fail on scanIndex_parent_row if treeNode not root
- * - 17063: Fail on scanIndex_parent_row at a random node of the query tree
- * -
- */
-
+ /**
+ * Test execution terminated due to 'OutOfSectionMemory':
+ * - 17060: Fail on scanIndex_parent_row at first call
+ * - 17061: Fail on scanIndex_parent_row if 'isLeaf'
+ * - 17062: Fail on scanIndex_parent_row if treeNode not root
+ * - 17063: Fail on scanIndex_parent_row at a random node of the query tree
+ */
if (ERROR_INSERTED_CLEAR(17060) ||
((rand() % 7) == 0 && ERROR_INSERTED_CLEAR(17061)) ||
((treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17062))) ||
- ((treeNodePtr.p->m_parentPtrI != RNIL &&ERROR_INSERTED_CLEAR(17063))))
+ ((treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17063))))
{
ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
__LINE__, __FILE__);
@@ -5338,7 +5409,7 @@ Dbspj::scanIndex_parent_batch_complete(S
data.m_parallelism = static_cast<Uint32>(parallelism);
#ifdef DEBUG_SCAN_FRAGREQ
- DEBUG("::scanIndex_send() starting index scan with parallelism="
+ DEBUG("::scanIndex_parent_batch_complete() starting index scan with parallelism="
<< data.m_parallelism);
#endif
}
@@ -5369,24 +5440,34 @@ Dbspj::scanIndex_parent_batch_complete(S
}
Uint32 batchRange = 0;
- scanIndex_send(signal,
- requestPtr,
- treeNodePtr,
- data.m_parallelism,
- bs_bytes,
- bs_rows,
- batchRange);
-
- data.m_firstExecution = false;
-
- ndbrequire(static_cast<Uint32>(data.m_frags_outstanding +
- data.m_frags_complete) <=
- data.m_fragCount);
+ Uint32 frags_started =
+ scanIndex_send(signal,
+ requestPtr,
+ treeNodePtr,
+ data.m_parallelism,
+ bs_bytes,
+ bs_rows,
+ batchRange);
- data.m_batch_chunks = 1;
- requestPtr.p->m_cnt_active++;
- requestPtr.p->m_outstanding++;
- treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
+ /**
+ * scanIndex_send might fail to send (errors?):
+ * Check that we really did send something before
+ * updating outstanding & active.
+ */
+ if (likely(frags_started > 0))
+ {
+ jam();
+ data.m_firstExecution = false;
+
+ ndbrequire(static_cast<Uint32>(data.m_frags_outstanding +
+ data.m_frags_complete) <=
+ data.m_fragCount);
+
+ data.m_batch_chunks = 1;
+ requestPtr.p->m_cnt_active++;
+ requestPtr.p->m_outstanding++;
+ treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
+ }
}
void
@@ -5418,8 +5499,11 @@ Dbspj::scanIndex_parent_batch_repeat(Sig
/**
* Ask for the first batch for a number of fragments.
+ *
+ * Returns how many fragments we did request the
+ * 'first batch' from. (<= noOfFrags)
*/
-void
+Uint32
Dbspj::scanIndex_send(Signal* signal,
Ptr<Request> requestPtr,
Ptr<TreeNode> treeNodePtr,
@@ -5460,147 +5544,184 @@ Dbspj::scanIndex_send(Signal* signal,
req->batch_size_bytes = bs_bytes;
req->batch_size_rows = bs_rows;
+ Uint32 err = 0;
Uint32 requestsSent = 0;
- Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
- Ptr<ScanFragHandle> fragPtr;
- list.first(fragPtr);
- Uint32 keyInfoPtrI = fragPtr.p->m_rangePtrI;
- ndbrequire(prune || keyInfoPtrI != RNIL);
- /**
- * Iterate over the list of fragments until we have sent as many
- * SCAN_FRAGREQs as we should.
- */
- while (requestsSent < noOfFrags)
{
- jam();
- ndbassert(!fragPtr.isNull());
-
- if (fragPtr.p->m_state != ScanFragHandle::SFH_NOT_STARTED)
- {
- // Skip forward to the frags that we should send.
- jam();
- list.next(fragPtr);
- continue;
- }
-
- const Uint32 ref = fragPtr.p->m_ref;
-
- if (noOfFrags==1 && !prune &&
- data.m_frags_not_started == data.m_fragCount &&
- refToNode(ref) != getOwnNodeId() &&
- list.hasNext(fragPtr))
+ Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
+ Ptr<ScanFragHandle> fragPtr;
+ list.first(fragPtr);
+ Uint32 keyInfoPtrI = fragPtr.p->m_rangePtrI;
+ ndbrequire(prune || keyInfoPtrI != RNIL);
+ /**
+ * Iterate over the list of fragments until we have sent as many
+ * SCAN_FRAGREQs as we should.
+ */
+ while (requestsSent < noOfFrags)
{
- /**
- * If we are doing a scan with adaptive parallelism and start with
- * parallelism=1 then it makes sense to fetch a batch from a fragment on
- * the local data node. The reason for this is that if that fragment
- * contains few rows, we may be able to read from several fragments in
- * parallel. Then we minimize the total number of round trips (to remote
- * data nodes) if we fetch the first fragment batch locally.
- */
jam();
- list.next(fragPtr);
- continue;
- }
+ ndbassert(!fragPtr.isNull());
- SectionHandle handle(this);
+ if (fragPtr.p->m_state != ScanFragHandle::SFH_NOT_STARTED)
+ {
+ // Skip forward to the frags that we should send.
+ jam();
+ list.next(fragPtr);
+ continue;
+ }
- Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
+ const Uint32 ref = fragPtr.p->m_ref;
- /**
- * Set data specific for this fragment
- */
- req->senderData = fragPtr.i;
- req->fragmentNoKeyLen = fragPtr.p->m_fragId;
-
- if (prune)
- {
- jam();
- keyInfoPtrI = fragPtr.p->m_rangePtrI;
- if (keyInfoPtrI == RNIL)
+ if (noOfFrags==1 && !prune &&
+ data.m_frags_not_started == data.m_fragCount &&
+ refToNode(ref) != getOwnNodeId() &&
+ list.hasNext(fragPtr))
{
/**
- * Since we use pruning, we can see that no parent rows would hash
- * to this fragment.
+ * If we are doing a scan with adaptive parallelism and start with
+ * parallelism=1 then it makes sense to fetch a batch from a fragment on
+ * the local data node. The reason for this is that if that fragment
+ * contains few rows, we may be able to read from several fragments in
+ * parallel. Then we minimize the total number of round trips (to remote
+ * data nodes) if we fetch the first fragment batch locally.
*/
jam();
- fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
list.next(fragPtr);
continue;
}
- if (!repeatable)
+ SectionHandle handle(this);
+
+ Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
+
+ /**
+ * Set data specific for this fragment
+ */
+ req->senderData = fragPtr.i;
+ req->fragmentNoKeyLen = fragPtr.p->m_fragId;
+
+ if (prune)
{
- /**
- * If we'll use sendSignal() and we need to send the attrInfo several
- * times, we need to copy them. (For repeatable or unpruned scans
- * we use sendSignalNoRelease(), so then we do not need to copy.)
- */
jam();
- Uint32 tmp = RNIL;
- ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error
- attrInfoPtrI = tmp;
+ keyInfoPtrI = fragPtr.p->m_rangePtrI;
+ if (keyInfoPtrI == RNIL)
+ {
+ /**
+ * Since we use pruning, we can see that no parent rows would hash
+ * to this fragment.
+ */
+ jam();
+ fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
+ list.next(fragPtr);
+ continue;
+ }
+
+ if (!repeatable)
+ {
+ /**
+ * If we'll use sendSignal() and we need to send the attrInfo several
+ * times, we need to copy them. (For repeatable or unpruned scans
+ * we use sendSignalNoRelease(), so then we do not need to copy.)
+ */
+ jam();
+ Uint32 tmp = RNIL;
+
+ /**
+ * Test execution terminated due to 'OutOfSectionMemory' which
+ * may happen for different treeNodes in the request:
+ * - 17090: Fail on any scanIndex_send()
+ * - 17091: Fail after sending SCAN_FRAGREQ to some fragments
+ * - 17092: Fail on scanIndex_send() if 'isLeaf'
+ * - 17093: Fail on scanIndex_send() if treeNode not root
+ */
+
+ if (ERROR_INSERTED_CLEAR(17090) ||
+ (requestsSent > 1 && ERROR_INSERTED_CLEAR(17091)) ||
+ (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17092)) ||
+ (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17093)))
+ {
+ jam();
+ ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
+ __LINE__, __FILE__);
+ err = DbspjErr::OutOfSectionMemory;
+ break;
+ }
+
+ if (!dupSection(tmp, attrInfoPtrI))
+ {
+ jam();
+ err = DbspjErr::OutOfSectionMemory;
+ break;
+ }
+
+ attrInfoPtrI = tmp;
+ }
}
- }
- req->variableData[0] = batchRange;
- getSection(handle.m_ptr[0], attrInfoPtrI);
- getSection(handle.m_ptr[1], keyInfoPtrI);
- handle.m_cnt = 2;
+ req->variableData[0] = batchRange;
+ getSection(handle.m_ptr[0], attrInfoPtrI);
+ getSection(handle.m_ptr[1], keyInfoPtrI);
+ handle.m_cnt = 2;
#if defined DEBUG_SCAN_FRAGREQ
- ndbout_c("SCAN_FRAGREQ to %x", ref);
- printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
- NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
- DBLQH);
- printf("ATTRINFO: ");
- print(handle.m_ptr[0], stdout);
- printf("KEYINFO: ");
- print(handle.m_ptr[1], stdout);
+ ndbout_c("SCAN_FRAGREQ to %x", ref);
+ printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
+ NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
+ DBLQH);
+ printf("ATTRINFO: ");
+ print(handle.m_ptr[0], stdout);
+ printf("KEYINFO: ");
+ print(handle.m_ptr[1], stdout);
#endif
- if (refToNode(ref) == getOwnNodeId())
- {
- c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1);
- }
- else
- {
- c_Counters.incr_counter(CI_REMOTE_RANGE_SCANS_SENT, 1);
- }
+ if (refToNode(ref) == getOwnNodeId())
+ {
+ c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1);
+ }
+ else
+ {
+ c_Counters.incr_counter(CI_REMOTE_RANGE_SCANS_SENT, 1);
+ }
- if (prune && !repeatable)
- {
- /**
- * For a non-repeatable pruned scan, key info is unique for each
- * fragment and therefore cannot be reused, so we release key info
- * right away.
- */
- jam();
- sendSignal(ref, GSN_SCAN_FRAGREQ, signal,
- NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle);
- fragPtr.p->m_rangePtrI = RNIL;
- fragPtr.p->reset_ranges();
- }
- else
- {
- /**
- * Reuse key info for multiple fragments and/or multiple repetitions
- * of the scan.
- */
- jam();
- sendSignalNoRelease(ref, GSN_SCAN_FRAGREQ, signal,
- NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle);
- }
- handle.clear();
+ if (prune && !repeatable)
+ {
+ /**
+ * For a non-repeatable pruned scan, key info is unique for each
+ * fragment and therefore cannot be reused, so we release key info
+ * right away.
+ */
+ jam();
+ sendSignal(ref, GSN_SCAN_FRAGREQ, signal,
+ NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle);
+ fragPtr.p->m_rangePtrI = RNIL;
+ fragPtr.p->reset_ranges();
+ }
+ else
+ {
+ /**
+ * Reuse key info for multiple fragments and/or multiple repetitions
+ * of the scan.
+ */
+ jam();
+ sendSignalNoRelease(ref, GSN_SCAN_FRAGREQ, signal,
+ NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle);
+ }
+ handle.clear();
- fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING; // running
- data.m_frags_outstanding++;
- batchRange += bs_rows;
- requestsSent++;
- list.next(fragPtr);
- } // while (requestsSent < noOfFrags)
+ fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING; // running
+ data.m_frags_outstanding++;
+ data.m_frags_not_started--;
+ batchRange += bs_rows;
+ requestsSent++;
+ list.next(fragPtr);
+ } // while (requestsSent < noOfFrags)
+ }
+ if (err)
+ {
+ jam();
+ abort(signal, requestPtr, err);
+ }
- data.m_frags_not_started -= requestsSent;
+ return requestsSent;
}
void
@@ -5807,16 +5928,23 @@ Dbspj::scanIndex_execSCAN_FRAGCONF(Signa
if (unlikely(bs_rows > bs_bytes))
bs_rows = bs_bytes;
- scanIndex_send(signal,
- requestPtr,
- treeNodePtr,
- data.m_frags_not_started,
- bs_bytes,
- bs_rows,
- batchRange);
- return;
+ Uint32 frags_started =
+ scanIndex_send(signal,
+ requestPtr,
+ treeNodePtr,
+ data.m_frags_not_started,
+ bs_bytes,
+ bs_rows,
+ batchRange);
+
+ if (likely(frags_started > 0))
+ return;
+
+ // Else: scanIndex_send() didn't send anything for some reason.
+ // Need to continue into 'completion detection' below.
+ jam();
}
- }
+ } // (data.m_frags_outstanding == 0)
if (data.m_rows_received != data.m_rows_expecting)
{
@@ -5974,43 +6102,44 @@ Dbspj::scanIndex_execSCAN_NEXTREQ(Signal
/**
* First, ask for more data from fragments that are already started.
*/
- Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
- list.first(fragPtr);
+ Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
+ list.first(fragPtr);
while (sentFragCount < data.m_parallelism && !fragPtr.isNull())
- {
- jam();
+ {
+ jam();
ndbassert(fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ ||
fragPtr.p->m_state == ScanFragHandle::SFH_COMPLETE ||
fragPtr.p->m_state == ScanFragHandle::SFH_NOT_STARTED);
- if (fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ)
- {
- jam();
-
- data.m_frags_outstanding++;
- req->variableData[0] = batchRange;
- fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING;
- batchRange += bs_rows;
+ if (fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ)
+ {
+ jam();
- DEBUG("scanIndex_execSCAN_NEXTREQ to: " << hex
- << treeNodePtr.p->m_send.m_ref
- << ", m_node_no=" << treeNodePtr.p->m_node_no
- << ", senderData: " << req->senderData);
+ data.m_frags_outstanding++;
+ req->variableData[0] = batchRange;
+ fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING;
+ batchRange += bs_rows;
+
+ DEBUG("scanIndex_execSCAN_NEXTREQ to: " << hex
+ << treeNodePtr.p->m_send.m_ref
+ << ", m_node_no=" << treeNodePtr.p->m_node_no
+ << ", senderData: " << req->senderData);
#ifdef DEBUG_SCAN_FRAGREQ
- printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
- ScanFragNextReq:: SignalLength + 1, DBLQH);
+ printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
+ ScanFragNextReq:: SignalLength + 1, DBLQH);
#endif
- req->senderData = fragPtr.i;
- sendSignal(fragPtr.p->m_ref, GSN_SCAN_NEXTREQ, signal,
- ScanFragNextReq::SignalLength + 1,
- JBB);
+ req->senderData = fragPtr.i;
+ sendSignal(fragPtr.p->m_ref, GSN_SCAN_NEXTREQ, signal,
+ ScanFragNextReq::SignalLength + 1,
+ JBB);
sentFragCount++;
}
list.next(fragPtr);
}
}
+ Uint32 frags_started = 0;
if (sentFragCount < data.m_parallelism)
{
/**
@@ -6018,25 +6147,29 @@ Dbspj::scanIndex_execSCAN_NEXTREQ(Signal
*/
jam();
ndbassert(data.m_frags_not_started != 0);
- scanIndex_send(signal,
- requestPtr,
- treeNodePtr,
- data.m_parallelism - sentFragCount,
- org->batch_size_bytes/data.m_parallelism,
- bs_rows,
- batchRange);
+ frags_started =
+ scanIndex_send(signal,
+ requestPtr,
+ treeNodePtr,
+ data.m_parallelism - sentFragCount,
+ org->batch_size_bytes/data.m_parallelism,
+ bs_rows,
+ batchRange);
}
/**
- * cursor should not have been positioned here...
- * unless we actually had something more to send.
- * so require that we did actually send something
+ * sendSignal() or scanIndex_send() might have failed to send:
+ * Check that we really did send something before
+ * updating outstanding & active.
*/
- ndbrequire(data.m_frags_outstanding > 0);
- ndbrequire(data.m_batch_chunks > 0);
- data.m_batch_chunks++;
+ if (likely(sentFragCount+frags_started > 0))
+ {
+ jam();
+ ndbrequire(data.m_batch_chunks > 0);
+ data.m_batch_chunks++;
- requestPtr.p->m_outstanding++;
- ndbassert(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
+ requestPtr.p->m_outstanding++;
+ ndbassert(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
+ }
}
void
@@ -7216,7 +7349,6 @@ Dbspj::parseDA(Build_context& ctx,
* - 17051: Fail on parseDA if 'isLeaf'
* - 17052: Fail on parseDA if treeNode not root
* - 17053: Fail on parseDA at a random node of the query tree
- * -
*/
if (ERROR_INSERTED_CLEAR(17050) ||
((treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17051))) ||
=== modified file 'storage/ndb/src/ndbapi/NdbQueryBuilder.cpp'
--- a/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2012-02-23 15:41:31 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2012-03-22 14:18:01 +0000
@@ -1033,13 +1033,6 @@ NdbQueryBuilder::scanIndex(const NdbDict
returnErrIf(!m_impl.m_operations[0]->isScanOperation(),
QRY_WRONG_OPERATION_TYPE);
- // If the root is a sorted scan, we should not add another scan.
- const NdbQueryOptions::ScanOrdering rootOrder =
- m_impl.m_operations[0]->getOrdering();
- returnErrIf(rootOrder == NdbQueryOptions::ScanOrdering_ascending ||
- rootOrder == NdbQueryOptions::ScanOrdering_descending,
- QRY_MULTIPLE_SCAN_SORTED);
-
if (options != NULL)
{
// A child scan should not be sorted.
=== modified file 'storage/ndb/src/ndbapi/NdbQueryOperation.cpp'
--- a/storage/ndb/src/ndbapi/NdbQueryOperation.cpp 2012-02-23 15:41:31 +0000
+++ b/storage/ndb/src/ndbapi/NdbQueryOperation.cpp 2012-03-22 14:18:01 +0000
@@ -122,6 +122,7 @@ public:
explicit TupleCorrelation(Uint32 val)
: m_correlation(val)
{}
+
Uint32 toUint32() const
{ return m_correlation; }
@@ -3078,6 +3079,18 @@ NdbQueryImpl::doSend(int nodeId, bool la
batchByteSize);
assert(batchRows==root.getMaxBatchRows());
assert(batchRows<=batchByteSize);
+
+ /**
+ * Check if query is a sorted scan-scan.
+ * Ordering can then only be guaranteed by restricting
+ * parent batch to contain single rows.
+ * (Child scans will have 'normal' batch size).
+ */
+ if (root.getOrdering() != NdbQueryOptions::ScanOrdering_unordered &&
+ getQueryDef().getQueryType() == NdbQueryDef::MultiScanQuery)
+ {
+ batchRows = 1;
+ }
ScanTabReq::setScanBatch(reqInfo, batchRows);
scanTabReq->batch_byte_size = batchByteSize;
scanTabReq->first_batch_size = batchRows;
@@ -5106,16 +5119,6 @@ NdbQueryOperationImpl::setOrdering(NdbQu
return -1;
}
- /* Check if query is sorted and has multiple scan operations. This
- * combination is not implemented.
- */
- if (ordering != NdbQueryOptions::ScanOrdering_unordered &&
- getQueryDef().getQueryType() == NdbQueryDef::MultiScanQuery)
- {
- getQuery().setErrorCode(QRY_MULTIPLE_SCAN_SORTED);
- return -1;
- }
-
m_ordering = ordering;
return 0;
} // NdbQueryOperationImpl::setOrdering()
=== modified file 'storage/ndb/test/ndbapi/testSpj.cpp'
--- a/storage/ndb/test/ndbapi/testSpj.cpp 2012-03-01 15:13:54 +0000
+++ b/storage/ndb/test/ndbapi/testSpj.cpp 2012-03-22 14:18:01 +0000
@@ -31,7 +31,7 @@ static int faultToInject = 0;
enum faultsToInject {
FI_START = 17001,
- FI_END = 17063
+ FI_END = 17093
};
int
@@ -120,7 +120,9 @@ runLookupJoinError(NDBT_Context* ctx, ND
17030, 17031, 17032, // LQHKEYREQ reply is LQHKEYREF('Invalid..')
17040, 17041, 17042, // lookup_parent_row -> OutOfQueryMemory
17050, 17051, 17052, 17053, // parseDA -> outOfSectionMem
- 17060, 17061, 17062, 17063 // scanIndex_parent_row -> outOfSectionMem
+ 17060, 17061, 17062, 17063, // scanIndex_parent_row -> outOfSectionMem
+ 17070, 17071, 17072, // lookup_send.dupsec -> outOfSectionMem
+ 17080, 17081, 17082 // lookup_parent_row -> OutOfQueryMemory
};
loops = faultToInject ? 1 : sizeof(lookupFaults)/sizeof(int);
@@ -206,7 +208,10 @@ runScanJoinError(NDBT_Context* ctx, NDBT
17030, 17031, 17032, // LQHKEYREQ reply is LQHKEYREF('Invalid..')
17040, 17041, 17042, // lookup_parent_row -> OutOfQueryMemory
17050, 17051, 17052, 17053, // parseDA -> outOfSectionMem
- 17060, 17061, 17062, 17063 // scanIndex_parent_row -> outOfSectionMem
+ 17060, 17061, 17062, 17063, // scanIndex_parent_row -> outOfSectionMem
+ 17070, 17071, 17072, // lookup_send.dupsec -> outOfSectionMem
+ 17080, 17081, 17082, // lookup_parent_row -> OutOfQueryMemory
+ 17090, 17091, 17092, 17093 // scanIndex_send -> OutOfSectionMemory
};
loops = faultToInject ? 1 : sizeof(scanFaults)/sizeof(int);
@@ -921,32 +926,6 @@ NegativeTest::runGraphTest() const
builder->destroy();
return NDBT_FAILED;
}
- builder->destroy();
- }
-
- // Try adding a child scan to a sorted query.
- {
- NdbQueryBuilder* const builder = NdbQueryBuilder::create();
-
- NdbQueryOptions parentOptions;
- parentOptions.setOrdering(NdbQueryOptions::ScanOrdering_ascending);
-
- const NdbQueryIndexScanOperationDef* parentOperation
- = builder->scanIndex(m_nt1OrdIdx, m_nt1Tab, NULL, &parentOptions);
- ASSERT_ALWAYS(parentOperation != NULL);
-
- const NdbQueryOperand* const childOperands[] =
- {builder->linkedValue(parentOperation, "ui1"),
- NULL};
- const NdbQueryIndexBound bound(childOperands);
-
- if (builder->scanIndex(m_nt1OrdIdx, m_nt1Tab, &bound) != NULL ||
- builder->getNdbError().code != QRY_MULTIPLE_SCAN_SORTED)
- {
- g_err << "Sorted query with scan child scan gave unexpected result.";
- builder->destroy();
- return NDBT_FAILED;
- }
builder->destroy();
}
=== modified file 'storage/ndb/test/tools/spj_sanity_test.cpp'
--- a/storage/ndb/test/tools/spj_sanity_test.cpp 2011-04-14 08:59:45 +0000
+++ b/storage/ndb/test/tools/spj_sanity_test.cpp 2012-03-22 14:18:01 +0000
@@ -63,8 +63,8 @@
#define QRY_SCAN_ORDER_ALREADY_SET 4821
#define QRY_PARAMETER_HAS_WRONG_TYPE 4822
#define QRY_CHAR_PARAMETER_TRUNCATED 4823
-#define QRY_MULTIPLE_SCAN_BRANCHES 4824
-#define QRY_MULTIPLE_SCAN_SORTED 4825
+#define QRY_MULTIPLE_SCAN_SORTED 4824
+#define QRY_BATCH_SIZE_TOO_SMALL 4825
namespace SPJSanityTest{
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.5-cluster-7.3 branch (jonas.oreland:3868 to 3869) | Jonas Oreland | 28 Mar |