From: Jonas Oreland Date: March 28 2012 10:51am Subject: bzr push into mysql-5.5-cluster-7.3 branch (jonas.oreland:3868 to 3869) List-Archive: http://lists.mysql.com/commits/143329 Message-Id: <20120328105144.C1E2C55C8EA@perch.localdomain> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3869 Jonas Oreland 2012-03-28 [merge] ndb - merge 72-spj into 73 modified: .bzr-mysql/default.conf mysql-test/suite/ndb/r/ndb_join_pushdown_default.result mysql-test/suite/ndb/t/ndb_join_pushdown.inc sql/abstract_query_plan.cc sql/abstract_query_plan.h sql/ha_ndbcluster.cc sql/ha_ndbcluster.h sql/ha_ndbcluster_push.cc sql/ha_ndbcluster_push.h sql/handler.h sql/sql_select.cc storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp storage/ndb/src/ndbapi/NdbQueryBuilder.cpp storage/ndb/src/ndbapi/NdbQueryOperation.cpp storage/ndb/test/ndbapi/testSpj.cpp storage/ndb/test/tools/spj_sanity_test.cpp 3868 Martin Zaun 2012-03-26 [merge] Bug#54854 - merge === modified file '.bzr-mysql/default.conf' --- a/.bzr-mysql/default.conf 2012-02-23 15:41:31 +0000 +++ b/.bzr-mysql/default.conf 2012-03-28 10:41:26 +0000 @@ -1,4 +1,4 @@ [MYSQL] post_commit_to = "commits@stripped" post_push_to = "commits@stripped" -tree_name = "mysql-5.5-cluster-7.2" +tree_name = "mysql-5.5-cluster-7.3" === modified file 'mysql-test/suite/ndb/r/ndb_join_pushdown_default.result' --- a/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result 2012-02-23 15:41:31 +0000 +++ b/mysql-test/suite/ndb/r/ndb_join_pushdown_default.result 2012-03-22 14:18:01 +0000 @@ -2099,6 +2099,77 @@ count(*) 20000 drop table t1; drop table tx; +create table t1 ( +a int not null, +b int not null, +c int not null, +d int not null, +primary key (`a`,`b`) +) engine=ndbcluster partition by key(a); +insert into t1 values +(1,1,1,1), (2,2,2,2), (3,3,3,3), (4,4,4,4), +(1,2,5,1), (1,3,1,2), (1,4,2,3), +(2,1,3,4), (2,3,4,5), (2,4,5,1), +(3,1,1,2), (3,2,2,3), (3,4,3,4), 
+(4,1,4,5), (4,2,5,1), (4,3,1,2); +explain extended +select * +from t1 +join t1 as t2 on t2.a = t1.b and t2.b = t1.c; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 16 100.00 Parent of 2 pushed join@1 +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 8 test.t1.b,test.t1.c 1 100.00 Child of 't1' in pushed join@1 +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d` from `test`.`t1` join `test`.`t1` `t2` where ((`test`.`t2`.`b` = `test`.`t1`.`c`) and (`test`.`t2`.`a` = `test`.`t1`.`b`)) +set new=on; +alter table t1 partition by hash(a); +explain extended +select * +from t1 +join t1 as t2 on t2.a = t1.b and t2.b = t1.c; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 16 100.00 +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 8 test.t1.b,test.t1.c 1 100.00 +Warnings: +Note 9999 Table 't1' is not pushable: has user defined partioning +Note 9999 Table 't2' is not pushable: has user defined partioning +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d` from `test`.`t1` join `test`.`t1` `t2` where ((`test`.`t2`.`b` = `test`.`t1`.`c`) and (`test`.`t2`.`a` = `test`.`t1`.`b`)) +alter table t1 partition by list(a) ( +partition p1 values in (1), +partition p2 values in (2), +partition p3 values in (3), +partition p4 values in (4) +); +explain extended +select * +from t1 +join t1 as t2 on t2.a = t1.b and t2.b = t1.c; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 16 100.00 +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 8 test.t1.b,test.t1.c 1 100.00 +Warnings: +Note 9999 Table 't1' is not pushable: has user defined 
partioning +Note 9999 Table 't2' is not pushable: has user defined partioning +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d` from `test`.`t1` join `test`.`t1` `t2` where ((`test`.`t2`.`b` = `test`.`t1`.`c`) and (`test`.`t2`.`a` = `test`.`t1`.`b`)) +alter table t1 partition by range(a) partitions 4 ( +partition p1 values less than (0), +partition p2 values less than (2), +partition p3 values less than (4), +partition p4 values less than (99999) +); +explain extended +select * +from t1 +join t1 as t2 on t2.a = t1.b and t2.b = t1.c; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 16 100.00 +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 8 test.t1.b,test.t1.c 1 100.00 +Warnings: +Note 9999 Table 't1' is not pushable: has user defined partioning +Note 9999 Table 't2' is not pushable: has user defined partioning +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d` from `test`.`t1` join `test`.`t1` `t2` where ((`test`.`t2`.`b` = `test`.`t1`.`c`) and (`test`.`t2`.`a` = `test`.`t1`.`b`)) +drop table t1; +set new=default; create table t1 (a int, b int, primary key(a) using hash) engine = ndb; insert into t1 values (1, 2); insert into t1 values (2, 3); @@ -5071,7 +5142,7 @@ join t1 as x2 on x1.a=x2.b join t1 as x3 on x2.a=x3.b order by x1.pk limit 70; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE x1 index NULL PRIMARY 4 NULL 10 100.00 Parent of 3 pushed join@1; Using temporary; Using filesort +1 SIMPLE x1 index NULL PRIMARY 4 NULL 10 100.00 Parent of 3 pushed join@1 1 SIMPLE x2 ref ix1 ix1 5 test.x1.a 2 100.00 Child of 'x1' in pushed join@1; Using where 1 SIMPLE x3 ref ix1 ix1 5 
test.x2.a 2 100.00 Child of 'x2' in pushed join@1; Using where Warnings: @@ -5216,10 +5287,9 @@ on t1.pk2 = t2.pk1 where t1.pk1 != 6 order by t1.pk1 DESC; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 6 66.67 Using where with pushed condition: (`test`.`t1`.`pk1` <> 6) -1 SIMPLE t2 ref PRIMARY PRIMARY 4 test.t1.pk2 1 100.00 +1 SIMPLE t1 range PRIMARY PRIMARY 4 NULL 6 66.67 Parent of 2 pushed join@1; Using where with pushed condition: (`test`.`t1`.`pk1` <> 6) +1 SIMPLE t2 ref PRIMARY PRIMARY 4 test.t1.pk2 1 100.00 Child of 't1' in pushed join@1 Warnings: -Note 9999 Push of table 't2' as scan-child with ordered indexscan-root 't1' not implemented Note 1003 select `test`.`t1`.`pk1` AS `pk1`,`test`.`t1`.`pk2` AS `pk2`,`test`.`t2`.`pk1` AS `pk1`,`test`.`t2`.`pk2` AS `pk2` from `test`.`t` `t1` join `test`.`t` `t2` where ((`test`.`t2`.`pk1` = `test`.`t1`.`pk2`) and (`test`.`t1`.`pk1` <> 6)) order by `test`.`t1`.`pk1` desc select * from t as t1 join t as t2 on t1.pk2 = t2.pk1 @@ -5548,7 +5618,7 @@ counter_name spj_counts_at_end.val - spj CONST_PRUNED_RANGE_SCANS_RECEIVED 8 LOCAL_TABLE_SCANS_SENT 250 PRUNED_RANGE_SCANS_RECEIVED 27 -RANGE_SCANS_RECEIVED 732 +RANGE_SCANS_RECEIVED 736 READS_RECEIVED 58 TABLE_SCANS_RECEIVED 250 drop table spj_counts_at_startup; @@ -5560,9 +5630,9 @@ where new.variable_name = old.variable_n order by new.variable_name; variable_name new.variable_value - old.variable_value NDB_PRUNED_SCAN_COUNT 8 -NDB_PUSHED_QUERIES_DEFINED 405 +NDB_PUSHED_QUERIES_DEFINED 408 NDB_PUSHED_QUERIES_DROPPED 8 -NDB_PUSHED_QUERIES_EXECUTED 550 -NDB_SORTED_SCAN_COUNT 10 +NDB_PUSHED_QUERIES_EXECUTED 552 +NDB_SORTED_SCAN_COUNT 11 drop table server_counts_at_startup; set ndb_join_pushdown = @save_ndb_join_pushdown; === modified file 'mysql-test/suite/ndb/t/ndb_join_pushdown.inc' --- a/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 2012-02-23 15:41:31 +0000 +++ b/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 
2012-03-20 09:42:12 +0000 @@ -1055,6 +1055,76 @@ connection ddl; drop table t1; drop table tx; +# Test user defined partition not being pushed +# +# Note: User defined partitions are handled +# by the SQL layer, and as such are unknown +# to the NDB datanodes. +# + +connection spj; +create table t1 ( + a int not null, + b int not null, + c int not null, + d int not null, + primary key (`a`,`b`) +) engine=ndbcluster partition by key(a); + +connection spj; +insert into t1 values +(1,1,1,1), (2,2,2,2), (3,3,3,3), (4,4,4,4), +(1,2,5,1), (1,3,1,2), (1,4,2,3), +(2,1,3,4), (2,3,4,5), (2,4,5,1), +(3,1,1,2), (3,2,2,3), (3,4,3,4), +(4,1,4,5), (4,2,5,1), (4,3,1,2); + +# Only this query('partition by key') should be pushed +explain extended +select * +from t1 +join t1 as t2 on t2.a = t1.b and t2.b = t1.c; + +connection ddl; +set new=on; +alter table t1 partition by hash(a); + +explain extended +select * +from t1 +join t1 as t2 on t2.a = t1.b and t2.b = t1.c; + +connection ddl; +alter table t1 partition by list(a) ( + partition p1 values in (1), + partition p2 values in (2), + partition p3 values in (3), + partition p4 values in (4) +); + +explain extended +select * +from t1 +join t1 as t2 on t2.a = t1.b and t2.b = t1.c; + +connection ddl; +alter table t1 partition by range(a) partitions 4 ( + partition p1 values less than (0), + partition p2 values less than (2), + partition p3 values less than (4), + partition p4 values less than (99999) +); + +explain extended +select * +from t1 +join t1 as t2 on t2.a = t1.b and t2.b = t1.c; + +connection ddl; +drop table t1; +set new=default; + + # pushed mrr does not yet handle multiple PK operations in same transaction # Need 6.0 result handling stuff to simplify result handling # *** join push is currently dissabled for these **** === modified file 'sql/abstract_query_plan.cc' --- a/sql/abstract_query_plan.cc 2011-10-20 19:52:11 +0000 +++ b/sql/abstract_query_plan.cc 2012-03-22 14:18:01 +0000 @@ -63,6 +63,10 @@ namespace AQP return 
m_join_tabs + join_tab_no; } + /** + * Check if either a GROUP BY or ORDER BY could be + * executed without sorting by reading an ordered index. + */ void Join_plan::find_skippabable_group_or_order() const { @@ -546,28 +550,6 @@ namespace AQP {} /** - @return True iff ordered index access is *required* from this operation. - */ - bool Table_access::is_fixed_ordered_index() const - { - const JOIN_TAB* const join_tab= get_join_tab(); - - /* For the QUICK_SELECT_I classes we can disable ordered index usage by - * setting 'QUICK_SELECT_I::sorted = false'. - * However, QUICK_SELECT_I::QS_TYPE_RANGE_DESC is special as its - * internal implementation requires its 'multi-ranges' to be retrieved - * in (descending) sorted order from the underlying table. - */ - if (join_tab->select != NULL && - join_tab->select->quick != NULL) - { - QUICK_SELECT_I *quick= join_tab->select->quick; - return (quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE_DESC); - } - return false; - } - - /** Check if the results from this operation will joined with results from the next operation using a join buffer (instead of plain nested loop). @return True if using a join buffer. 
=== modified file 'sql/abstract_query_plan.h' --- a/sql/abstract_query_plan.h 2011-10-20 19:52:11 +0000 +++ b/sql/abstract_query_plan.h 2012-03-22 14:18:01 +0000 @@ -210,8 +210,6 @@ namespace AQP void dbug_print() const; - bool is_fixed_ordered_index() const; - bool uses_join_cache() const; private: === modified file 'sql/ha_ndbcluster.cc' --- a/sql/ha_ndbcluster.cc 2012-03-27 04:18:59 +0000 +++ b/sql/ha_ndbcluster.cc 2012-03-28 10:41:26 +0000 @@ -4023,8 +4023,7 @@ int ha_ndbcluster::ordered_index_scan(co } #ifndef NDB_WITHOUT_JOIN_PUSHDOWN - if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index, - sorted)) + if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index)) { const int error= create_pushed_join(); if (unlikely(error)) @@ -13919,8 +13918,7 @@ ha_ndbcluster::read_multi_range_first(KE #ifndef NDB_WITHOUT_JOIN_PUSHDOWN /* Create the scan operation for the first scan range. */ if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, - active_index, - !m_active_query && sorted)) + active_index)) { if (!m_active_query) { @@ -14578,8 +14576,7 @@ ha_ndbcluster::maybe_pushable_join(const #ifndef NDB_WITHOUT_JOIN_PUSHDOWN bool ha_ndbcluster::check_if_pushable(int type, //NdbQueryOperationDef::Type, - uint idx, - bool needSorted) const + uint idx) const { if (m_disable_pushed_join) { @@ -14590,8 +14587,7 @@ ha_ndbcluster::check_if_pushable(int typ && m_pushed_join_member != NULL && m_pushed_join_member->match_definition( type, - (idxget_query_def(); - const NdbQueryOperationDef::Type root_type= - query_def.getQueryOperation((uint)PUSHED_ROOT)->getType(); - - /** - * Primary key/ unique key lookup is always 'ordered' wrt. itself. - */ - if (root_type == NdbQueryOperationDef::PrimaryKeyAccess || - root_type == NdbQueryOperationDef::UniqueIndexAccess) - { - DBUG_RETURN(false); - } - - /** - * Ordered index scan can be provided as an ordered resultset iff - * it has no child scans. 
- */ - if (root_type == NdbQueryOperationDef::OrderedIndexScan) - { - for (uint i= 1; i < query_def.getNoOfOperations(); i++) - { - const NdbQueryOperationDef::Type child_type= - query_def.getQueryOperation(i)->getType(); - if (child_type == NdbQueryOperationDef::TableScan || - child_type == NdbQueryOperationDef::OrderedIndexScan) - { - DBUG_RETURN(true); - } - } - DBUG_RETURN(false); - } - DBUG_RETURN(true); - } - default: DBUG_ASSERT(0); DBUG_RETURN(false); === modified file 'sql/ha_ndbcluster.h' --- a/sql/ha_ndbcluster.h 2012-02-23 15:41:31 +0000 +++ b/sql/ha_ndbcluster.h 2012-03-22 14:18:01 +0000 @@ -445,8 +445,7 @@ private: bool check_index_fields_not_null(KEY *key_info) const; bool check_if_pushable(int type, //NdbQueryOperationDef::Type, - uint idx= MAX_KEY, - bool rootSorted= false) const; + uint idx= MAX_KEY) const; bool check_is_pushed() const; int create_pushed_join(const NdbQueryParamValue* keyFieldParams=NULL, uint paramCnt= 0); === modified file 'sql/ha_ndbcluster_push.cc' --- a/sql/ha_ndbcluster_push.cc 2011-09-30 11:05:03 +0000 +++ b/sql/ha_ndbcluster_push.cc 2012-03-22 14:18:01 +0000 @@ -153,8 +153,7 @@ ndb_pushed_join::~ndb_pushed_join() bool ndb_pushed_join::match_definition( int type, //NdbQueryOperationDef::Type, - const NDB_INDEX_DATA* idx, - bool needSorted) const + const NDB_INDEX_DATA* idx) const { const NdbQueryOperationDef* const root_operation= m_query_def->getQueryOperation((uint)0); @@ -195,13 +194,6 @@ bool ndb_pushed_join::match_definition( case NdbQueryOperationDef::TableScan: DBUG_ASSERT (idx==NULL && expected_index==NULL); - if (needSorted) - { - DBUG_PRINT("info", - ("TableScan access can not be provied as sorted result. 
" - "Therefore, join cannot be pushed.")); - return FALSE; - } break; case NdbQueryOperationDef::OrderedIndexScan: @@ -215,13 +207,6 @@ bool ndb_pushed_join::match_definition( expected_index->getName())); return FALSE; } - if (needSorted && m_query_def->getQueryType() == NdbQueryDef::MultiScanQuery) - { - DBUG_PRINT("info", - ("OrderedIndexScan with scan siblings " - "can not execute as pushed join.")); - return FALSE; - } break; default: @@ -622,16 +607,6 @@ ndb_pushed_builder_ctx::is_pushable_as_c DBUG_RETURN(false); } - if (access_type==AQP::AT_ORDERED_INDEX_SCAN && m_join_root->is_fixed_ordered_index()) - { - // root must be an ordered index scan - Thus it cannot have other scan descendant. - EXPLAIN_NO_PUSH("Push of table '%s' as scan-child " - "with ordered indexscan-root '%s' not implemented", - table->get_table()->alias, - m_join_root->get_table()->alias); - DBUG_RETURN(false); - } - if (table->get_no_of_key_fields() > ndb_pushed_join::MAX_LINKED_KEYS) { EXPLAIN_NO_PUSH("Can't push table '%s' as child, " === modified file 'sql/ha_ndbcluster_push.h' --- a/sql/ha_ndbcluster_push.h 2011-09-30 11:05:03 +0000 +++ b/sql/ha_ndbcluster_push.h 2012-03-22 14:18:01 +0000 @@ -98,8 +98,7 @@ public: * of operation specified by the arguments. */ bool match_definition(int type, //NdbQueryOperationDef::Type, - const NDB_INDEX_DATA* idx, - bool needSorted) const; + const NDB_INDEX_DATA* idx) const; /** Create an executable instance of this defined query. */ NdbQuery* make_query_instance( === modified file 'sql/handler.h' --- a/sql/handler.h 2012-03-21 17:28:20 +0000 +++ b/sql/handler.h 2012-03-23 07:44:14 +0000 @@ -501,11 +501,6 @@ enum ha_push_flag { within this pushed join */ ,HA_PUSH_MULTIPLE_DEPENDENCY - - /* Handler is unable to return the result in sorted order using an - ordered index on the parent operation. 
- */ - ,HA_PUSH_NO_ORDERED_INDEX }; #endif === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2012-02-23 15:41:31 +0000 +++ b/sql/sql_select.cc 2012-03-22 14:18:01 +0000 @@ -1780,16 +1780,12 @@ make_pushed_join(THD *thd, JOIN *join) if (join->const_tables < join->tables && join->join_tab[join->const_tables].table->file->number_of_pushed_joins() > 0) { - const handler *ha=join->join_tab[join->const_tables].table->file; - - if (join->group_list && join->simple_group && - (!plan.group_by_filesort_is_skippable() || ha->test_push_flag(HA_PUSH_NO_ORDERED_INDEX))) + if (join->group_list && join->simple_group && !plan.group_by_filesort_is_skippable()) { join->need_tmp= 1; join->simple_order= join->simple_group= 0; } - else if (join->order && join->simple_order && - (!plan.order_by_filesort_is_skippable() || ha->test_push_flag(HA_PUSH_NO_ORDERED_INDEX))) + else if (join->order && join->simple_order && !plan.order_by_filesort_is_skippable()) { join->need_tmp= 1; join->simple_order= join->simple_group= 0; === modified file 'storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp' --- a/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2011-11-16 08:17:17 +0000 +++ b/storage/ndb/src/kernel/blocks/dbspj/Dbspj.hpp 2012-03-20 09:42:12 +0000 @@ -1232,13 +1232,13 @@ private: void scanIndex_execSCAN_FRAGCONF(Signal*, Ptr, Ptr, Ptr); void scanIndex_parent_row(Signal*,Ptr,Ptr, const RowPtr&); void scanIndex_fixupBound(Ptr fragPtr, Uint32 ptrI, Uint32); - void scanIndex_send(Signal* signal, - Ptr requestPtr, - Ptr treeNodePtr, - Uint32 noOfFrags, - Uint32 bs_bytes, - Uint32 bs_rows, - Uint32& batchRange); + Uint32 scanIndex_send(Signal* signal, + Ptr requestPtr, + Ptr treeNodePtr, + Uint32 noOfFrags, + Uint32 bs_bytes, + Uint32 bs_rows, + Uint32& batchRange); void scanIndex_batchComplete(Signal* signal); Uint32 scanIndex_findFrag(Local_ScanFragHandle_list &, Ptr&, Uint32 fragId); === modified file 'storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp' --- 
a/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2012-03-09 15:37:45 +0000 +++ b/storage/ndb/src/kernel/blocks/dbspj/DbspjMain.cpp 2012-03-20 09:42:12 +0000 @@ -3059,145 +3059,192 @@ Dbspj::lookup_send(Signal* signal, Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI; Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI; - if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT) + Uint32 err = 0; + + do { - jam(); + if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT) + { + jam(); + /** + * Pass sections to send + */ + treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL; + treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL; + } + else + { + if ((treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED) == 0) + { + jam(); + Uint32 tmp = RNIL; + if (!dupSection(tmp, keyInfoPtrI)) + { + jam(); + err = DbspjErr::OutOfSectionMemory; + break; + } + + keyInfoPtrI = tmp; + } + else + { + jam(); + treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL; + } + + if ((treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED) == 0) + { + jam(); + Uint32 tmp = RNIL; + + /** + * Test execution terminated due to 'OutOfSectionMemory' which + * may happen for different treeNodes in the request: + * - 17070: Fail on any lookup_send() + * - 17071: Fail on lookup_send() if 'isLeaf' + * - 17072: Fail on lookup_send() if treeNode not root + */ + + if (ERROR_INSERTED_CLEAR(17070) || + (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17071)) || + (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17072))) + { + jam(); + ndbout_c("Injecting OutOfSectionMemory error at line %d file %s", + __LINE__, __FILE__); + if (keyInfoPtrI != RNIL) + releaseSection(keyInfoPtrI); + err = DbspjErr::OutOfSectionMemory; + break; + } + + if (!dupSection(tmp, attrInfoPtrI)) + { + jam(); + if (keyInfoPtrI != RNIL) + releaseSection(keyInfoPtrI); + err = DbspjErr::OutOfSectionMemory; + break; + } + + attrInfoPtrI = tmp; + } + else + { + jam(); + treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL; + } + } + + 
getSection(handle.m_ptr[0], keyInfoPtrI); + getSection(handle.m_ptr[1], attrInfoPtrI); + handle.m_cnt = 2; + /** - * Pass sections to send + * Inject error to test LQHKEYREF handling: + * Tampering with tableSchemaVersion such that LQH will + * return LQHKEYREF('1227: Invalid schema version') + * May happen for different treeNodes in the request: + * - 17030: Fail on any lookup_send() + * - 17031: Fail on lookup_send() if 'isLeaf' + * - 17032: Fail on lookup_send() if treeNode not root */ - treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL; - treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL; - } - else - { - if ((treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED) == 0) + if (ERROR_INSERTED_CLEAR(17030) || + (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17031)) || + (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17032))) { jam(); - Uint32 tmp = RNIL; - ndbrequire(dupSection(tmp, keyInfoPtrI)); // TODO handle error - keyInfoPtrI = tmp; + req->tableSchemaVersion += (1 << 16); // Provoke 'Invalid schema version' + } + +#if defined DEBUG_LQHKEYREQ + ndbout_c("LQHKEYREQ to %x", ref); + printLQHKEYREQ(stdout, signal->getDataPtrSend(), + NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq), + DBLQH); + printf("KEYINFO: "); + print(handle.m_ptr[0], stdout); + printf("ATTRINFO: "); + print(handle.m_ptr[1], stdout); +#endif + + Uint32 Tnode = refToNode(ref); + if (Tnode == getOwnNodeId()) + { + c_Counters.incr_counter(CI_LOCAL_READS_SENT, 1); } else { - jam(); - treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL; + c_Counters.incr_counter(CI_REMOTE_READS_SENT, 1); } - if ((treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED) == 0) + /** + * Test execution terminated due to 'NodeFailure' which + * may happen for different treeNodes in the request: + * - 17020: Fail on any lookup_send() + * - 17021: Fail on lookup_send() if 'isLeaf' + * - 17022: Fail on lookup_send() if treeNode not root + */ + if (ERROR_INSERTED_CLEAR(17020) || + (treeNodePtr.p->isLeaf() && 
ERROR_INSERTED_CLEAR(17021)) || + (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17022))) { jam(); - Uint32 tmp = RNIL; - ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error - attrInfoPtrI = tmp; + releaseSections(handle); + err = DbspjErr::NodeFailure; + break; } - else + if (unlikely(!c_alive_nodes.get(Tnode))) { jam(); - treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL; + releaseSections(handle); + err = DbspjErr::NodeFailure; + break; + } + else if (! (treeNodePtr.p->isLeaf() && requestPtr.p->isLookup())) + { + jam(); + ndbassert(Tnode < NDB_ARRAY_SIZE(requestPtr.p->m_lookup_node_data)); + requestPtr.p->m_outstanding += cnt; + requestPtr.p->m_lookup_node_data[Tnode] += cnt; + // number wrapped + ndbrequire(! (requestPtr.p->m_lookup_node_data[Tnode] == 0)); } - } - - getSection(handle.m_ptr[0], keyInfoPtrI); - getSection(handle.m_ptr[1], attrInfoPtrI); - handle.m_cnt = 2; - - /** - * Inject error to test LQHKEYREF handling: - * Tampering with tableSchemaVersion such that LQH will - * return LQHKEYREF('1227: Invalid schema version') - * May happen for different treeNodes in the request: - * - 17030: Fail on any lookup_send() - * - 17031: Fail on lookup_send() if 'isLeaf' - * - 17032: Fail on lookup_send() if treeNode not root - */ - if (ERROR_INSERTED_CLEAR(17030) || - (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17031)) || - (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17032))) - { - jam(); - req->tableSchemaVersion += (1 << 16); // Provoke 'Invalid schema version' - } -#if defined DEBUG_LQHKEYREQ - ndbout_c("LQHKEYREQ to %x", ref); - printLQHKEYREQ(stdout, signal->getDataPtrSend(), - NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq), - DBLQH); - printf("KEYINFO: "); - print(handle.m_ptr[0], stdout); - printf("ATTRINFO: "); - print(handle.m_ptr[1], stdout); -#endif + sendSignal(ref, GSN_LQHKEYREQ, signal, + NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq), + JBB, &handle); - Uint32 Tnode = refToNode(ref); - 
if (Tnode == getOwnNodeId()) - { - c_Counters.incr_counter(CI_LOCAL_READS_SENT, 1); - } - else - { - c_Counters.incr_counter(CI_REMOTE_READS_SENT, 1); - } + treeNodePtr.p->m_lookup_data.m_outstanding += cnt; + if (requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()) + { + jam(); + /** + * Send TCKEYCONF with DirtyReadBit + Tnode, + * so that API can discover if Tnode while waiting for result + */ + Uint32 resultRef = req->variableData[0]; + Uint32 resultData = req->variableData[1]; - /** - * Test execution terminated due to 'NodeFailure' which - * may happen for different treeNodes in the request: - * - 17020: Fail on any lookup_send() - * - 17021: Fail on lookup_send() if 'isLeaf' - * - 17022: Fail on lookup_send() if treeNode not root - */ - if (ERROR_INSERTED_CLEAR(17020) || - (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17021)) || - (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17022))) - { - jam(); - releaseSections(handle); - abort(signal, requestPtr, DbspjErr::NodeFailure); - return; - } - if (unlikely(!c_alive_nodes.get(Tnode))) - { - jam(); - releaseSections(handle); - abort(signal, requestPtr, DbspjErr::NodeFailure); + TcKeyConf* conf = (TcKeyConf*)signal->getDataPtrSend(); + conf->apiConnectPtr = RNIL; // lookup transaction from operations... + conf->confInfo = 0; + TcKeyConf::setNoOfOperations(conf->confInfo, 1); + conf->transId1 = requestPtr.p->m_transId[0]; + conf->transId2 = requestPtr.p->m_transId[1]; + conf->operations[0].apiOperationPtr = resultData; + conf->operations[0].attrInfoLen = TcKeyConf::DirtyReadBit | Tnode; + Uint32 sigLen = TcKeyConf::StaticLength + TcKeyConf::OperationLength; + sendTCKEYCONF(signal, sigLen, resultRef, requestPtr.p->m_senderRef); + } return; } - else if (! 
(treeNodePtr.p->isLeaf() && requestPtr.p->isLookup())) - { - jam(); - ndbassert(Tnode < NDB_ARRAY_SIZE(requestPtr.p->m_lookup_node_data)); - requestPtr.p->m_outstanding += cnt; - requestPtr.p->m_lookup_node_data[Tnode] += cnt; - // number wrapped - ndbrequire(! (requestPtr.p->m_lookup_node_data[Tnode] == 0)); - } - - sendSignal(ref, GSN_LQHKEYREQ, signal, - NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq), - JBB, &handle); - - treeNodePtr.p->m_lookup_data.m_outstanding += cnt; - if (requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()) - { - jam(); - /** - * Send TCKEYCONF with DirtyReadBit + Tnode, - * so that API can discover if Tnode while waiting for result - */ - Uint32 resultRef = req->variableData[0]; - Uint32 resultData = req->variableData[1]; + while (0); - TcKeyConf* conf = (TcKeyConf*)signal->getDataPtrSend(); - conf->apiConnectPtr = RNIL; // lookup transaction from operations... - conf->confInfo = 0; - TcKeyConf::setNoOfOperations(conf->confInfo, 1); - conf->transId1 = requestPtr.p->m_transId[0]; - conf->transId2 = requestPtr.p->m_transId[1]; - conf->operations[0].apiOperationPtr = resultData; - conf->operations[0].attrInfoLen = TcKeyConf::DirtyReadBit | Tnode; - Uint32 sigLen = TcKeyConf::StaticLength + TcKeyConf::OperationLength; - sendTCKEYCONF(signal, sigLen, resultRef, requestPtr.p->m_senderRef); - } + ndbrequire(err); + jam(); + abort(signal, requestPtr, err); } void @@ -3540,7 +3587,32 @@ Dbspj::lookup_parent_row(Signal* signal, { jam(); Uint32 tmp = RNIL; - ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error + + /** + * Test execution terminated due to 'OutOfSectionMemory' which + * may happen for different treeNodes in the request: + * - 17080: Fail on lookup_parent_row + * - 17081: Fail on lookup_parent_row: if 'isLeaf' + * - 17082: Fail on lookup_parent_row: if treeNode not root + */ + + if (ERROR_INSERTED_CLEAR(17080) || + (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17081)) || + (treeNodePtr.p->m_parentPtrI != RNIL && 
ERROR_INSERTED_CLEAR(17082))) + { + jam(); + ndbout_c("Injecting OutOfSectionMemory error at line %d file %s", + __LINE__, __FILE__); + err = DbspjErr::OutOfSectionMemory; + break; + } + + if (!dupSection(tmp, attrInfoPtrI)) + { + jam(); + err = DbspjErr::OutOfSectionMemory; + break; + } Uint32 org_size; { @@ -4146,9 +4218,6 @@ Dbspj::scanFrag_send(Signal* signal, { jam(); - requestPtr.p->m_outstanding++; - requestPtr.p->m_cnt_active++; - treeNodePtr.p->m_state = TreeNode::TN_ACTIVE; Ptr scanFragHandlePtr; m_scanfraghandle_pool.getPtr(scanFragHandlePtr, treeNodePtr.p-> m_scanfrag_data.m_scanFragHandlePtrI); @@ -4215,6 +4284,10 @@ Dbspj::scanFrag_send(Signal* signal, NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq), JBB, &handle); + requestPtr.p->m_outstanding++; + requestPtr.p->m_cnt_active++; + treeNodePtr.p->m_state = TreeNode::TN_ACTIVE; + scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_SCANNING; treeNodePtr.p->m_scanfrag_data.m_rows_received = 0; treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0); @@ -5122,19 +5195,17 @@ Dbspj::scanIndex_parent_row(Signal* sign jam(); Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern); - /** - * Test execution terminated due to 'OutOfSectionMemory': - * - 17060: Fail on scanIndex_parent_row at first call - * - 17061: Fail on scanIndex_parent_row if 'isLeaf' - * - 17062: Fail on scanIndex_parent_row if treeNode not root - * - 17063: Fail on scanIndex_parent_row at a random node of the query tree - * - - */ - + /** + * Test execution terminated due to 'OutOfSectionMemory': + * - 17060: Fail on scanIndex_parent_row at first call + * - 17061: Fail on scanIndex_parent_row if 'isLeaf' + * - 17062: Fail on scanIndex_parent_row if treeNode not root + * - 17063: Fail on scanIndex_parent_row at a random node of the query tree + */ if (ERROR_INSERTED_CLEAR(17060) || ((rand() % 7) == 0 && ERROR_INSERTED_CLEAR(17061)) || ((treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17062))) || - 
((treeNodePtr.p->m_parentPtrI != RNIL &&ERROR_INSERTED_CLEAR(17063)))) + ((treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17063)))) { ndbout_c("Injecting OutOfSectionMemory error at line %d file %s", __LINE__, __FILE__); @@ -5338,7 +5409,7 @@ Dbspj::scanIndex_parent_batch_complete(S data.m_parallelism = static_cast(parallelism); #ifdef DEBUG_SCAN_FRAGREQ - DEBUG("::scanIndex_send() starting index scan with parallelism=" + DEBUG("::scanIndex_parent_batch_complete() starting index scan with parallelism=" << data.m_parallelism); #endif } @@ -5369,24 +5440,34 @@ Dbspj::scanIndex_parent_batch_complete(S } Uint32 batchRange = 0; - scanIndex_send(signal, - requestPtr, - treeNodePtr, - data.m_parallelism, - bs_bytes, - bs_rows, - batchRange); - - data.m_firstExecution = false; - - ndbrequire(static_cast(data.m_frags_outstanding + - data.m_frags_complete) <= - data.m_fragCount); + Uint32 frags_started = + scanIndex_send(signal, + requestPtr, + treeNodePtr, + data.m_parallelism, + bs_bytes, + bs_rows, + batchRange); - data.m_batch_chunks = 1; - requestPtr.p->m_cnt_active++; - requestPtr.p->m_outstanding++; - treeNodePtr.p->m_state = TreeNode::TN_ACTIVE; + /** + * scanIndex_send might fail to send (errors?): + * Check that we really did send something before + * updating outstanding & active. + */ + if (likely(frags_started > 0)) + { + jam(); + data.m_firstExecution = false; + + ndbrequire(static_cast(data.m_frags_outstanding + + data.m_frags_complete) <= + data.m_fragCount); + + data.m_batch_chunks = 1; + requestPtr.p->m_cnt_active++; + requestPtr.p->m_outstanding++; + treeNodePtr.p->m_state = TreeNode::TN_ACTIVE; + } } void @@ -5418,8 +5499,11 @@ Dbspj::scanIndex_parent_batch_repeat(Sig /** * Ask for the first batch for a number of fragments. + * + * Returns how many fragments we did request the + * 'first batch' from. 
(<= noOfFrags) */ -void +Uint32 Dbspj::scanIndex_send(Signal* signal, Ptr requestPtr, Ptr treeNodePtr, @@ -5460,147 +5544,184 @@ Dbspj::scanIndex_send(Signal* signal, req->batch_size_bytes = bs_bytes; req->batch_size_rows = bs_rows; + Uint32 err = 0; Uint32 requestsSent = 0; - Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments); - Ptr fragPtr; - list.first(fragPtr); - Uint32 keyInfoPtrI = fragPtr.p->m_rangePtrI; - ndbrequire(prune || keyInfoPtrI != RNIL); - /** - * Iterate over the list of fragments until we have sent as many - * SCAN_FRAGREQs as we should. - */ - while (requestsSent < noOfFrags) { - jam(); - ndbassert(!fragPtr.isNull()); - - if (fragPtr.p->m_state != ScanFragHandle::SFH_NOT_STARTED) - { - // Skip forward to the frags that we should send. - jam(); - list.next(fragPtr); - continue; - } - - const Uint32 ref = fragPtr.p->m_ref; - - if (noOfFrags==1 && !prune && - data.m_frags_not_started == data.m_fragCount && - refToNode(ref) != getOwnNodeId() && - list.hasNext(fragPtr)) + Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments); + Ptr fragPtr; + list.first(fragPtr); + Uint32 keyInfoPtrI = fragPtr.p->m_rangePtrI; + ndbrequire(prune || keyInfoPtrI != RNIL); + /** + * Iterate over the list of fragments until we have sent as many + * SCAN_FRAGREQs as we should. + */ + while (requestsSent < noOfFrags) { - /** - * If we are doing a scan with adaptive parallelism and start with - * parallelism=1 then it makes sense to fetch a batch from a fragment on - * the local data node. The reason for this is that if that fragment - * contains few rows, we may be able to read from several fragments in - * parallel. Then we minimize the total number of round trips (to remote - * data nodes) if we fetch the first fragment batch locally. 
- */ jam(); - list.next(fragPtr); - continue; - } + ndbassert(!fragPtr.isNull()); - SectionHandle handle(this); + if (fragPtr.p->m_state != ScanFragHandle::SFH_NOT_STARTED) + { + // Skip forward to the frags that we should send. + jam(); + list.next(fragPtr); + continue; + } - Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI; + const Uint32 ref = fragPtr.p->m_ref; - /** - * Set data specific for this fragment - */ - req->senderData = fragPtr.i; - req->fragmentNoKeyLen = fragPtr.p->m_fragId; - - if (prune) - { - jam(); - keyInfoPtrI = fragPtr.p->m_rangePtrI; - if (keyInfoPtrI == RNIL) + if (noOfFrags==1 && !prune && + data.m_frags_not_started == data.m_fragCount && + refToNode(ref) != getOwnNodeId() && + list.hasNext(fragPtr)) { /** - * Since we use pruning, we can see that no parent rows would hash - * to this fragment. + * If we are doing a scan with adaptive parallelism and start with + * parallelism=1 then it makes sense to fetch a batch from a fragment on + * the local data node. The reason for this is that if that fragment + * contains few rows, we may be able to read from several fragments in + * parallel. Then we minimize the total number of round trips (to remote + * data nodes) if we fetch the first fragment batch locally. */ jam(); - fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE; list.next(fragPtr); continue; } - if (!repeatable) + SectionHandle handle(this); + + Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI; + + /** + * Set data specific for this fragment + */ + req->senderData = fragPtr.i; + req->fragmentNoKeyLen = fragPtr.p->m_fragId; + + if (prune) { - /** - * If we'll use sendSignal() and we need to send the attrInfo several - * times, we need to copy them. (For repeatable or unpruned scans - * we use sendSignalNoRelease(), so then we do not need to copy.) 
- */ jam(); - Uint32 tmp = RNIL; - ndbrequire(dupSection(tmp, attrInfoPtrI)); // TODO handle error - attrInfoPtrI = tmp; + keyInfoPtrI = fragPtr.p->m_rangePtrI; + if (keyInfoPtrI == RNIL) + { + /** + * Since we use pruning, we can see that no parent rows would hash + * to this fragment. + */ + jam(); + fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE; + list.next(fragPtr); + continue; + } + + if (!repeatable) + { + /** + * If we'll use sendSignal() and we need to send the attrInfo several + * times, we need to copy them. (For repeatable or unpruned scans + * we use sendSignalNoRelease(), so then we do not need to copy.) + */ + jam(); + Uint32 tmp = RNIL; + + /** + * Test execution terminated due to 'OutOfSectionMemory' which + * may happen for different treeNodes in the request: + * - 17090: Fail on any scanIndex_send() + * - 17091: Fail after sending SCAN_FRAGREQ to some fragments + * - 17092: Fail on scanIndex_send() if 'isLeaf' + * - 17093: Fail on scanIndex_send() if treeNode not root + */ + + if (ERROR_INSERTED_CLEAR(17090) || + (requestsSent > 1 && ERROR_INSERTED_CLEAR(17091)) || + (treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17092)) || + (treeNodePtr.p->m_parentPtrI != RNIL && ERROR_INSERTED_CLEAR(17093))) + { + jam(); + ndbout_c("Injecting OutOfSectionMemory error at line %d file %s", + __LINE__, __FILE__); + err = DbspjErr::OutOfSectionMemory; + break; + } + + if (!dupSection(tmp, attrInfoPtrI)) + { + jam(); + err = DbspjErr::OutOfSectionMemory; + break; + } + + attrInfoPtrI = tmp; + } } - } - req->variableData[0] = batchRange; - getSection(handle.m_ptr[0], attrInfoPtrI); - getSection(handle.m_ptr[1], keyInfoPtrI); - handle.m_cnt = 2; + req->variableData[0] = batchRange; + getSection(handle.m_ptr[0], attrInfoPtrI); + getSection(handle.m_ptr[1], keyInfoPtrI); + handle.m_cnt = 2; #if defined DEBUG_SCAN_FRAGREQ - ndbout_c("SCAN_FRAGREQ to %x", ref); - printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(), - 
NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq), - DBLQH); - printf("ATTRINFO: "); - print(handle.m_ptr[0], stdout); - printf("KEYINFO: "); - print(handle.m_ptr[1], stdout); + ndbout_c("SCAN_FRAGREQ to %x", ref); + printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(), + NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq), + DBLQH); + printf("ATTRINFO: "); + print(handle.m_ptr[0], stdout); + printf("KEYINFO: "); + print(handle.m_ptr[1], stdout); #endif - if (refToNode(ref) == getOwnNodeId()) - { - c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1); - } - else - { - c_Counters.incr_counter(CI_REMOTE_RANGE_SCANS_SENT, 1); - } + if (refToNode(ref) == getOwnNodeId()) + { + c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1); + } + else + { + c_Counters.incr_counter(CI_REMOTE_RANGE_SCANS_SENT, 1); + } - if (prune && !repeatable) - { - /** - * For a non-repeatable pruned scan, key info is unique for each - * fragment and therefore cannot be reused, so we release key info - * right away. - */ - jam(); - sendSignal(ref, GSN_SCAN_FRAGREQ, signal, - NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle); - fragPtr.p->m_rangePtrI = RNIL; - fragPtr.p->reset_ranges(); - } - else - { - /** - * Reuse key info for multiple fragments and/or multiple repetitions - * of the scan. - */ - jam(); - sendSignalNoRelease(ref, GSN_SCAN_FRAGREQ, signal, - NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle); - } - handle.clear(); + if (prune && !repeatable) + { + /** + * For a non-repeatable pruned scan, key info is unique for each + * fragment and therefore cannot be reused, so we release key info + * right away. + */ + jam(); + sendSignal(ref, GSN_SCAN_FRAGREQ, signal, + NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle); + fragPtr.p->m_rangePtrI = RNIL; + fragPtr.p->reset_ranges(); + } + else + { + /** + * Reuse key info for multiple fragments and/or multiple repetitions + * of the scan. 
+ */ + jam(); + sendSignalNoRelease(ref, GSN_SCAN_FRAGREQ, signal, + NDB_ARRAY_SIZE(data.m_scanFragReq), JBB, &handle); + } + handle.clear(); - fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING; // running - data.m_frags_outstanding++; - batchRange += bs_rows; - requestsSent++; - list.next(fragPtr); - } // while (requestsSent < noOfFrags) + fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING; // running + data.m_frags_outstanding++; + data.m_frags_not_started--; + batchRange += bs_rows; + requestsSent++; + list.next(fragPtr); + } // while (requestsSent < noOfFrags) + } + if (err) + { + jam(); + abort(signal, requestPtr, err); + } - data.m_frags_not_started -= requestsSent; + return requestsSent; } void @@ -5807,16 +5928,23 @@ Dbspj::scanIndex_execSCAN_FRAGCONF(Signa if (unlikely(bs_rows > bs_bytes)) bs_rows = bs_bytes; - scanIndex_send(signal, - requestPtr, - treeNodePtr, - data.m_frags_not_started, - bs_bytes, - bs_rows, - batchRange); - return; + Uint32 frags_started = + scanIndex_send(signal, + requestPtr, + treeNodePtr, + data.m_frags_not_started, + bs_bytes, + bs_rows, + batchRange); + + if (likely(frags_started > 0)) + return; + + // Else: scanIndex_send() didn't send anything for some reason. + // Need to continue into 'completion detection' below. + jam(); } - } + } // (data.m_frags_outstanding == 0) if (data.m_rows_received != data.m_rows_expecting) { @@ -5974,43 +6102,44 @@ Dbspj::scanIndex_execSCAN_NEXTREQ(Signal /** * First, ask for more data from fragments that are already started. 
*/ - Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments); - list.first(fragPtr); + Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments); + list.first(fragPtr); while (sentFragCount < data.m_parallelism && !fragPtr.isNull()) - { - jam(); + { + jam(); ndbassert(fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ || fragPtr.p->m_state == ScanFragHandle::SFH_COMPLETE || fragPtr.p->m_state == ScanFragHandle::SFH_NOT_STARTED); - if (fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ) - { - jam(); - - data.m_frags_outstanding++; - req->variableData[0] = batchRange; - fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING; - batchRange += bs_rows; + if (fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ) + { + jam(); - DEBUG("scanIndex_execSCAN_NEXTREQ to: " << hex - << treeNodePtr.p->m_send.m_ref - << ", m_node_no=" << treeNodePtr.p->m_node_no - << ", senderData: " << req->senderData); + data.m_frags_outstanding++; + req->variableData[0] = batchRange; + fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING; + batchRange += bs_rows; + + DEBUG("scanIndex_execSCAN_NEXTREQ to: " << hex + << treeNodePtr.p->m_send.m_ref + << ", m_node_no=" << treeNodePtr.p->m_node_no + << ", senderData: " << req->senderData); #ifdef DEBUG_SCAN_FRAGREQ - printSCANFRAGNEXTREQ(stdout, &signal->theData[0], - ScanFragNextReq:: SignalLength + 1, DBLQH); + printSCANFRAGNEXTREQ(stdout, &signal->theData[0], + ScanFragNextReq:: SignalLength + 1, DBLQH); #endif - req->senderData = fragPtr.i; - sendSignal(fragPtr.p->m_ref, GSN_SCAN_NEXTREQ, signal, - ScanFragNextReq::SignalLength + 1, - JBB); + req->senderData = fragPtr.i; + sendSignal(fragPtr.p->m_ref, GSN_SCAN_NEXTREQ, signal, + ScanFragNextReq::SignalLength + 1, + JBB); sentFragCount++; } list.next(fragPtr); } } + Uint32 frags_started = 0; if (sentFragCount < data.m_parallelism) { /** @@ -6018,25 +6147,29 @@ Dbspj::scanIndex_execSCAN_NEXTREQ(Signal */ jam(); ndbassert(data.m_frags_not_started != 0); - 
scanIndex_send(signal, - requestPtr, - treeNodePtr, - data.m_parallelism - sentFragCount, - org->batch_size_bytes/data.m_parallelism, - bs_rows, - batchRange); + frags_started = + scanIndex_send(signal, + requestPtr, + treeNodePtr, + data.m_parallelism - sentFragCount, + org->batch_size_bytes/data.m_parallelism, + bs_rows, + batchRange); } /** - * cursor should not have been positioned here... - * unless we actually had something more to send. - * so require that we did actually send something + * sendSignal() or scanIndex_send() might have failed to send: + * Check that we really did send something before + * updating outstanding & active. */ - ndbrequire(data.m_frags_outstanding > 0); - ndbrequire(data.m_batch_chunks > 0); - data.m_batch_chunks++; + if (likely(sentFragCount+frags_started > 0)) + { + jam(); + ndbrequire(data.m_batch_chunks > 0); + data.m_batch_chunks++; - requestPtr.p->m_outstanding++; - ndbassert(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE); + requestPtr.p->m_outstanding++; + ndbassert(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE); + } } void @@ -7216,7 +7349,6 @@ Dbspj::parseDA(Build_context& ctx, * - 17051: Fail on parseDA if 'isLeaf' * - 17052: Fail on parseDA if treeNode not root * - 17053: Fail on parseDA at a random node of the query tree - * - */ if (ERROR_INSERTED_CLEAR(17050) || ((treeNodePtr.p->isLeaf() && ERROR_INSERTED_CLEAR(17051))) || === modified file 'storage/ndb/src/ndbapi/NdbQueryBuilder.cpp' --- a/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2012-02-23 15:41:31 +0000 +++ b/storage/ndb/src/ndbapi/NdbQueryBuilder.cpp 2012-03-22 14:18:01 +0000 @@ -1033,13 +1033,6 @@ NdbQueryBuilder::scanIndex(const NdbDict returnErrIf(!m_impl.m_operations[0]->isScanOperation(), QRY_WRONG_OPERATION_TYPE); - // If the root is a sorted scan, we should not add another scan. 
- const NdbQueryOptions::ScanOrdering rootOrder = - m_impl.m_operations[0]->getOrdering(); - returnErrIf(rootOrder == NdbQueryOptions::ScanOrdering_ascending || - rootOrder == NdbQueryOptions::ScanOrdering_descending, - QRY_MULTIPLE_SCAN_SORTED); - if (options != NULL) { // A child scan should not be sorted. === modified file 'storage/ndb/src/ndbapi/NdbQueryOperation.cpp' --- a/storage/ndb/src/ndbapi/NdbQueryOperation.cpp 2012-02-23 15:41:31 +0000 +++ b/storage/ndb/src/ndbapi/NdbQueryOperation.cpp 2012-03-22 14:18:01 +0000 @@ -122,6 +122,7 @@ public: explicit TupleCorrelation(Uint32 val) : m_correlation(val) {} + Uint32 toUint32() const { return m_correlation; } @@ -3078,6 +3079,18 @@ NdbQueryImpl::doSend(int nodeId, bool la batchByteSize); assert(batchRows==root.getMaxBatchRows()); assert(batchRows<=batchByteSize); + + /** + * Check if query is a sorted scan-scan. + * Ordering can then only be guaranteed by restricting + * parent batch to contain single rows. + * (Child scans will have 'normal' batch size). + */ + if (root.getOrdering() != NdbQueryOptions::ScanOrdering_unordered && + getQueryDef().getQueryType() == NdbQueryDef::MultiScanQuery) + { + batchRows = 1; + } ScanTabReq::setScanBatch(reqInfo, batchRows); scanTabReq->batch_byte_size = batchByteSize; scanTabReq->first_batch_size = batchRows; @@ -5106,16 +5119,6 @@ NdbQueryOperationImpl::setOrdering(NdbQu return -1; } - /* Check if query is sorted and has multiple scan operations. This - * combination is not implemented. 
- */ - if (ordering != NdbQueryOptions::ScanOrdering_unordered && - getQueryDef().getQueryType() == NdbQueryDef::MultiScanQuery) - { - getQuery().setErrorCode(QRY_MULTIPLE_SCAN_SORTED); - return -1; - } - m_ordering = ordering; return 0; } // NdbQueryOperationImpl::setOrdering() === modified file 'storage/ndb/test/ndbapi/testSpj.cpp' --- a/storage/ndb/test/ndbapi/testSpj.cpp 2012-03-01 15:13:54 +0000 +++ b/storage/ndb/test/ndbapi/testSpj.cpp 2012-03-22 14:18:01 +0000 @@ -31,7 +31,7 @@ static int faultToInject = 0; enum faultsToInject { FI_START = 17001, - FI_END = 17063 + FI_END = 17093 }; int @@ -120,7 +120,9 @@ runLookupJoinError(NDBT_Context* ctx, ND 17030, 17031, 17032, // LQHKEYREQ reply is LQHKEYREF('Invalid..') 17040, 17041, 17042, // lookup_parent_row -> OutOfQueryMemory 17050, 17051, 17052, 17053, // parseDA -> outOfSectionMem - 17060, 17061, 17062, 17063 // scanIndex_parent_row -> outOfSectionMem + 17060, 17061, 17062, 17063, // scanIndex_parent_row -> outOfSectionMem + 17070, 17071, 17072, // lookup_send.dupsec -> outOfSectionMem + 17080, 17081, 17082 // lookup_parent_row -> outOfSectionMem }; loops = faultToInject ? 1 : sizeof(lookupFaults)/sizeof(int); @@ -206,7 +208,10 @@ runScanJoinError(NDBT_Context* ctx, NDBT 17030, 17031, 17032, // LQHKEYREQ reply is LQHKEYREF('Invalid..') 17040, 17041, 17042, // lookup_parent_row -> OutOfQueryMemory 17050, 17051, 17052, 17053, // parseDA -> outOfSectionMem - 17060, 17061, 17062, 17063 // scanIndex_parent_row -> outOfSectionMem + 17060, 17061, 17062, 17063, // scanIndex_parent_row -> outOfSectionMem + 17070, 17071, 17072, // lookup_send.dupsec -> outOfSectionMem + 17080, 17081, 17082, // lookup_parent_row -> outOfSectionMem + 17090, 17091, 17092, 17093 // scanIndex_send -> outOfSectionMem }; loops = faultToInject ? 
1 : sizeof(scanFaults)/sizeof(int); @@ -921,32 +926,6 @@ NegativeTest::runGraphTest() const builder->destroy(); return NDBT_FAILED; } - builder->destroy(); - } - - // Try adding a child scan to a sorted query. - { - NdbQueryBuilder* const builder = NdbQueryBuilder::create(); - - NdbQueryOptions parentOptions; - parentOptions.setOrdering(NdbQueryOptions::ScanOrdering_ascending); - - const NdbQueryIndexScanOperationDef* parentOperation - = builder->scanIndex(m_nt1OrdIdx, m_nt1Tab, NULL, &parentOptions); - ASSERT_ALWAYS(parentOperation != NULL); - - const NdbQueryOperand* const childOperands[] = - {builder->linkedValue(parentOperation, "ui1"), - NULL}; - const NdbQueryIndexBound bound(childOperands); - - if (builder->scanIndex(m_nt1OrdIdx, m_nt1Tab, &bound) != NULL || - builder->getNdbError().code != QRY_MULTIPLE_SCAN_SORTED) - { - g_err << "Sorted query with scan child scan gave unexpected result."; - builder->destroy(); - return NDBT_FAILED; - } builder->destroy(); } === modified file 'storage/ndb/test/tools/spj_sanity_test.cpp' --- a/storage/ndb/test/tools/spj_sanity_test.cpp 2011-04-14 08:59:45 +0000 +++ b/storage/ndb/test/tools/spj_sanity_test.cpp 2012-03-22 14:18:01 +0000 @@ -63,8 +63,8 @@ #define QRY_SCAN_ORDER_ALREADY_SET 4821 #define QRY_PARAMETER_HAS_WRONG_TYPE 4822 #define QRY_CHAR_PARAMETER_TRUNCATED 4823 -#define QRY_MULTIPLE_SCAN_BRANCHES 4824 -#define QRY_MULTIPLE_SCAN_SORTED 4825 +#define QRY_MULTIPLE_SCAN_SORTED 4824 +#define QRY_BATCH_SIZE_TOO_SMALL 4825 namespace SPJSanityTest{ No bundle (reason: useless for push emails).