From: Ole John Aske Date: October 21 2010 8:32am Subject: bzr push into mysql-5.1-telco-7.0-spj-scan-vs-scan branch (ole.john.aske:3321 to 3322) List-Archive: http://lists.mysql.com/commits/121484 Message-Id: <20101021083251.6931521D@fimafeng09.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3322 Ole John Aske 2010-10-21 spj-svs: Enhanced implementation of handling 'sorted indexscan(t1) -> index ref(t2)' We previously had to block pushing of scan childs if the root operation was an ordered index scan as we are not able to provide these result ordered through the root operation. This fix is now reverted - Instead we now allow these kind of queries to be pushed, but require the result to be written to temp. file where it is filesorted. We believe the overhead of this extra filesort to be far less than not pushing the query. modified: mysql-test/suite/ndb/r/ndb_join_pushdown.result sql/abstract_query_plan.cc sql/abstract_query_plan.h sql/ha_ndbcluster.cc sql/handler.h sql/sql_select.cc 3321 Ole John Aske 2010-10-21 [merge] Merge from telco-spj branch modified: sql/ha_ndbcluster.cc === modified file 'mysql-test/suite/ndb/r/ndb_join_pushdown.result' --- a/mysql-test/suite/ndb/r/ndb_join_pushdown.result 2010-10-21 07:50:44 +0000 +++ b/mysql-test/suite/ndb/r/ndb_join_pushdown.result 2010-10-21 08:31:51 +0000 @@ -3929,8 +3929,8 @@ join t1 as x2 on x1.a=x2.b join t1 as x3 on x2.a=x3.b order by x1.pk limit 70; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE x1 index NULL PRIMARY 4 NULL 10 -1 SIMPLE x2 ref ix1 ix1 5 test.x1.a 1 Parent of 2 pushed join@1; Using where +1 SIMPLE x1 index NULL PRIMARY 4 NULL 10 Parent of 3 pushed join@1; Using temporary; Using filesort +1 SIMPLE x2 ref ix1 ix1 5 test.x1.a 1 Child of pushed join@1; Using where 1 SIMPLE x3 ref ix1 ix1 5 test.x2.a 1 Child of pushed join@1; Using where select x1.pk,x1.a,x1.b from t1 as x1 join t1 as x2 on x1.a=x2.b @@ -4009,8 +4009,8 @@ pk a b 2 20 20 explain select * from t1 as x1, t1 as x2 where x1.a=x2.b and x1.b = 3; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE x1 ref ix1 ix1 5 const 10 Using where with pushed condition -1 SIMPLE x2 ref ix1 ix1 5 test.x1.a 1 Using where +1 SIMPLE x1 ref ix1 ix1 5 const 10 Parent of 2 pushed join@1; Using where with pushed condition +1 SIMPLE x2 ref ix1 ix1 5 test.x1.a 1 Child of pushed join@1; Using where select * from t1 as x1, t1 as x2 where x1.a=x2.b and x1.b = 3; pk a b pk a b drop table t1; @@ -4071,10 +4071,10 @@ counter_name spj_counts_at_end.val - spj CONST_PRUNED_RANGE_SCANS_RECEIVED 6 LOCAL_TABLE_SCANS_SENT 196 PRUNED_RANGE_SCANS_RECEIVED 16 -RANGE_SCANS_RECEIVED 205 +RANGE_SCANS_RECEIVED 203 READS_NOT_FOUND 405 READS_RECEIVED 61 -SCAN_ROWS_RETURNED 63643 +SCAN_ROWS_RETURNED 63863 TABLE_SCANS_RECEIVED 196 select sum(spj_counts_at_end.val - spj_counts_at_startup.val) as 'LOCAL+REMOTE READS_SENT' from spj_counts_at_end, spj_counts_at_startup @@ -4086,15 +4086,15 @@ LOCAL+REMOTE READS_SENT drop table spj_counts_at_startup; drop table spj_counts_at_end; scan_count -2003 +2000 pruned_scan_count 7 sorted_scan_count -44 +7 pushed_queries_defined -341 +343 pushed_queries_dropped 11 pushed_queries_executed -265 +264 set ndb_join_pushdown = @save_ndb_join_pushdown; === modified file 'sql/abstract_query_plan.cc' --- a/sql/abstract_query_plan.cc 2010-09-24 07:29:43 +0000 +++ b/sql/abstract_query_plan.cc 2010-10-21 08:31:51 +0000 @@ -449,16 +449,6 @@ namespace AQP {} /** - @return True if sorted results are expetced from this operation. - */ - bool Table_access::is_sorted() const - { - return get_join_tab()->type == JT_NEXT || - (get_join_tab()->type == JT_REF && get_join_tab()->sorted) || - (get_join_tab()->type == JT_REF_OR_NULL && get_join_tab()->sorted); - } - - /** @param plan Iterate over fields within this plan. @param field_item Iterate over Item_fields equal to this. */ === modified file 'sql/abstract_query_plan.h' --- a/sql/abstract_query_plan.h 2010-09-24 07:29:43 +0000 +++ b/sql/abstract_query_plan.h 2010-10-21 08:31:51 +0000 @@ -205,8 +205,6 @@ namespace AQP void dbug_print() const; - bool is_sorted() const; - private: /** Backref. to the Join_plan which this Table_access is part of */ === modified file 'sql/ha_ndbcluster.cc' --- a/sql/ha_ndbcluster.cc 2010-10-21 08:14:23 +0000 +++ b/sql/ha_ndbcluster.cc 2010-10-21 08:31:51 +0000 @@ -1302,13 +1302,6 @@ ha_ndbcluster::make_pushed_join(AQP::Joi DBUG_PRINT("info", ("Table %d not a pushable access type", join_cnt)); continue; } - if (join_root->is_sorted() && !is_lookup_operation(child_type)) - { - // Sorted scan cannot have scan descendant. - DBUG_PRINT("info", ("Table %d is scanned. Therefore it cannot be pushed" - " as a descendant of a sorted scan", join_cnt)); - continue; - } if (!context.field_ref_is_join_pushable(join_tab, join_items, join_parent)) { DBUG_PRINT("info", ("Table %d not REF-joined, not pushable", join_cnt)); @@ -1631,8 +1624,7 @@ private: */ bool ha_ndbcluster::check_if_pushable(const NdbQueryOperationTypeWrapper& type, - uint idx, - bool rootSorted) const + uint idx, bool needSorted) const { if (m_pushed_join == NULL) { @@ -1685,6 +1677,12 @@ ha_ndbcluster::check_if_pushable(const N case NdbQueryOperationDef::TableScan: DBUG_ASSERT (idx==MAX_KEY); + if (needSorted) + { + DBUG_PRINT("info", ("TableScan access not not be provied as sorted result. ", + "Therefore, join cannot be pushed.")); + return FALSE; + } break; case NdbQueryOperationDef::OrderedIndexScan: @@ -1692,30 +1690,12 @@ ha_ndbcluster::check_if_pushable(const N // DBUG_ASSERT(m_index[idx].index == expected_index); if (m_index[idx].index != expected_index) { - DBUG_PRINT("info", ("Actual index %s differs from expected index %s." + DBUG_PRINT("info", ("Actual index %s differs from expected index %s. " "Therefore, join cannot be pushed.", m_index[idx].index->getName(), expected_index->getName())); return FALSE; } - // Check that we do not have a sorted scan with sub scans. - if (rootSorted) - { - const NdbQueryDef& query_def = m_pushed_join->get_query_def(); - for (uint i= 1; i < query_def.getNoOfOperations(); i++) - { - const NdbQueryOperationTypeWrapper& child_type= - query_def.getQueryOperation(i)->getType(); - if (child_type == NdbQueryOperationDef::TableScan || - child_type == NdbQueryOperationDef::OrderedIndexScan) - { - DBUG_PRINT("info", ("If the root operation is a sorted scan, then " - "there may not be scan children. This join " - "cannot be pushed.")); - return FALSE; - } - } - } break; default: @@ -1902,6 +1882,46 @@ ha_ndbcluster::test_push_flag(enum ha_pu } DBUG_RETURN(false); + case HA_PUSH_NO_ORDERED_INDEX: + { + if (!m_pushed_join) + { + DBUG_RETURN(true); + } + const NdbQueryDef& query_def = m_pushed_join->get_query_def(); + const NdbQueryOperationTypeWrapper& root_type= + query_def.getQueryOperation(0U)->getType(); + + /** + * Primary key/ unique key lookup is always 'ordered' wrt. itself. + */ + if (root_type == NdbQueryOperationDef::PrimaryKeyAccess || + root_type == NdbQueryOperationDef::UniqueIndexAccess) + { + DBUG_RETURN(false); + } + + /** + * Ordered index scan can be provided as an ordered resultset iff + * it has no child scans. + */ + if (root_type == NdbQueryOperationDef::OrderedIndexScan) + { + for (uint i= 1; i < query_def.getNoOfOperations(); i++) + { + const NdbQueryOperationTypeWrapper& child_type= + query_def.getQueryOperation(i)->getType(); + if (child_type == NdbQueryOperationDef::TableScan || + child_type == NdbQueryOperationDef::OrderedIndexScan) + { + DBUG_RETURN(true); + } + } + DBUG_RETURN(false); + } + DBUG_RETURN(true); + } + default: DBUG_ASSERT(0); DBUG_RETURN(false); === modified file 'sql/handler.h' --- a/sql/handler.h 2010-10-15 21:04:14 +0000 +++ b/sql/handler.h 2010-10-21 08:31:51 +0000 @@ -441,6 +441,11 @@ enum ha_push_flag { within this pushed join */ ,HA_PUSH_MULTIPLE_DEPENDENCY + + /* Handler is unable to return the result in sorted order using an + ordered index on the parent operation. + */ + ,HA_PUSH_NO_ORDERED_INDEX }; /** === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2010-10-21 07:50:44 +0000 +++ b/sql/sql_select.cc 2010-10-21 08:31:51 +0000 @@ -1702,21 +1702,23 @@ make_pushed_join(THD *thd, JOIN *join) } } - /* If we just pushed a join containing an ORDER BY and/or GROUP BY clause, + /* If we just pushed a join containing an ORDER BY and/or a GROUP BY clause, * we have to ensure that we either can skip the sort by scanning an ordered index, * or write to a temp. table later being filesorted. */ if (join->const_tables < join->tables && join->join_tab[join->const_tables].table->file->is_parent_of_pushed_join()) { + const handler *ha=join->join_tab[join->const_tables].table->file; + if (join->group_list && join->simple_group && - !plan.group_by_filesort_is_skippable()) + (!plan.group_by_filesort_is_skippable() || ha->test_push_flag(HA_PUSH_NO_ORDERED_INDEX))) { join->need_tmp= 1; join->simple_order= join->simple_group= 0; } else if (join->order && join->simple_order && - !plan.order_by_filesort_is_skippable()) + (!plan.order_by_filesort_is_skippable() || ha->test_push_flag(HA_PUSH_NO_ORDERED_INDEX))) { join->need_tmp= 1; join->simple_order= join->simple_group= 0; @@ -12111,8 +12113,8 @@ join_read_key_unlock_row(st_join_table * /** Read a table *assumed* to be included in execution of a pushed join. - This is the counterpart of join_read_key() for child tables in a - pushed join. + This is the counterpart of join_read_key() / join_read_always_key() + for child tables in a pushed join. When the table access is performed as part of the pushed join, all 'linked' child colums are prefetched together with the parent row. @@ -12139,6 +12141,7 @@ join_read_linked_first(JOIN_TAB *tab) TABLE *table= tab->table; DBUG_ENTER("join_read_linked_first"); + DBUG_ASSERT(!tab->sorted); // Pushed child can't be sorted if (!table->file->inited) table->file->ha_index_init(tab->ref.key, tab->sorted); @@ -13973,6 +13976,7 @@ check_reverse_order: DBUG_RETURN(0); // Reverse sort not supported } select->quick=tmp; + DBUG_ASSERT(select->quick->sorted); } } else if (tab->type != JT_NEXT && tab->type != JT_REF_OR_NULL && No bundle (reason: useless for push emails).