From: Ole John Aske Date: March 23 2012 1:34pm Subject: bzr push into mysql-trunk-cluster branch (ole.john.aske:3458 to 3459) List-Archive: http://lists.mysql.com/commits/143304 Message-Id: <20120323133448.B08D9244@fimafeng09.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3459 Ole John Aske 2012-03-23 SPJ: Addapt 'Abstract query plan' interface and SPJ-optimizer integration to latest review comments. These changes has been made possible by rewrites introduced by WL5558 and adding native sorted-scan-scan support to the SPJ interface: - AQP::Join_plan* argument to several methods can now be made 'const' as AQP will not any longer modify the query plan. - Join_plan::group_by_filesort_is_skippable() & ::order_by_filesort_is_skippable() has become obsolete. - Integrate optimizer code which sets up the pushed join version of *read_first_record accessor functions into pick_table_access_method(). - Change optimizer such that pick_table_access_method() is called at the end of the optimizer phase when the complete query plan is known (Including pushed join execution) modified: mysql-test/suite/ndb/t/ndb_join_pushdown.inc sql/abstract_query_plan.cc sql/abstract_query_plan.h sql/ha_ndbcluster.cc sql/handler.cc sql/handler.h sql/sql_executor.cc sql/sql_executor.h sql/sql_optimizer.cc sql/sql_select.cc 3458 Ole John Aske 2012-03-23 Cherry picked implementation of native support of pushed sorted-scan-scan joins which has already been pushe dto mysql-5.5-cluster-7.2-spj modified: mysql-test/suite/ndb/r/ndb_join_pushdown_default.result mysql-test/suite/ndb/r/ndb_join_pushdown_nobnl.result mysql-test/suite/ndb/r/ndb_join_pushdown_none.result sql/abstract_query_plan.cc sql/abstract_query_plan.h sql/ha_ndbcluster.cc sql/ha_ndbcluster.h sql/ha_ndbcluster_push.cc sql/ha_ndbcluster_push.h sql/handler.h sql/sql_optimizer.cc storage/ndb/src/ndbapi/NdbQueryBuilder.cpp storage/ndb/src/ndbapi/NdbQueryOperation.cpp storage/ndb/test/ndbapi/testSpj.cpp storage/ndb/test/tools/spj_sanity_test.cpp === modified file 'mysql-test/suite/ndb/t/ndb_join_pushdown.inc' --- a/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 2012-03-05 14:02:05 +0000 +++ b/mysql-test/suite/ndb/t/ndb_join_pushdown.inc 2012-03-23 13:33:58 +0000 @@ -786,14 +786,14 @@ explain extended (x.a <= 2 or (x.a,x.b) in ((0,0),(5,0),(4,3))) and y.a=x.d and y.b=x.b; -# Test ORDER BY expressons +# Test ORDER BY expressions # Filesort on pushed joins are not possible as the # read of rows to be filesorted will also prefetch rows from pushed child # operands. These are not cached by the filesort buffer mechanisnm and are # effectively lost. # With pushed joins we either has to: # 1) find a suitable ordered index which we can create an ordered indexscan on -# (-> joinType() -> JT_NExT, or type: 'index' w/ explain) +# (-> joinType() -> JT_NEXT, or type: 'index' w/ explain) # or: # 2) Use a temporary result file for the result, which is then filesort'ed # (-> 'Using temporary; Using filesort') === modified file 'sql/abstract_query_plan.cc' --- a/sql/abstract_query_plan.cc 2012-03-23 12:15:24 +0000 +++ b/sql/abstract_query_plan.cc 2012-03-23 13:33:58 +0000 @@ -63,22 +63,6 @@ namespace AQP return m_join_tabs + join_tab_no; } - bool - Join_plan::group_by_filesort_is_skippable() const - { - const JOIN* const join= m_join_tabs->join; - return (join->group_list && join->simple_group && - join->ordered_index_usage==JOIN::ordered_index_group_by); - } - - bool - Join_plan::order_by_filesort_is_skippable() const - { - const JOIN* const join= m_join_tabs->join; - return (join->order && join->simple_order && - join->ordered_index_usage==JOIN::ordered_index_order_by); - } - /** Determine join type between this table access and some other table access that preceeds it in the join plan.. @@ -308,7 +292,12 @@ namespace AQP "'PROCEDURE'-clause post processing cannot be pushed."; DBUG_VOID_RETURN; } - + + /** + * OLEJA: I think this restriction can be removed + * now as WL5558 and other changes has cleaned up the + * ORDER/GROUP BY optimize + execute path. + */ if (join->group_list && !join->tmp_table_param.quick_group) { m_access_type= AT_OTHER; === modified file 'sql/abstract_query_plan.h' --- a/sql/abstract_query_plan.h 2012-03-23 12:15:24 +0000 +++ b/sql/abstract_query_plan.h 2012-03-23 13:33:58 +0000 @@ -71,14 +71,6 @@ namespace AQP uint get_access_count() const; - /** - Can filesort(), normally required by execution of GROUP BY - or ORDER BY, be skipped due to the columns already being - accessible in required sorted order. - */ - bool group_by_filesort_is_skippable() const; - bool order_by_filesort_is_skippable() const; - private: /** Array of the JOIN_TABs that are the internal representation of table === modified file 'sql/ha_ndbcluster.cc' --- a/sql/ha_ndbcluster.cc 2012-03-23 12:15:24 +0000 +++ b/sql/ha_ndbcluster.cc 2012-03-23 13:33:58 +0000 @@ -811,7 +811,10 @@ SHOW_VAR ndb_status_index_stat_variables }; #ifndef NDB_WITHOUT_JOIN_PUSHDOWN -static int ndbcluster_make_pushed_join(handlerton *, THD*,AQP::Join_plan*, uint*); +static int ndbcluster_make_pushed_join(handlerton *, + THD*, + const AQP::Join_plan*, + uint*); #endif /* @@ -14826,7 +14829,7 @@ ha_ndbcluster::read_multi_range_fetch_ne static int ndbcluster_make_pushed_join(handlerton *hton, THD* thd, - AQP::Join_plan* plan, + const AQP::Join_plan* plan, uint* pushed) { DBUG_ENTER("ndbcluster_make_pushed_join"); === modified file 'sql/handler.cc' --- a/sql/handler.cc 2012-03-09 11:07:20 +0000 +++ b/sql/handler.cc 2012-03-23 13:33:58 +0000 @@ -4347,9 +4347,9 @@ int ha_table_exists_in_engine(THD* thd, */ struct st_make_pushed_join_args { - AQP::Join_plan* plan; // Query plan provided by optimizer - uint pushed; // #operations which was pushed. - int err; // Error code to return. + const AQP::Join_plan* plan; // Query plan provided by optimizer + uint pushed; // #operations which was pushed. + int err; // Error code to return. }; static my_bool make_pushed_join_handlerton(THD *thd, plugin_ref plugin, @@ -4372,7 +4372,7 @@ static my_bool make_pushed_join_handlert return FALSE; } -int ha_make_pushed_joins(THD *thd, AQP::Join_plan* plan, uint* pushed) +int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan, uint* pushed) { DBUG_ENTER("ha_make_pushed_joins"); st_make_pushed_join_args args= {plan, 0, 0}; === modified file 'sql/handler.h' --- a/sql/handler.h 2012-03-23 12:15:24 +0000 +++ b/sql/handler.h 2012-03-23 13:33:58 +0000 @@ -863,7 +863,7 @@ struct handlerton #ifndef MCP_WL4784 int (*make_pushed_join)(handlerton *hton, THD* thd, - AQP::Join_plan* plan, + const AQP::Join_plan* plan, uint* pushed); #endif #ifndef MCP_GLOBAL_SCHEMA_LOCK @@ -2790,7 +2790,7 @@ int ha_release_savepoint(THD *thd, SAVEP #ifndef MCP_WL4784 /* Build pushed joins in handlers implementing this feature */ -int ha_make_pushed_joins(THD *thd, AQP::Join_plan* plan, uint* pushed); +int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan, uint* pushed); #endif /* these are called by storage engines */ === modified file 'sql/sql_executor.cc' --- a/sql/sql_executor.cc 2012-03-09 11:07:20 +0000 +++ b/sql/sql_executor.cc 2012-03-23 13:33:58 +0000 @@ -140,37 +140,6 @@ JOIN::exec() THD_STAGE_INFO(thd, stage_executing); -#ifndef MCP_WL4784 - int active_pushed_joins= 0; - - // Set up table accessors for child operations of pushed joins - for (uint i=const_tables ; i < tables ; i++) - { - JOIN_TAB *tab=join_tab+i; - - uint pushed_joins= tab->table->file->number_of_pushed_joins(); - if (pushed_joins > 0) - { - if (tab->table->file->root_of_pushed_join() == tab->table) - { - active_pushed_joins += pushed_joins; - } - else - { - // Is child of a pushed join operation: - // Replace 'read_key' access with its linked counterpart - // ... Which is effectively a NOOP as the row is read as part of the linked operation - tab->read_first_record= join_read_linked_first; - DBUG_ASSERT(tab->read_record.read_record != join_read_next_same_or_null); - tab->read_record.read_record= join_read_linked_next; - tab->read_record.unlock_row= rr_unlock_row; // FIXME: likely incorrect - } - active_pushed_joins--; - } - } - DBUG_ASSERT(active_pushed_joins==0); -#endif // MCP_WL4784 - if (prepare_result(&columns_list)) DBUG_VOID_RETURN; @@ -3139,7 +3108,7 @@ join_read_last_key(JOIN_TAB *tab) /* ARGSUSED */ static int -join_no_more_records(READ_RECORD *info __attribute__((unused))) +join_no_more_records(READ_RECORD *info) { #ifndef MCP_WL4784 /** @@ -3417,9 +3386,47 @@ join_read_next_same_or_null(READ_RECORD @param tab Table reference to put access method */ +#ifndef MCP_WL4784 +void +pick_table_access_method(JOIN_TAB *tab, int *active_pushed_joins) +{ + uint pushed_joins= tab->table->file->number_of_pushed_joins(); + + /** + Set up modified access function for pushed joins. + */ + if (pushed_joins > 0) + { + active_pushed_joins--; + if (tab->table->file->root_of_pushed_join() == tab->table) + { + *active_pushed_joins += pushed_joins; + } + else + { + // Is child of a pushed join operation: + // Replace 'read_key' access with its linked counterpart + // ... Which is effectively a NOOP as the row is read + // as part of the linked operation + DBUG_ASSERT(tab->type != JT_REF_OR_NULL); + tab->read_first_record= join_read_linked_first; + tab->read_record.read_record= join_read_linked_next; + tab->read_record.unlock_row= rr_unlock_row; + return; + } + } + + // Already set to some non-default value in sql_select.cc + // TODO: Move these settings into pick_table_access_method() also + else if (tab->read_first_record != NULL) + return; +#else + void pick_table_access_method(JOIN_TAB *tab) { +#endif // MCP_WL4784 + switch (tab->type) { case JT_REF: === modified file 'sql/sql_executor.h' --- a/sql/sql_executor.h 2011-12-14 12:32:55 +0000 +++ b/sql/sql_executor.h 2012-03-23 13:33:58 +0000 @@ -125,7 +125,12 @@ int report_error(TABLE *table, int error int safe_index_read(JOIN_TAB *tab); SORT_FIELD * make_unireg_sortorder(ORDER *order, uint *length, SORT_FIELD *sortorder); + +#ifndef MCP_WL4784 +void pick_table_access_method(JOIN_TAB *tab, int *active_pushed_joins); +#else void pick_table_access_method(JOIN_TAB *tab); +#endif int join_read_const_table(JOIN_TAB *tab, POSITION *pos); void join_read_key_unlock_row(st_join_table *tab); === modified file 'sql/sql_optimizer.cc' --- a/sql/sql_optimizer.cc 2012-03-23 12:15:24 +0000 +++ b/sql/sql_optimizer.cc 2012-03-23 13:33:58 +0000 @@ -950,6 +950,17 @@ JOIN::optimize() #ifndef MCP_WL4784 if (make_pushed_join(thd, this)) DBUG_RETURN(1); + + { + int active_pushed_joins= 0; + for (uint i= this->const_tables; i < this->tables; i++) + { + JOIN_TAB *const tab= this->join_tab+i; + pick_table_access_method (tab, &active_pushed_joins); + } + /* Ensure all (if any) pushed joins were processed. */ + DBUG_ASSERT(!active_pushed_joins); + } #endif tmp_having= having; @@ -984,6 +995,17 @@ setup_subq_exit: } #ifndef MCP_WL4784 +/** + Push join to handler, if possible. Currently this is only supported by + NDB. + + @note If it happens so that handler can't do ordering this function + will force tmp table creation and resolving ORDER/GROUP BY by filesort. + + @returns + 0 ok, join isn't pushed or pushed without errors. + error handler's error otherwise +*/ static int make_pushed_join(THD *thd, JOIN *join) { @@ -1004,12 +1026,14 @@ make_pushed_join(THD *thd, JOIN *join) if (join->const_tables < join->tables && join->join_tab[join->const_tables].table->file->number_of_pushed_joins() > 0) { - if (join->group_list && join->simple_group && !plan.group_by_filesort_is_skippable()) + if (join->group_list && join->simple_group && + join->ordered_index_usage!=JOIN::ordered_index_group_by) { join->need_tmp= 1; join->simple_order= join->simple_group= 0; } - else if (join->order && join->simple_order && !plan.order_by_filesort_is_skippable()) + else if (join->order && join->simple_order && + join->ordered_index_usage!=JOIN::ordered_index_order_by) { join->need_tmp= 1; join->simple_order= join->simple_group= 0; === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2012-03-09 11:07:20 +0000 +++ b/sql/sql_select.cc 2012-03-23 13:33:58 +0000 @@ -2633,7 +2633,13 @@ make_join_readinfo(JOIN *join, ulonglong tab->sorted= (tab->type != JT_EQ_REF) ? sorted : false; sorted= false; // only first must be sorted table->status= STATUS_GARBAGE | STATUS_NOT_FOUND; +#ifndef MCP_WL4784 + tab->read_first_record= NULL; // Access methods not set yet + tab->read_record.read_record= NULL; + tab->read_record.unlock_row= rr_unlock_row; +#else pick_table_access_method (tab); +#endif Opt_trace_object trace_refine_table(trace); trace_refine_table.add_utf8_table(table); @@ -3830,7 +3836,9 @@ test_if_skip_sort_order(JOIN_TAB *tab,OR goto use_filesort; DBUG_ASSERT(tab->type != JT_REF_OR_NULL && tab->type != JT_FT); +#ifdef MCP_WL4784 pick_table_access_method(tab); +#endif } else { No bundle (reason: useless for push emails).