From: Ole John Aske Date: November 10 2011 2:09pm Subject: bzr push into mysql-trunk-cluster branch (ole.john.aske:3405 to 3406) List-Archive: http://lists.mysql.com/commits/141797 Message-Id: <20111110140913.9875F233@fimafeng09.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3406 Ole John Aske 2011-11-10 MRR refactoring of ha_ndbcluster::multi_range_read_info() and ha_ndbcluster::multi_range_read_info_const() to use the same pattern as the DS-MRR implementation of these methods (DsMrr_impl::dsmrr_info_[const]). This should simplify future maintenance of this code if/when it changes. modified: sql/ha_ndbcluster.cc sql/ha_ndbcluster.h 3405 Ole John Aske 2011-11-10 [merge] Cherry picked fix for Bug#13330645 into mysql-trunk-cluster (EXPLAIN OF 'USING MRR' IS INCORRECT IF HANDLER NATIVELY SUPPORTS 'HA_MRR_SORTED') BEWARE: This fix is not yet reviewed, so we may have to update the MCP-tagged code based on review comments. However, we need this fixed in order to continue with integrating the new MRR interface with trunk-cluster. Updated lots of result files which now will explain 'Using MRR'. 
modified: mysql-test/suite/ndb/r/ndb_condition_pushdown.result mysql-test/suite/ndb/r/ndb_join_pushdown.result mysql-test/suite/ndb/r/ndb_read_multi_range.result mysql-test/suite/ndb/r/ndb_row_count.result mysql-test/suite/ndb/r/ndb_statistics0.result mysql-test/suite/ndb/r/ndb_statistics1.result sql/ha_ndbcluster.cc sql/handler.cc sql/handler.h sql/opt_explain.cc sql/opt_range.cc === modified file 'sql/ha_ndbcluster.cc' --- a/sql/ha_ndbcluster.cc 2011-11-10 13:08:24 +0000 +++ b/sql/ha_ndbcluster.cc 2011-11-10 14:08:18 +0000 @@ -14409,9 +14409,9 @@ multi_range_put_custom(HANDLER_BUFFER *b */ static my_bool read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type, const KEY *key_info, - const KEY_MULTI_RANGE *r) + const KEY_MULTI_RANGE *r, bool is_pushed) { - if (cur_index_type == ORDERED_INDEX) + if (cur_index_type == ORDERED_INDEX || is_pushed) return TRUE; if (cur_index_type == PRIMARY_KEY_INDEX || cur_index_type == UNIQUE_INDEX) @@ -14445,116 +14445,45 @@ read_multi_needs_scan(NDB_INDEX_TYPE cur ha_rows ha_ndbcluster::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges_arg, uint *bufsz, + uint n_ranges, uint *bufsz, uint *flags, COST_VECT *cost) { - KEY_MULTI_RANGE range; - range_seq_t seq_it; - ha_rows rows, total_rows= 0; - uint n_ranges=0; - bool null_ranges= FALSE; - THD *thd= current_thd; - NDB_INDEX_TYPE key_type= get_index_type(keyno); - KEY* key_info= table->key_info + keyno; - ulong reclength= table_share->reclength; - ulong total_bufsize; - uint save_bufsize= *bufsz; - DBUG_ENTER("ha_ndbcluster::multi_range_read_info_const"); + ha_rows rows; + uint def_flags= *flags; + uint def_bufsz= *bufsz; - total_bufsize= multi_range_fixed_size(n_ranges_arg); + DBUG_ENTER("ha_ndbcluster::multi_range_read_info_const"); - seq_it= seq->init(seq_init_param, n_ranges, *flags); - while (!seq->next(seq_it, &range)) + /* Get cost/flags/mem_usage of default MRR implementation */ + rows= handler::multi_range_read_info_const(keyno, seq, 
seq_init_param, + n_ranges, &def_bufsz, + &def_flags, cost); + if (unlikely(rows == HA_POS_ERROR)) { - if (unlikely(thd->killed != 0)) - DBUG_RETURN(HA_POS_ERROR); - - n_ranges++; - key_range *min_endp= range.start_key.length? &range.start_key : NULL; - key_range *max_endp= range.end_key.length? &range.end_key : NULL; - null_ranges|= (range.range_flag & NULL_RANGE); - if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) - rows= 1; /* there can be at most one row */ - else - { - if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, - max_endp))) - { - /* Can't scan one range => can't do MRR scan at all */ - total_rows= HA_POS_ERROR; - break; - } - } - total_rows+= rows; - total_bufsize+= - multi_range_max_entry((read_multi_needs_scan(key_type, key_info, &range) ? - ORDERED_INDEX : - UNIQUE_INDEX), - reclength); - } - - if (total_rows != HA_POS_ERROR) - { - if (uses_blob_value(table->read_set) || - ((get_index_type(keyno) == UNIQUE_INDEX && - has_null_in_unique_index(keyno)) && null_ranges)) - { - /* Use default MRR implementation */ - *flags|= HA_MRR_USE_DEFAULT_IMPL; - *flags|= HA_MRR_SUPPORT_SORTED; - *bufsz= 0; - } - else - { - total_bufsize+= multi_range_fixed_size(total_rows); + DBUG_RETURN(rows); + } - DBUG_PRINT("info", ("MRR bufsize suggested=%u want=%lu limit=%d", - save_bufsize, total_bufsize, - (*flags & HA_MRR_LIMITS) != 0)); - - if (unlikely(total_bufsize > (ulong)UINT_MAX)) - total_bufsize= (ulong)UINT_MAX; - - /* - We'll be most efficient when we have buffer big enough to accomodate - all ranges. But we need at least sufficient buffer for one range to - do MRR at all. - */ - uint entry_size= multi_range_max_entry(key_type, reclength); - uint min_total_size= entry_size + multi_range_fixed_size(1); - if (save_bufsize < min_total_size) - { - if(*flags & HA_MRR_LIMITS) - { - /* Too small buffer limit to do MRR. 
*/ - *flags|= HA_MRR_USE_DEFAULT_IMPL; - *flags|= HA_MRR_SUPPORT_SORTED; - *bufsz= 0; - } - else - { - *flags&= ~HA_MRR_USE_DEFAULT_IMPL; - *flags|= HA_MRR_SUPPORT_SORTED; - *bufsz= min_total_size; - } - } - else - { - *flags&= ~HA_MRR_USE_DEFAULT_IMPL; - *flags|= HA_MRR_SUPPORT_SORTED; - *bufsz= min(save_bufsize, total_bufsize); - } - } - DBUG_PRINT("info", ("MRR bufsize set to %u", *bufsz)); - cost->zero(); - cost->avg_io_cost= 1; /* assume random seeks */ - if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2) - cost->io_count= index_only_read_time(keyno, total_rows); - else - cost->io_count= read_time(keyno, n_ranges, total_rows); - cost->cpu_cost= total_rows * ROW_EVALUATE_COST + 0.01; + /* + If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is + an order to use the default MRR implementation. + Otherwise, make a choice based on requested *flags, handler + capabilities, cost and mrr* flags of @@optimizer_switch. + */ + if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || + choose_mrr_impl(keyno, n_ranges, rows, bufsz, flags, cost)) + { + DBUG_PRINT("info", ("Default MRR implementation choosen")); + *flags= def_flags; + *bufsz= def_bufsz; + DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL); } - DBUG_RETURN(total_rows); + else + { + /* *flags and *bufsz were set by choose_mrr_impl */ + DBUG_PRINT("info", ("NDB-MRR implementation choosen")); + DBUG_ASSERT(!(*flags & HA_MRR_USE_DEFAULT_IMPL)); + } + DBUG_RETURN(rows); } @@ -14566,62 +14495,165 @@ ha_ndbcluster::multi_range_read_info_con */ ha_rows -ha_ndbcluster::multi_range_read_info(uint keyno, uint n_ranges, uint keys, +ha_ndbcluster::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, uint *bufsz, uint *flags, COST_VECT *cost) { ha_rows res; - uint save_bufsize= *bufsz; + uint def_flags= *flags; + uint def_bufsz= *bufsz; + DBUG_ENTER("ha_ndbcluster::multi_range_read_info"); - res= handler::multi_range_read_info(keyno, n_ranges, keys, bufsz, flags, + /* Get cost/flags/mem_usage of default MRR implementation */ + 
res= handler::multi_range_read_info(keyno, n_ranges, n_rows, &def_bufsz, &def_flags, cost); + if (unlikely(res == HA_POS_ERROR)) + { + /* Default implementation can't perform MRR scan => we can't either */ + DBUG_RETURN(res); + } + DBUG_ASSERT(!res); + + if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || + choose_mrr_impl(keyno, n_ranges, n_rows, bufsz, flags, cost)) + { + /* Default implementation is choosen */ + DBUG_PRINT("info", ("Default MRR implementation choosen")); + *flags= def_flags; + *bufsz= def_bufsz; + DBUG_ASSERT(*flags & HA_MRR_USE_DEFAULT_IMPL); + } + else + { + /* *flags and *bufsz were set by choose_mrr_impl */ + DBUG_PRINT("info", ("NDB-MRR implementation choosen")); + DBUG_ASSERT(!(*flags & HA_MRR_USE_DEFAULT_IMPL)); + } + DBUG_RETURN(res); +} + +/** + Internals: Choose between Default MRR implementation and + native ha_ndbcluster MRR + + Make the choice between using Default MRR implementation and ha_ndbcluster-MRR. + This function contains common functionality factored out of multi_range_read_info() + and multi_range_read_info_const(). The function assumes that the default MRR + implementation's applicability requirements are satisfied. + + @param keyno Index number + @param n_ranges Number of ranges/keys (i.e. intervals) in the range sequence. 
+ @param n_rows E(full rows to be retrieved) + @param bufsz OUT If DS-MRR is choosen, buffer use of DS-MRR implementation + else the value is not modified + @param flags IN MRR flags provided by the MRR user + OUT If DS-MRR is choosen, flags of DS-MRR implementation + else the value is not modified + @param cost IN Cost of default MRR implementation + OUT If DS-MRR is choosen, cost of DS-MRR scan + else the value is not modified + + @retval TRUE Default MRR implementation should be used + @retval FALSE NDB-MRR implementation should be used +*/ + +bool ha_ndbcluster::choose_mrr_impl(uint keyno, uint n_ranges, ha_rows n_rows, + uint *bufsz, uint *flags, COST_VECT *cost) +{ + THD *thd= current_thd; NDB_INDEX_TYPE key_type= get_index_type(keyno); + /* Disable MRR on blob read and on NULL lookup in unique index. */ - if (uses_blob_value(table->read_set) || + if (!thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) || + uses_blob_value(table->read_set) || ( key_type == UNIQUE_INDEX && has_null_in_unique_index(keyno) && !(*flags & HA_MRR_NO_NULL_ENDPOINTS))) { - *flags|= HA_MRR_USE_DEFAULT_IMPL; - *flags|= HA_MRR_SUPPORT_SORTED; - *bufsz= 0; + /* Use the default implementation, don't modify args: See comments */ + return true; } - else + + /** + * Calculate *bufsz, fallback to default MRR if we can't allocate + * suffient buffer space for NDB-MRR + */ { + uint save_bufsize= *bufsz; ulong reclength= table_share->reclength; uint entry_size= multi_range_max_entry(key_type, reclength); uint min_total_size= entry_size + multi_range_fixed_size(1); DBUG_PRINT("info", ("MRR bufsize suggested=%u want=%u limit=%d", - save_bufsize, (keys + 1) * entry_size, + save_bufsize, (uint)(n_rows + 1) * entry_size, (*flags & HA_MRR_LIMITS) != 0)); if (save_bufsize < min_total_size) { - if(*flags & HA_MRR_LIMITS) - { - /* Too small buffer limit to do MRR. 
*/ - *flags|= HA_MRR_USE_DEFAULT_IMPL; - *flags|= HA_MRR_SUPPORT_SORTED; - *bufsz= 0; - } - else + if (*flags & HA_MRR_LIMITS) { - *flags&= ~HA_MRR_USE_DEFAULT_IMPL; - *flags|= HA_MRR_SUPPORT_SORTED; - *bufsz= min_total_size; + /* Too small buffer limit for native NDB-MRR. */ + return true; } + *bufsz= min_total_size; } else { - *flags&= ~HA_MRR_USE_DEFAULT_IMPL; - *flags|= HA_MRR_SUPPORT_SORTED; + uint max_ranges= (n_ranges > 0) ? n_ranges : MRR_MAX_RANGES; *bufsz= min(save_bufsize, - keys * entry_size + multi_range_fixed_size(n_ranges)); + n_rows * entry_size + multi_range_fixed_size(max_ranges)); } DBUG_PRINT("info", ("MRR bufsize set to %u", *bufsz)); } - DBUG_RETURN(res); + + /** + * Cost based MRR optimization is known to be incorrect. + * Disabled - always use NDB-MRR whenever possible + */ +if (false) +{ + /** + * FIXME: Cost calculation is a copy of current default-MRR + * cost calculation. (Which also is incorrect!) + * TODO: We have to invent our own metrics for NDB-MRR. + */ + COST_VECT mrr_cost; + mrr_cost.zero(); + mrr_cost.avg_io_cost= 1; /* assume random seeks */ + if ((*flags & HA_MRR_INDEX_ONLY) && n_rows > 2) + mrr_cost.io_count= index_only_read_time(keyno, n_rows); + else + mrr_cost.io_count= read_time(keyno, n_ranges, n_rows); + mrr_cost.cpu_cost= n_rows * ROW_EVALUATE_COST + 0.01; + + bool force_mrr; + /* + If @@optimizer_switch has "mrr" on and "mrr_cost_based" off, then set cost + of DS-MRR to be minimum of DS-MRR and Default implementations cost. This + allows one to force use of DS-MRR whenever it is applicable without + affecting other cost-based choices. 
+ */ + if ((force_mrr= + (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR) && + !thd->optimizer_switch_flag(OPTIMIZER_SWITCH_MRR_COST_BASED))) && + mrr_cost.total_cost() > cost->total_cost()) + { + mrr_cost= *cost; + } + if (!force_mrr && mrr_cost.total_cost() > cost->total_cost()) + { + /* Use the default MRR implementation */ + return true; + } + *cost= mrr_cost; +} // if (false) + + /* Use the NDB-MRR implementation */ + *flags&= ~HA_MRR_USE_DEFAULT_IMPL; + *flags|= HA_MRR_SUPPORT_SORTED; + + return false; } + int ha_ndbcluster::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, uint n_ranges, uint mode, @@ -14704,8 +14736,15 @@ int ha_ndbcluster::multi_range_start_ret const NdbOperation *oplist[MRR_MAX_RANGES]; uint num_keyops= 0; NdbTransaction *trans= m_thd_ndb->trans; + bool is_pushed= false; int error; +#ifndef NDB_WITHOUT_JOIN_PUSHDOWN + is_pushed= check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, + active_index, + !m_active_query && mrr_is_output_sorted); +#endif + DBUG_ENTER("multi_range_start_retrievals"); /* @@ -14746,7 +14785,8 @@ int ha_ndbcluster::multi_range_start_ret */ uint min_entry_size= multi_range_entry_size(!read_multi_needs_scan(cur_index_type, key_info, - &mrr_cur_range), reclength); + &mrr_cur_range, is_pushed), + reclength); ulong bufsize= end_of_buffer - multi_range_buffer->buffer; int max_range= multi_range_max_ranges(ranges_in_seq, bufsize - min_entry_size); @@ -14768,10 +14808,7 @@ int ha_ndbcluster::multi_range_start_ret if (range_no >= max_range) break; my_bool need_scan= - read_multi_needs_scan(cur_index_type, key_info, &mrr_cur_range) || - // Pushed joins restricted to ordered range scan in mrr - (m_pushed_join_operation==PUSHED_ROOT && - m_pushed_join_member->get_query_def().isScanQuery()); + read_multi_needs_scan(cur_index_type, key_info, &mrr_cur_range, is_pushed); if (row_buf + multi_range_entry_size(!need_scan, reclength) > end_of_buffer) break; if (need_scan) @@ -14849,9 +14886,7 @@ int 
ha_ndbcluster::multi_range_start_ret #ifndef NDB_WITHOUT_JOIN_PUSHDOWN /* Create the scan operation for the first scan range. */ - if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, - active_index, - !m_active_query && mrr_is_output_sorted)) + if (is_pushed) { DBUG_ASSERT(!m_read_before_write_removal_used); if (!m_active_query) === modified file 'sql/ha_ndbcluster.h' --- a/sql/ha_ndbcluster.h 2011-11-02 08:49:13 +0000 +++ b/sql/ha_ndbcluster.h 2011-11-10 14:08:18 +0000 @@ -295,6 +295,11 @@ class ha_ndbcluster: public handler ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, uint *bufsz, uint *flags, COST_VECT *cost); private: + bool choose_mrr_impl(uint keyno, uint n_ranges, ha_rows n_rows, + uint *bufsz, uint *flags, + COST_VECT *cost); + +private: uint first_running_range; uint first_range_in_batch; uint first_unstarted_range; No bundle (reason: useless for push emails).