From: Ole John Aske Date: October 25 2011 8:22am Subject: bzr push into mysql-trunk-cluster branch (ole.john.aske:3399 to 3400) List-Archive: http://lists.mysql.com/commits/141574 Message-Id: <20111025082252.45815233@fimafeng09.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3400 Ole John Aske 2011-10-25 Removed old MRR implementation from ha_ndbcluster (Dead code) modified: sql/ha_ndbcluster.cc sql/ha_ndbcluster_glue.h 3399 Ole John Aske 2011-10-25 Integrated pushed join execution into new MRR interface. modified: mysql-test/suite/ndb/r/ndb_join_pushdown.result sql/ha_ndbcluster.cc === modified file 'sql/ha_ndbcluster.cc' --- a/sql/ha_ndbcluster.cc 2011-10-25 08:09:27 +0000 +++ b/sql/ha_ndbcluster.cc 2011-10-25 08:22:25 +0000 @@ -14455,38 +14455,6 @@ int ha_ndbcluster::write_ndb_file(const DBUG_RETURN(error); } -#ifndef NDB_WITH_NEW_MRR_INTERFACE -bool -ha_ndbcluster::null_value_index_search(KEY_MULTI_RANGE *ranges, - KEY_MULTI_RANGE *end_range, - HANDLER_BUFFER *buffer) -{ - DBUG_ENTER("null_value_index_search"); - KEY* key_info= table->key_info + active_index; - KEY_MULTI_RANGE *range= ranges; - ulong reclength= table->s->reclength; - uchar *curr= (uchar*)buffer->buffer; - uchar *end_of_buffer= (uchar*)buffer->buffer_end; - - /* All passed ranges whose results could fit into the - * buffer are examined, although some may later be - * marked for skipping, wasting buffer space. 
- */ - assert(!(range->range_flag & SKIP_RANGE)); - - for (; range<end_range && curr+reclength <= end_of_buffer; - range++) - { - const uchar *key= range->start_key.key; - uint key_len= range->start_key.length; - if (check_null_in_key(key_info, key, key_len)) - DBUG_RETURN(TRUE); - curr += reclength; - } - DBUG_RETURN(FALSE); -} -#endif - void ha_ndbcluster::check_read_before_write_removal() { DBUG_ENTER("check_read_before_write_removal"); @@ -14506,12 +14474,11 @@ void ha_ndbcluster::check_read_before_wr DBUG_VOID_RETURN; } -#ifdef NDB_WITH_NEW_MRR_INTERFACE + /**************************************************************************** * MRR interface implementation ***************************************************************************/ - /** We will not attempt to deal with more than this many ranges in a single MRR execute(). @@ -15070,6 +15037,7 @@ int ha_ndbcluster::multi_range_start_ret any_real_read= TRUE; DBUG_PRINT("info", ("any_real_read= TRUE")); +#ifndef NDB_WITHOUT_JOIN_PUSHDOWN /* Create the scan operation for the first scan range. */ if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, active_index, @@ -15088,8 +15056,9 @@ int ha_ndbcluster::multi_range_start_ret ERR_RETURN(query->getNdbError()); } } // check_if_pushable() - - else if (!m_multi_cursor) + else +#endif + if (!m_multi_cursor) { /* Do a multi-range index scan for ranges not done by primary/unique key. */ NdbScanOperation::ScanOptions options; @@ -15514,644 +15483,6 @@ int ha_ndbcluster::multi_range_read_next } } -#else // not 'NDB_WITH_NEW_MRR_INTERFACE' - -/* - This is used to check if an ordered index scan is needed for a range in - a multi range read. - If a scan is not needed, we use a faster primary/unique key operation - instead. 
-*/ -static my_bool -read_multi_needs_scan(NDB_INDEX_TYPE cur_index_type, const KEY *key_info, - const KEY_MULTI_RANGE *r) -{ - if (cur_index_type == ORDERED_INDEX) - return TRUE; - if (cur_index_type == PRIMARY_KEY_INDEX || - cur_index_type == UNIQUE_INDEX) - return FALSE; - DBUG_ASSERT(cur_index_type == PRIMARY_KEY_ORDERED_INDEX || - cur_index_type == UNIQUE_ORDERED_INDEX); - if (r->start_key.length != key_info->key_length || - r->start_key.flag != HA_READ_KEY_EXACT) - return TRUE; // Not exact match, need scan - if (cur_index_type == UNIQUE_ORDERED_INDEX && - check_null_in_key(key_info, r->start_key.key,r->start_key.length)) - return TRUE; // Can't use for NULL values - return FALSE; -} - -int -ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p, - KEY_MULTI_RANGE *ranges, - uint range_count, - bool sorted, - HANDLER_BUFFER *buffer) -{ - KEY* key_info= table->key_info + active_index; - NDB_INDEX_TYPE cur_index_type= get_index_type(active_index); - ulong reclength= table_share->reclength; - NdbTransaction *trans= m_thd_ndb->trans; - int error; - - DBUG_ENTER("ha_ndbcluster::read_multi_range_first"); - DBUG_PRINT("info", ("blob fields=%d read_set=0x%x", table_share->blob_fields, table->read_set->bitmap[0])); - - /** - * Blobs and unique hash index with NULL can't be batched currently. - * Neither are pushed lookup joins batchable. 
- */ - if (uses_blob_value(table->read_set) || - (cur_index_type == UNIQUE_INDEX && - has_null_in_unique_index(active_index) && - null_value_index_search(ranges, ranges+range_count, buffer)) - || (m_pushed_join_operation==PUSHED_ROOT && - !m_disable_pushed_join && - !m_pushed_join_member->get_query_def().isScanQuery()) - || m_delete_cannot_batch || m_update_cannot_batch) - { - DBUG_PRINT("info", ("read_multi_range not possible, falling back to default handler implementation")); - m_disable_multi_read= TRUE; - DBUG_RETURN(handler::read_multi_range_first(found_range_p, - ranges, - range_count, - sorted, - buffer)); - } - - /** - * There may still be an open m_multi_cursor from the previous mrr access on this handler. - * Close it now to free up resources for this NdbScanOperation. - */ - if (unlikely((error= close_scan()))) - DBUG_RETURN(error); - - m_disable_multi_read= FALSE; - - /* - * Copy arguments into member variables - */ - m_multi_ranges= ranges; - multi_range_curr= ranges; - multi_range_end= ranges+range_count; - multi_range_sorted= sorted; - multi_range_buffer= buffer; - - /* - * read multi range will read ranges as follows (if not ordered) - * - * input read order - * ====== ========== - * pk-op 1 pk-op 1 - * pk-op 2 pk-op 2 - * range 3 range (3,5) NOTE result rows will be intermixed - * pk-op 4 pk-op 4 - * range 5 - * pk-op 6 pk-op 6 - */ - - /* - We first loop over all ranges, converting into primary/unique key - operations if possible, and counting ranges that require an - ordered index scan. If the supplied HANDLER_BUFFER is too small, we - may also need to do only part of the multi read at once. - - Afterwards, we create the ordered index scan cursor (if needed). - */ - - DBUG_ASSERT(cur_index_type != UNDEFINED_INDEX); - DBUG_ASSERT(m_multi_cursor==NULL); - DBUG_ASSERT(m_active_query==NULL); - - const NdbOperation* lastOp= trans ? 
trans->getLastDefinedOperation() : 0; - const NdbOperation::LockMode lm = get_ndb_lock_mode(m_lock.type); - uchar *row_buf= (uchar *)buffer->buffer; - const uchar *end_of_buffer= buffer->buffer_end; - uint num_scan_ranges= 0; - uint i; - bool any_real_read= FALSE; - - if (m_read_before_write_removal_possible) - check_read_before_write_removal(); - for (i= 0; i < range_count; i++) - { - KEY_MULTI_RANGE *r= &ranges[i]; - - part_id_range part_spec; - if (m_use_partition_pruning) - { - get_partition_set(table, table->record[0], active_index, &r->start_key, - &part_spec); - DBUG_PRINT("info", ("part_spec.start_part: %u part_spec.end_part: %u", - part_spec.start_part, part_spec.end_part)); - /* - If partition pruning has found no partition in set - we can skip this scan - */ - if (part_spec.start_part > part_spec.end_part) - { - /* - We can skip this partition since the key won't fit into any - partition - */ - r->range_flag|= SKIP_RANGE; - row_buf += reclength; - continue; - } - if (!trans && - (part_spec.start_part == part_spec.end_part)) - if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part, - error)))) - DBUG_RETURN(error); - } - r->range_flag&= ~(uint)SKIP_RANGE; - - if ((m_pushed_join_operation==PUSHED_ROOT && - m_pushed_join_member->get_query_def().isScanQuery()) || // Pushed joins restricted to ordered range scan in mrr - read_multi_needs_scan(cur_index_type, key_info, r)) - { - if (!trans) - { - // ToDo see if we can use start_transaction_key here instead - if (!m_use_partition_pruning) - { - get_partition_set(table, table->record[0], active_index, &r->start_key, - &part_spec); - if (part_spec.start_part == part_spec.end_part) - { - if (unlikely(!(trans= start_transaction_part_id(part_spec.start_part, - error)))) - DBUG_RETURN(error); - } - else if (unlikely(!(trans= start_transaction(error)))) - DBUG_RETURN(error); - } - else if (unlikely(!(trans= start_transaction(error)))) - DBUG_RETURN(error); - } - - any_real_read= TRUE; - 
DBUG_PRINT("info", ("any_real_read= TRUE")); - - /* - If we reach the limit of ranges allowed in a single scan: stop - here, send what we have so far, and continue when done with that. - */ - if (i > NdbIndexScanOperation::MaxRangeNo) - { - DBUG_PRINT("info", ("Reached the limit of ranges allowed in a single" - "scan")); - break; - } - -#ifndef NDB_WITHOUT_JOIN_PUSHDOWN - /* Create the scan operation for the first scan range. */ - if (check_if_pushable(NdbQueryOperationDef::OrderedIndexScan, - active_index, - !m_active_query && sorted)) - { - if (!m_active_query) - { - const int error= create_pushed_join(); - if (unlikely(error)) - DBUG_RETURN(error); - - NdbQuery* const query= m_active_query; - if (sorted && - query->getQueryOperation((uint)PUSHED_ROOT)->setOrdering(NdbQueryOptions::ScanOrdering_ascending)) - ERR_RETURN(query->getNdbError()); - } - } - else -#endif - if (!m_multi_cursor) - { - if (m_pushed_join_operation == PUSHED_ROOT) - { - m_thd_ndb->m_pushed_queries_dropped++; - } - /* Do a multi-range index scan for ranges not done by primary/unique key. 
*/ - NdbScanOperation::ScanOptions options; - NdbInterpretedCode code(m_table); - - options.optionsPresent= - NdbScanOperation::ScanOptions::SO_SCANFLAGS | - NdbScanOperation::ScanOptions::SO_PARALLEL; - - options.scan_flags= - NdbScanOperation::SF_ReadRangeNo | - NdbScanOperation::SF_MultiRange; - - if (lm == NdbOperation::LM_Read) - options.scan_flags|= NdbScanOperation::SF_KeyInfo; - if (sorted) - options.scan_flags|= NdbScanOperation::SF_OrderByFull; - - options.parallel= DEFAULT_PARALLELISM; - - NdbOperation::GetValueSpec gets[2]; - if (table_share->primary_key == MAX_KEY) - get_hidden_fields_scan(&options, gets); - - if (m_cond && m_cond->generate_scan_filter(&code, &options)) - ERR_RETURN(code.getNdbError()); - - /* Define scan */ - NdbIndexScanOperation *scanOp= trans->scanIndex - (m_index[active_index].ndb_record_key, - m_ndb_record, - lm, - (uchar *)(table->read_set->bitmap), - NULL, /* All bounds specified below */ - &options, - sizeof(NdbScanOperation::ScanOptions)); - - if (!scanOp) - ERR_RETURN(trans->getNdbError()); - - m_multi_cursor= scanOp; - - /* - We do not get_blob_values() here, as when using blobs we always - fallback to non-batched multi range read (see if statement at - top of this function). - */ - - /* We set m_next_row=0 to say that no row was fetched from the scan yet. */ - m_next_row= 0; - } - - Ndb::PartitionSpec ndbPartitionSpec; - const Ndb::PartitionSpec* ndbPartSpecPtr= NULL; - - /* If this table uses user-defined partitioning, use MySQLD provided - * partition info as pruning info - * Otherwise, scan range pruning is performed automatically by - * NDBAPI based on distribution key values. 
- */ - if (m_use_partition_pruning && - m_user_defined_partitioning && - (part_spec.start_part == part_spec.end_part)) - { - DBUG_PRINT("info", ("Range on user-def-partitioned table can be pruned to part %u", - part_spec.start_part)); - ndbPartitionSpec.type= Ndb::PartitionSpec::PS_USER_DEFINED; - ndbPartitionSpec.UserDefined.partitionId= part_spec.start_part; - ndbPartSpecPtr= &ndbPartitionSpec; - } - - /* Include this range in the ordered index scan. */ - NdbIndexScanOperation::IndexBound bound; - compute_index_bounds(bound, key_info, &r->start_key, &r->end_key, 0); - bound.range_no= i; - - const NdbRecord *key_rec= m_index[active_index].ndb_record_key; - if (m_active_query) - { - DBUG_PRINT("info", ("setBound:%d, for pushed join", bound.range_no)); - if (m_active_query->setBound(key_rec, &bound)) - { - ERR_RETURN(trans->getNdbError()); - } - } - else - { - if (m_multi_cursor->setBound(key_rec, - bound, - ndbPartSpecPtr, // Only for user-def tables - sizeof(Ndb::PartitionSpec))) - { - ERR_RETURN(trans->getNdbError()); - } - } - - r->range_flag&= ~(uint)UNIQUE_RANGE; - num_scan_ranges++; - } - else // if ((...PUSHED_ROOT && m_pushed_join->get_query_def().isScanQuery()) ||... - { - if (m_pushed_join_operation == PUSHED_ROOT) - { - m_thd_ndb->m_pushed_queries_dropped++; - } - if (!trans) - { - DBUG_ASSERT(active_index != MAX_KEY); - if (unlikely(!(trans= start_transaction_key(active_index, - r->start_key.key, - error)))) - DBUG_RETURN(error); - } - /* - Convert to primary/unique key operation. - - If there is not enough buffer for reading the row: stop here, send - what we have so far, and continue when done with that. 
- */ - if (row_buf + reclength > end_of_buffer) - break; - - if (m_read_before_write_removal_used) - { - r->range_flag|= READ_KEY_FROM_RANGE; - continue; - } - else - { - any_real_read= TRUE; - DBUG_PRINT("info", ("m_read_before_write_removal_used == FALSE, " - "any_real_read= TRUE")); - } - r->range_flag|= UNIQUE_RANGE; - - Uint32 partitionId; - Uint32* ppartitionId = NULL; - - if (m_user_defined_partitioning && - (cur_index_type == PRIMARY_KEY_ORDERED_INDEX || - cur_index_type == PRIMARY_KEY_INDEX)) - { - partitionId=part_spec.start_part; - ppartitionId=&partitionId; - } - - DBUG_PRINT("info", ("Generating Pk/Unique key read for range %u", i)); - - // 'Pushable codepath' is incomplete and expected not - // to be produced as make_join_pushed() handle - // AT_MULTI_UNIQUE_KEY as non-pushable - if (m_pushed_join_operation==PUSHED_ROOT && - !m_disable_pushed_join && - !m_pushed_join_member->get_query_def().isScanQuery()) - { - DBUG_ASSERT(false); // Incomplete code, should not be executed - DBUG_ASSERT(lm == NdbOperation::LM_CommittedRead); - const int error= pk_unique_index_read_key_pushed(active_index, - r->start_key.key, - ppartitionId); - if (unlikely(error)) - DBUG_RETURN(error); - } - else - { - if (m_pushed_join_operation == PUSHED_ROOT) - { - DBUG_PRINT("info", ("Cannot push join due to incomplete implementation.")); - m_thd_ndb->m_pushed_queries_dropped++; - } - const NdbOperation* op; - if (!(op= pk_unique_index_read_key(active_index, - r->start_key.key, - row_buf, lm, - ppartitionId))) - ERR_RETURN(trans->getNdbError()); - } - row_buf+= reclength; - } - } - DBUG_ASSERT(i > 0 || i == range_count); // Require progress - m_multi_range_defined_end= ranges + i; - - buffer->end_of_used_area= row_buf; - - if (m_active_query != NULL && - m_pushed_join_member->get_query_def().isScanQuery()) - { - m_thd_ndb->m_scan_count++; - if (sorted) - { - m_thd_ndb->m_sorted_scan_count++; - } - - bool prunable = false; - if (unlikely(m_active_query->isPrunable(prunable) != 0)) 
- ERR_RETURN(m_active_query->getNdbError()); - if (prunable) - m_thd_ndb->m_pruned_scan_count++; - - DBUG_PRINT("info", ("Is MRR scan-query pruned to 1 partition? :%u", prunable)); - DBUG_ASSERT(!m_multi_cursor); - }; - if (m_multi_cursor) - { - DBUG_PRINT("info", ("Is MRR scan pruned to 1 partition? :%u", - m_multi_cursor->getPruned())); - m_thd_ndb->m_scan_count++; - m_thd_ndb->m_pruned_scan_count += (m_multi_cursor->getPruned()? 1 : 0); - if (sorted) - { - m_thd_ndb->m_sorted_scan_count++; - } - }; - - if (any_real_read) - { - /* Get pointer to first range key operation (not scans) */ - const NdbOperation* rangeOp= lastOp ? lastOp->next() : - trans->getFirstDefinedOperation(); - - DBUG_PRINT("info", ("Executing reads")); - - if (execute_no_commit_ie(m_thd_ndb, trans) == 0) - { - m_multi_range_result_ptr= buffer->buffer; - - /* We must check the result of any primary or unique key - * ranges now, as these operations may be invalidated by - * further execute+releaseOperations calls on this transaction by - * different handler objects. - */ - KEY_MULTI_RANGE* rangeInfo= multi_range_curr; - - for (;rangeInfo < m_multi_range_defined_end; rangeInfo++) - { - DBUG_PRINT("info", ("range flag is %u", rangeInfo->range_flag)); - if (rangeInfo->range_flag & SKIP_RANGE) - continue; - - if ((rangeInfo->range_flag & UNIQUE_RANGE) && - (!(rangeInfo->range_flag & READ_KEY_FROM_RANGE))) - { - assert(rangeOp != NULL); - if (rangeOp->getNdbError().code == 0) - { - /* Successful read, results are in buffer. 
- */ - rangeInfo->range_flag &= ~(uint)EMPTY_RANGE; - - DBUG_PRINT("info", ("Unique range op has result")); - } - else - { - NdbError err= rangeOp->getNdbError(); - - if (err.classification != - NdbError::NoDataFound) - DBUG_RETURN(ndb_err(trans)); - - DBUG_PRINT("info", ("Unique range op has no result")); - /* Indicate to read_multi_range_next that this - * result is empty - */ - rangeInfo->range_flag |= EMPTY_RANGE; - } - - /* Move to next completed operation */ - rangeOp= trans->getNextCompletedOperation(rangeOp); - } - - /* For scan ranges, do nothing here */ - } - } - else - ERR_RETURN(trans->getNdbError()); - } - - DBUG_RETURN(read_multi_range_next(found_range_p)); -} - -int -ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p) -{ - DBUG_ENTER("ha_ndbcluster::read_multi_range_next"); - if (m_disable_multi_read) - { - DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p)); - } - - const ulong reclength= table_share->reclength; - - while (multi_range_curr < m_multi_range_defined_end) - { - if (multi_range_curr->range_flag & SKIP_RANGE) - { - /* Nothing in this range, move to next one, skipping a buffer - 'slot' - */ - m_multi_range_result_ptr += reclength; - multi_range_curr++; - } - else if (multi_range_curr->range_flag & READ_KEY_FROM_RANGE) - { - DBUG_PRINT("info", ("using read before write removal optimisation")); - KEY* key_info= table->key_info + active_index; - key_restore(table->record[0], (uchar*)multi_range_curr->start_key.key, - key_info, key_info->key_length); - table->status= 0; - multi_range_curr++; - DBUG_RETURN(0); - } - else if (multi_range_curr->range_flag & UNIQUE_RANGE) - { - /* - Move to next range; we can have at most one record from a unique range. - */ - KEY_MULTI_RANGE *old_multi_range_curr= multi_range_curr; - multi_range_curr= old_multi_range_curr + 1; - /* - Clear m_active_cursor; it is used as a flag in update_row() / - delete_row() to know whether the current tuple is from a scan - or pk operation. 
- */ - m_active_cursor= NULL; - const uchar *src_row= m_multi_range_result_ptr; - m_multi_range_result_ptr= src_row + table_share->reclength; - - if (!(old_multi_range_curr->range_flag & EMPTY_RANGE)) - { - *multi_range_found_p= old_multi_range_curr; - memcpy(table->record[0], src_row, table_share->reclength); - DBUG_RETURN(0); - } - - /* No row found, so fall through to try the next range. */ - } - else - { - /* An index scan range. */ - { - int res; - if ((res= read_multi_range_fetch_next()) != 0) - DBUG_RETURN(res); - } - if (!m_next_row) - { - /* - The whole scan is done, and the cursor has been closed. - So nothing more for this range. Move to next. - */ - multi_range_curr++; - } - else - { - int current_range_no= m_current_range_no; - int expected_range_no; - /* - For a sorted index scan, we will receive rows in increasing range_no - order, so we can return ranges in order, pausing when range_no - indicate that the currently processed range (multi_range_curr) is - done. - - But for unsorted scan, we may receive a high range_no from one - fragment followed by a low range_no from another fragment. So we - need to process all index scan ranges together. - */ - if (!multi_range_sorted || - (expected_range_no= multi_range_curr - m_multi_ranges) - == current_range_no) - { - *multi_range_found_p= m_multi_ranges + current_range_no; - /* Copy out data from the new row. */ - unpack_record(table->record[0], m_next_row); - table->status= 0; - /* - Mark that we have used this row, so we need to fetch a new - one on the next call. - */ - m_next_row= 0; - /* - Set m_active_cursor; it is used as a flag in update_row() / - delete_row() to know whether the current tuple is from a scan or - pk operation. - */ - m_active_cursor= m_multi_cursor; - - DBUG_RETURN(0); - } - else if (current_range_no > expected_range_no) - { - /* Nothing more in scan for this range. Move to next. */ - multi_range_curr++; - } - else - { - /* - Should not happen. 
Ranges should be returned from NDB API in - the order we requested them. - */ - DBUG_ASSERT(0); - multi_range_curr++; // Attempt to carry on - } - } - } - } - - if (multi_range_curr == multi_range_end) - { - DBUG_RETURN(HA_ERR_END_OF_FILE); - } - - /* - Read remaining ranges - */ - DBUG_RETURN(read_multi_range_first(multi_range_found_p, - multi_range_curr, - multi_range_end - multi_range_curr, - multi_range_sorted, - multi_range_buffer)); -} -#endif /* Fetch next row from the ordered index cursor in multi range scan. === modified file 'sql/ha_ndbcluster_glue.h' --- a/sql/ha_ndbcluster_glue.h 2011-10-05 07:32:06 +0000 +++ b/sql/ha_ndbcluster_glue.h 2011-10-25 08:22:25 +0000 @@ -234,11 +234,4 @@ uint partition_info_num_subparts(const p #endif } -#if MYSQL_VERSION_ID >= 50600 - -/* New multi range read interface replaced original mrr */ -#define NDB_WITH_NEW_MRR_INTERFACE - -#endif - #endif No bundle (reason: useless for push emails).