# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2006/03/18 14:45:40+03:00 sergefp@stripped
# WL#2474 Multi Range Read: Changed the default MRR implementation to implement new MRR interface
#
# sql/sql_select.cc
# 2006/03/18 14:44:57+03:00 sergefp@stripped +9 -16
# WL#2474 Multi Range Read: Changed the default MRR implementation to implement new MRR interface
#
# sql/opt_range.h
# 2006/03/18 14:44:57+03:00 sergefp@stripped +20 -1
# WL#2474 Multi Range Read: Changed the range optimizer to use new MRR interface
#
# sql/opt_range.cc
# 2006/03/18 14:44:56+03:00 sergefp@stripped +480 -374
# WL#2474 Multi Range Read: Changed the range optimizer to use new MRR interface
#
# sql/handler.h
# 2006/03/18 14:44:55+03:00 sergefp@stripped +125 -9
# WL#2474 Multi Range Read: Changed the default MRR implementation to implement new MRR interface
#
# sql/handler.cc
# 2006/03/18 14:44:55+03:00 sergefp@stripped +252 -70
# WL#2474 Multi Range Read: Changed the default MRR implementation to implement new MRR interface
#
# sql/ha_ndbcluster.h
# 2006/03/18 14:44:54+03:00 sergefp@stripped +10 -7
# WL#2474 Multi Range Read: Changed MRR/NDB to provide the new interface, albeit with a help of interface non-compliant
# mrr_persistent_flag_storage() function.
#
# sql/ha_ndbcluster.cc
# 2006/03/18 14:44:53+03:00 sergefp@stripped +118 -107
# WL#2474 Multi Range Read: Changed MRR/NDB to provide the new interface, albeit with a help of interface non-compliant
# mrr_persistent_flag_storage() function.
#
# include/my_base.h
# 2006/03/18 14:44:52+03:00 sergefp@stripped +26 -0
# WL#2474 Multi Range Read: added comments
#
diff -Nru a/include/my_base.h b/include/my_base.h
--- a/include/my_base.h 2006-03-18 14:54:11 +03:00
+++ b/include/my_base.h 2006-03-18 14:54:11 +03:00
@@ -439,14 +439,40 @@
/* For key ranges */
+/* from - inf */
#define NO_MIN_RANGE 1
+
+/* to +inf */
#define NO_MAX_RANGE 2
+
+/* X < key (i.e. not X <= key */
#define NEAR_MIN 4
+
+/* X > key (i.e. not X>= key */
#define NEAR_MAX 8
+
+/*
+ This flag means that index is a unique index, and the interval is
+ equivalent to "AND(keypart_i = const_i)", where all of const_i are not NULLs.
+*/
#define UNIQUE_RANGE 16
+
+/*
+ This flag means that the interval is equivalent to
+ "AND(keypart_i = const_i)", where not all key parts may be used but all of
+ const_i are not NULLs.
+*/
#define EQ_RANGE 32
+
+/*
+ This flag has the same meaning as UNIQUE_RANGE, except that for at least
+ one keypart the condition is "keypart IS NULL".
+*/
#define NULL_RANGE 64
+
#define GEOM_FLAG 128
+
+/* Used only in NDB, at row retrieval stage */
#define SKIP_RANGE 256
typedef struct st_key_range
diff -Nru a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
--- a/sql/ha_ndbcluster.cc 2006-03-18 14:54:11 +03:00
+++ b/sql/ha_ndbcluster.cc 2006-03-18 14:54:11 +03:00
@@ -7017,45 +7017,71 @@
DBUG_RETURN(error);
}
-int
-ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
- KEY_MULTI_RANGE *ranges,
- uint range_count,
- bool sorted,
+/****************************************************************************
+ * MRR interface implementation
+ ***************************************************************************/
+
+#if 0
+#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("multi_range_read_next: case %d\n", x));
+#else
+#define DBUG_MULTI_RANGE(x)
+#endif
+
+//new
+int ha_ndbcluster::multi_range_read_init(RANGE_SEQ_IF *seq_funcs,
+ void *seq_init_param,
+ uint n_ranges, uint mode,
HANDLER_BUFFER *buffer)
{
- DBUG_ENTER("ha_ndbcluster::read_multi_range_first");
- m_write_op= FALSE;
-
int res;
- KEY* key_info= table->key_info + active_index;
- NDB_INDEX_TYPE index_type= get_index_type(active_index);
- ulong reclength= table_share->reclength;
- NdbOperation* op;
+ DBUG_ENTER("ha_ndbcluster::multi_range_read_init");
+ m_write_op= FALSE;
- if (uses_blob_value())
+ if (uses_blob_value() || (seq_funcs->init != quick_range_seq_init))
{
/**
* blobs can't be batched currently
*/
m_disable_multi_read= TRUE;
- DBUG_RETURN(handler::read_multi_range_first(found_range_p,
- ranges,
- range_count,
- sorted,
- buffer));
+ DBUG_RETURN(handler::multi_range_read_init(seq_funcs, seq_init_param,
+ n_ranges, mode, buffer));
}
-
m_disable_multi_read= FALSE;
- /**
- * Copy arguments into member variables
- */
- m_multi_ranges= ranges;
- multi_range_curr= ranges;
- multi_range_end= ranges+range_count;
- multi_range_sorted= sorted;
+ mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
multi_range_buffer= buffer;
+ mrr_funcs= *seq_funcs;
+ mrr_iter= mrr_funcs.init(seq_init_param, n_ranges, mode);
+ ranges_in_seq= n_ranges;
+
+ res= multi_range_start_retrievals(-1);
+ if (first_unstarted_range == n_ranges)
+ {
+ /**
+ * Mark that we're using entire buffer (even if might not) as
+ * we haven't read all ranges for some reason
+ * This as we don't want mysqld to reuse the buffer when we read
+ * the remaining ranges
+ */
+ buffer->end_of_used_area= (byte*)multi_range_buffer->buffer_end;
+ }
+ else
+ {
+ /* Using all buffer */
+ }
+
+ DBUG_RETURN(res);
+}
+
+
+int ha_ndbcluster::multi_range_start_retrievals(int starting_range)
+{
+ int res, range_res;
+ KEY* key_info= table->key_info + active_index;
+ NDB_INDEX_TYPE index_type= get_index_type(active_index);
+ ulong reclength= table_share->reclength;
+ NdbOperation* op;
+ DBUG_ENTER("multi_range_start_retrievals");
/**
* read multi range will read ranges as follows (if not ordered)
@@ -7073,8 +7099,8 @@
/**
* Variables for loop
*/
- byte *curr= (byte*)buffer->buffer;
- byte *end_of_buffer= (byte*)buffer->buffer_end;
+ byte *curr= (byte*)multi_range_buffer->buffer;
+ byte *end_of_buffer= (byte*)multi_range_buffer->buffer_end;
NdbOperation::LockMode lm=
(NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
const NDBTAB *tab= (const NDBTAB *) m_table;
@@ -7082,21 +7108,20 @@
const NDBINDEX *idx= (NDBINDEX *) m_index[active_index].index;
const NdbOperation* lastOp= m_active_trans->getLastDefinedOperation();
NdbIndexScanOperation* scanOp= 0;
- for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer;
- multi_range_curr++)
+
+ int range_no= -1;
+ int mrr_range_no= starting_range;
+
+ while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)) &&
+ curr+reclength <= end_of_buffer)
{
+ range_no++;
+ mrr_range_no++;
part_id_range part_spec;
if (m_use_partition_function)
{
- get_partition_set(table, curr, active_index,
- &multi_range_curr->start_key,
+ get_partition_set(table, curr, active_index, &mrr_cur_range.start_key,
&part_spec);
- DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u",
- part_spec.start_part, part_spec.end_part));
- /*
- If partition pruning has found no partition in set
- we can skip this scan
- */
if (part_spec.start_part > part_spec.end_part)
{
/*
@@ -7104,22 +7129,23 @@
partition
*/
curr += reclength;
- multi_range_curr->range_flag |= SKIP_RANGE;
+ mrr_persistent_flag_storage(mrr_iter, mrr_range_no) |= SKIP_RANGE;
continue;
}
}
+
switch(index_type){
case PRIMARY_KEY_ORDERED_INDEX:
- if (!(multi_range_curr->start_key.length == key_info->key_length &&
- multi_range_curr->start_key.flag == HA_READ_KEY_EXACT))
+ if (!(mrr_cur_range.start_key.length == key_info->key_length &&
+ mrr_cur_range.start_key.flag == HA_READ_KEY_EXACT))
goto range;
// else fall through
case PRIMARY_KEY_INDEX:
{
- multi_range_curr->range_flag |= UNIQUE_RANGE;
+ mrr_persistent_flag_storage(mrr_iter, mrr_range_no) |= UNIQUE_RANGE;
if ((op= m_active_trans->getNdbOperation(tab)) &&
!op->readTuple(lm) &&
- !set_primary_key(op, multi_range_curr->start_key.key) &&
+ !set_primary_key(op, mrr_cur_range.start_key.key) &&
!define_read_attrs(curr, op) &&
(op->setAbortOption(AO_IgnoreError), TRUE) &&
(!m_use_partition_function ||
@@ -7129,20 +7155,23 @@
ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
break;
}
- break;
case UNIQUE_ORDERED_INDEX:
- if (!(multi_range_curr->start_key.length == key_info->key_length &&
- multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
- !check_null_in_key(key_info, multi_range_curr->start_key.key,
- multi_range_curr->start_key.length)))
+ {
+ if (!(mrr_cur_range.start_key.length == key_info->key_length &&
+ mrr_cur_range.start_key.flag == HA_READ_KEY_EXACT &&
+ !check_null_in_key(key_info, mrr_cur_range.start_key.key,
+ mrr_cur_range.start_key.length)))
+ {
goto range;
+ }
// else fall through
+ }
case UNIQUE_INDEX:
{
- multi_range_curr->range_flag |= UNIQUE_RANGE;
+ mrr_persistent_flag_storage(mrr_iter, mrr_range_no) |= UNIQUE_RANGE;
if ((op= m_active_trans->getNdbIndexOperation(unique_idx, tab)) &&
!op->readTuple(lm) &&
- !set_index_key(op, key_info, multi_range_curr->start_key.key) &&
+ !set_index_key(op, key_info, mrr_cur_range.start_key.key) &&
!define_read_attrs(curr, op) &&
(op->setAbortOption(AO_IgnoreError), TRUE))
curr += reclength;
@@ -7150,15 +7179,16 @@
ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
break;
}
- case ORDERED_INDEX: {
+ case ORDERED_INDEX:
+ {
range:
- multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE;
+ mrr_persistent_flag_storage(mrr_iter, mrr_range_no) &= ~(uint16)UNIQUE_RANGE;
if (scanOp == 0)
{
if (m_multi_cursor)
{
scanOp= m_multi_cursor;
- DBUG_ASSERT(scanOp->getSorted() == sorted);
+ DBUG_ASSERT(scanOp->getSorted() == mrr_is_output_sorted);
DBUG_ASSERT(scanOp->getLockMode() ==
(NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
if (scanOp->reset_bounds(m_force_send))
@@ -7167,7 +7197,8 @@
end_of_buffer -= reclength;
}
else if ((scanOp= m_active_trans->getNdbIndexScanOperation(idx, tab))
- &&!scanOp->readTuples(lm, 0, parallelism, sorted, FALSE, TRUE)
+ &&!scanOp->readTuples(lm, 0, parallelism,
+ mrr_is_output_sorted, FALSE, TRUE)
&&!generate_scan_filter(m_cond_stack, scanOp)
&&!define_read_attrs(end_of_buffer-reclength, scanOp))
{
@@ -7181,10 +7212,9 @@
}
}
- const key_range *keys[2]= { &multi_range_curr->start_key,
- &multi_range_curr->end_key };
- if ((res= set_bounds(scanOp, active_index, false, keys,
- multi_range_curr-ranges)))
+ const key_range *keys[2]= { &mrr_cur_range.start_key,
+ &mrr_cur_range.end_key };
+ if ((res= set_bounds(scanOp, active_index, false, keys, range_no)))
DBUG_RETURN(res);
break;
}
@@ -7195,21 +7225,6 @@
}
}
- if (multi_range_curr != multi_range_end)
- {
- /**
- * Mark that we're using entire buffer (even if might not) as
- * we haven't read all ranges for some reason
- * This as we don't want mysqld to reuse the buffer when we read
- * the remaining ranges
- */
- buffer->end_of_used_area= (byte*)buffer->buffer_end;
- }
- else
- {
- buffer->end_of_used_area= curr;
- }
-
/**
* Set first operation in multi range
*/
@@ -7217,40 +7232,36 @@
lastOp ? lastOp->next() : m_active_trans->getFirstDefinedOperation();
if (!(res= execute_no_commit_ie(this, m_active_trans)))
{
- m_multi_range_defined= multi_range_curr;
- multi_range_curr= ranges;
- m_multi_range_result_ptr= (byte*)buffer->buffer;
- DBUG_RETURN(read_multi_range_next(found_range_p));
+ m_multi_range_result_ptr= (byte*)multi_range_buffer->buffer;
+ first_running_range= first_range_in_batch= starting_range + 1;
+ first_unstarted_range= mrr_range_no + 1;
+ DBUG_RETURN(0);
}
ERR_RETURN(m_active_trans->getNdbError());
}
-#if 0
-#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x));
-#else
-#define DBUG_MULTI_RANGE(x)
-#endif
-
-int
-ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
+//new
+int ha_ndbcluster::multi_range_read_next(char **range_info)
{
- DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
+ DBUG_ENTER("ha_ndbcluster::multi_range_read_next");
if (m_disable_multi_read)
{
DBUG_MULTI_RANGE(11);
- DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
+ DBUG_RETURN(handler::multi_range_read_next(range_info));
}
int res;
int range_no;
ulong reclength= table_share->reclength;
const NdbOperation* op= m_current_multi_operation;
- for (;multi_range_curr < m_multi_range_defined; multi_range_curr++)
+
+ //for each range (we should have remembered the number)
+ for (;first_running_range < first_unstarted_range; first_running_range++)
{
DBUG_MULTI_RANGE(12);
- if (multi_range_curr->range_flag & SKIP_RANGE)
+ if (mrr_persistent_flag_storage(mrr_iter, first_running_range) & SKIP_RANGE)
continue;
- if (multi_range_curr->range_flag & UNIQUE_RANGE)
+ if (mrr_persistent_flag_storage(mrr_iter, first_running_range) & UNIQUE_RANGE)
{
if (op->getNdbError().code == 0)
{
@@ -7262,7 +7273,7 @@
m_multi_range_result_ptr += reclength;
continue;
}
- else if (m_multi_cursor && !multi_range_sorted)
+ else if (m_multi_cursor && !mrr_is_output_sorted)
{
DBUG_MULTI_RANGE(1);
if ((res= fetch_next(m_multi_cursor)) == 0)
@@ -7277,7 +7288,7 @@
goto close_scan;
}
}
- else if (m_multi_cursor && multi_range_sorted)
+ else if (m_multi_cursor && mrr_is_output_sorted)
{
if (m_active_cursor && (res= fetch_next(m_multi_cursor)))
{
@@ -7286,7 +7297,7 @@
}
range_no= m_multi_cursor->get_range_no();
- uint current_range_no= multi_range_curr - m_multi_ranges;
+ uint current_range_no= first_running_range - first_range_in_batch;
if ((uint) range_no == current_range_no)
{
DBUG_MULTI_RANGE(4);
@@ -7310,7 +7321,7 @@
DBUG_MULTI_RANGE(15);
goto close_scan;
}
- multi_range_curr--; // Will be increased in for-loop
+ first_running_range--; // Will be increased in for-loop
continue;
}
}
@@ -7318,7 +7329,7 @@
{
DBUG_MULTI_RANGE(7);
/**
- * Corresponds to range 5 in example in read_multi_range_first
+ * Corresponds to range 5 in example in multi_range_start_retrievals
*/
(void)1;
continue;
@@ -7340,27 +7351,23 @@
}
}
- if (multi_range_curr == multi_range_end)
+ if (first_running_range == ranges_in_seq)
{
DBUG_MULTI_RANGE(16);
DBUG_RETURN(HA_ERR_END_OF_FILE);
}
- /**
- * Read remaining ranges
- */
- DBUG_RETURN(read_multi_range_first(multi_range_found_p,
- multi_range_curr,
- multi_range_end - multi_range_curr,
- multi_range_sorted,
- multi_range_buffer));
+ /* Read remaining ranges */
+ if ((res= multi_range_start_retrievals(first_running_range-1)))
+ DBUG_RETURN(res);
+ DBUG_RETURN(multi_range_read_next(range_info));
found:
/**
* Found a record belonging to a scan
*/
m_active_cursor= m_multi_cursor;
- * multi_range_found_p= m_multi_ranges + range_no;
+ *range_info= mrr_get_ptr_by_idx(mrr_iter, first_running_range);
memcpy(table->record[0], m_multi_range_cursor_result_ptr, reclength);
setup_recattr(m_active_cursor->getFirstRecAttr());
unpack_record(table->record[0]);
@@ -7372,17 +7379,21 @@
* Found a record belonging to a pk/index op,
* copy result and move to next to prepare for next call
*/
- * multi_range_found_p= multi_range_curr;
+ *range_info= mrr_get_ptr_by_idx(mrr_iter, first_running_range++);
memcpy(table->record[0], m_multi_range_result_ptr, reclength);
setup_recattr(op->getFirstRecAttr());
unpack_record(table->record[0]);
table->status= 0;
- multi_range_curr++;
m_current_multi_operation= m_active_trans->getNextCompletedOperation(op);
m_multi_range_result_ptr += reclength;
DBUG_RETURN(0);
}
+
+
+/*****************************************************************************
+ * MRR implementation ends
+ ****************************************************************************/
int
ha_ndbcluster::setup_recattr(const NdbRecAttr* curr)
diff -Nru a/sql/ha_ndbcluster.h b/sql/ha_ndbcluster.h
--- a/sql/ha_ndbcluster.h 2006-03-18 14:54:11 +03:00
+++ b/sql/ha_ndbcluster.h 2006-03-18 14:54:11 +03:00
@@ -579,12 +579,17 @@
int alter_tablespace(st_alter_tablespace *info);
/**
- * Multi range stuff
+ * Multi Range Read interface
*/
- int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
- KEY_MULTI_RANGE*ranges, uint range_count,
- bool sorted, HANDLER_BUFFER *buffer);
- int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+ int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+ int multi_range_read_next(char **range_info);
+private:
+ uint first_running_range;
+ uint first_range_in_batch;
+ uint first_unstarted_range;
+ int multi_range_start_retrievals(int first_range);
+public:
bool get_error_message(int error, String *buf);
void info(uint);
@@ -852,8 +857,6 @@
Ndb_cond_stack *m_cond_stack;
bool m_disable_multi_read;
byte *m_multi_range_result_ptr;
- KEY_MULTI_RANGE *m_multi_ranges;
- KEY_MULTI_RANGE *m_multi_range_defined;
const NdbOperation *m_current_multi_operation;
NdbIndexScanOperation *m_multi_cursor;
byte *m_multi_range_cursor_result_ptr;
diff -Nru a/sql/handler.cc b/sql/handler.cc
--- a/sql/handler.cc 2006-03-18 14:54:11 +03:00
+++ b/sql/handler.cc 2006-03-18 14:54:11 +03:00
@@ -2742,94 +2742,277 @@
}
#endif
+
/*
- Read the first row of a multi-range set.
+ Calculate cost of 'index only' scan for given index and number of records.
SYNOPSIS
- read_multi_range_first()
- found_range_p Returns a pointer to the element in 'ranges' that
- corresponds to the returned row.
- ranges An array of KEY_MULTI_RANGE range descriptions.
- range_count Number of ranges in 'ranges'.
- sorted If result should be sorted per key.
- buffer A HANDLER_BUFFER for internal handler usage.
+ index_only_read_time()
+ keynr key to read
+ records #of records to read
NOTES
- Record is read into table->record[0].
- *found_range_p returns a valid value only if read_multi_range_first()
- returns 0.
- Sorting is done within each range. If you want an overall sort, enter
- 'ranges' with sorted ranges.
+ It is assumed that we will read trough the whole key range and that all
+ key blocks are half full (normally things are much better). It is also
+ assumed that each time we read the next key from the index, the handler
+ performs a random seek, thus the cost is proportional to the number of
+ blocks read.
+
+ TODO:
+ Move this to handler->read_time() by adding a flag 'index-only-read' to
+ this call. The reason for doing this is that the current function doesn't
+ handle the case when the row is stored in the b-tree (like in innodb
+ clustered index)
+*/
+
+double handler::index_only_read_time(uint keynr, double records)
+{
+ double read_time;
+ uint keys_per_block= (block_size/2/
+ (table->key_info[keynr].key_length + ref_length) + 1);
+ read_time=((double) (records + keys_per_block-1) /
+ (double) keys_per_block);
+ return read_time;
+}
+
+
+/*
+ Get cost and other information about MRR scan over a known list of ranges
+
+ SYNOPSIS
+ multi_range_read_info_const()
+ keyno Index number
+ seq Range sequence to be traversed
+ seq_init_param First parameter for seq->init()
+ n_ranges Number of ranges in the sequence
+ bufsz INOUT IN: Size of the buffer available for use
+ OUT: Size of the buffer that is expected to be actually
+ used, or 0 if buffer is not needed.
+ flags INOUT A combination of HA_MRR_* flags
+ cost OUT Estimated cost of MRR access
+
+ DESCRIPTION
+ Obtain the cost and other information about an MRR scan when the list of
+ ranges to be scanned is known.
+
+ NOTE
+ Reasons for having this function:
+ * Let high-roundtrip-cost handlers, like NDB/Federated produce the
+ estimates in one roundtrip. (One might ask, why don't we use
+ condition-pushdown-based system for the purpose of estimating number of
+ records that match the table condition?... good question... in any case
+ there is another reason)
+ * We need a specialized cost-of-MRR-over-constant-ranges function.
+ = It must be different from non-MRR-cost function (since MRR is supposed
+ to be better and have a smaller cost)
+ = It must be different from multi_range_read_info() because here we know
+ the ranges.
+ Therefore, we have this function.
RETURN
- 0 OK, found a row
- HA_ERR_END_OF_FILE No rows in range
- # Error code
+ 0 - OK, *cost contains cost of the scan, *bufsz and *flags contain scan
+ parameters.
+ 1 - Error or can't perform the requested scan
*/
-int handler::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
- KEY_MULTI_RANGE *ranges, uint range_count,
- bool sorted, HANDLER_BUFFER *buffer)
-{
- int result= HA_ERR_END_OF_FILE;
- DBUG_ENTER("handler::read_multi_range_first");
- multi_range_sorted= sorted;
- multi_range_buffer= buffer;
-
- for (multi_range_curr= ranges, multi_range_end= ranges + range_count;
- multi_range_curr < multi_range_end;
- multi_range_curr++)
+ha_rows
+handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param, uint n_ranges,
+ uint *bufsz, uint *flags, COST_VECT *cost)
+{
+ KEY_MULTI_RANGE range;
+ range_seq_t seq_it;
+ ha_rows rows, total_rows;
+ *bufsz= 0;
+ total_rows= 0;
+ uint part, max_used_part= 0;
+
+ seq_it= seq->init(seq_init_param, n_ranges, *flags);
+ n_ranges=0;
+ while (!seq->next(seq_it, &range))
{
- result= read_range_first(multi_range_curr->start_key.length ?
- &multi_range_curr->start_key : 0,
- multi_range_curr->end_key.length ?
- &multi_range_curr->end_key : 0,
- test(multi_range_curr->range_flag & EQ_RANGE),
- multi_range_sorted);
- if (result != HA_ERR_END_OF_FILE)
- break;
+ n_ranges++;
+ key_range *min_endp= range.start_key.length? &range.start_key : NULL;
+ key_range *max_endp= range.end_key.length? &range.end_key : NULL;
+ if (!(range.range_flag & UNIQUE_RANGE))
+ {
+ if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
+ max_endp)))
+ {
+ /* Can't scan one range => can't do MRR scan at all */
+ total_rows= HA_POS_ERROR;
+ break;
+ }
+ }
+ else
+ rows= 1; // UNIQUE_RANGE
+
+ total_rows += rows;
+ part= (int)range.ptr;
+ if (part > max_used_part)
+ max_used_part= part;
}
- *found_range_p= multi_range_curr;
- DBUG_PRINT("exit",("result %d", result));
- DBUG_RETURN(result);
+ cost->zero();
+ if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
+ cost->io_count= index_only_read_time(keyno, total_rows);
+ else
+ cost->io_count= read_time(keyno, n_ranges, total_rows);
+ cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+
+ cost->avg_io_cost= 1; // Assume random seeks
+ return total_rows;
}
/*
- Read the next row of a multi-range set.
+ Get cost and other information about MRR scan over currently unknown ranges
SYNOPSIS
- read_multi_range_next()
- found_range_p Returns a pointer to the element in 'ranges' that
- corresponds to the returned row.
+ multi_range_read_info()
+ keyno Index number
+ n_ranges Estimated number of ranges (i.e. intervals) in the
+ range sequence.
+ n_rows Estimated total number of records contained within all
+ of the ranges
+ bufsz INOUT IN: Size of the buffer available for use
+ OUT: Size of the buffer that will be actually used, or
+ 0 if buffer is not needed.
+ flags INOUT A combination of HA_MRR_* flags
+ cost OUT Estimated cost of MRR access
+
+ DESCRIPTION
+ This function obtains the cost and other info about about the MRR scan.
+ The caller has some information about what kinds of ranges will be scanned
+ (e.g. "1000 {keypart1=const} ranges"}, but the constant themselves are
+ assumed not to be known when this function is called.
+
+ The flags parameter is a combination of those flags: HA_MRR_SORTED,
+ HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
+
+ RETURN
+ 0 - OK, *cost contains cost of the scan, *bufsz and *flags contain scan
+ parameters.
+ other - Error or can't perform the requested scan
+*/
+
+int handler::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint *bufsz, uint *flags, COST_VECT *cost)
+{
+ *bufsz= 0; // No need for buffer
+
+ cost->zero();
+ cost->avg_io_cost= 1; // Assume random seeks
+
+ //TODO: flags & HA_MRR_SINGLEPOINT
+
+ /* Produce the same cost as non-MRR code does */
+ if (*flags & HA_MRR_INDEX_ONLY)
+ cost->io_count= index_only_read_time(keyno, keys);
+ else
+ cost->io_count= read_time(keyno, n_ranges, keys);
+ return 0;
+}
+
+
+/*
+ Initialize the MRR scan
+
+ SYNOPSIS
+ multi_range_read_init()
+ seq Range sequence to be traversed
+ seq_init_param First parameter for seq->init()
+ n_ranges Number of ranges in the sequence
+ mode Flags, see the description for details
+ buf INOUT, memory buffer to be used.
+
+ DESCRIPTION
+ Initialize the MRR scan. This function may do heavyweight scan
+ initialization like row prefetching/sorting/etc (NOTE: but better not do
+ it here as we may not need it, e.g. if we never satisfy WHERE clause on
+ previous tables. For many implementations it would be natural to do such
+ initializations in _next() call)
+
+ mode is a combination of the following flags: HA_MRR_SORTED,
+ HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
NOTES
- Record is read into table->record[0].
- *found_range_p returns a valid value only if read_multi_range_next()
- returns 0.
+ One must have called index_init() before calling this function. Several
+ multi_range_read_init() calls may be made in course of one query.
+
+ Until WL#2623 is done (see section 3.2), the following will also hold:
+ The caller will guarantee that if "seq->init == mrr_ranges_array_init"
+ then seq_init_param will be an array of KEY_MULTI_RANGE structures (with
+ n_ranges elements). This property will only be used by NDB handler until
+ WL#2623 is done (see section 3.2 again).
+
+ Buffer memory management is done according to the following scenario:
+ The caller allocates the buffer and provides it to the callee by filling
+ in the members of HANDLER_BUFFER structure (see current code for
+ definition of HANDLER_BUFFER).
+ The callee consumes all or some fraction of the provided buffer space, and
+ sets the HANDLER_BUFFER members accordingly.
+ The callee may use the buffer memory until the next multi_range_read_init()
+ call is made, all records have been read, or until index_end() call is
+ made, whichever comes first.
RETURN
- 0 OK, found a row
- HA_ERR_END_OF_FILE No (more) rows in range
- # Error code
+ 0 - OK
+ 1 - Error
*/
-int handler::read_multi_range_next(KEY_MULTI_RANGE **found_range_p)
+int
+handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+ uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+{
+ DBUG_ENTER("handler::multi_range_read_init");
+ /* For now, do nothing here... just initialize the ranges traversal */
+ mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+ mrr_funcs= *seq_funcs;
+ mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
+ mrr_have_range= FALSE;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Get next record in MRR scan
+
+ SYNOPSIS
+ multi_range_read_next()
+ range_info OUT If HA_MRR_NO_ASSOCIATION flag is in effect or an error
+ occured, nothing (but the caller is still allowed to
+ store an arbitrary value there)
+ Otherwise, the opaque value associated with the range
+ that contains the returned record.
+
+ NOTE
+ It should be possible to process range lists of arbitrary length without
+ any kind of overflows.
+
+ RETURN
+ 0 OK
+ other Error code
+*/
+
+int handler::multi_range_read_next(char **range_info)
{
int result;
- DBUG_ENTER("handler::read_multi_range_next");
+ int range_res;
+ DBUG_ENTER("handler::multi_range_read_next");
- /* We should not be called after the last call returned EOF. */
- DBUG_ASSERT(multi_range_curr < multi_range_end);
+ if (!mrr_have_range)
+ {
+ mrr_have_range= TRUE;
+ goto start;
+ }
do
{
/* Save a call if there can be only one row in range. */
- if (multi_range_curr->range_flag != (UNIQUE_RANGE | EQ_RANGE))
+ if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
{
result= read_range_next();
-
/* On success or non-EOF errors jump to the end. */
if (result != HA_ERR_END_OF_FILE)
break;
@@ -2843,26 +3026,24 @@
result= HA_ERR_END_OF_FILE;
}
+start:
/* Try the next range(s) until one matches a record. */
- for (multi_range_curr++;
- multi_range_curr < multi_range_end;
- multi_range_curr++)
+ while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
{
- result= read_range_first(multi_range_curr->start_key.length ?
- &multi_range_curr->start_key : 0,
- multi_range_curr->end_key.length ?
- &multi_range_curr->end_key : 0,
- test(multi_range_curr->range_flag & EQ_RANGE),
- multi_range_sorted);
+ result= read_range_first(mrr_cur_range.start_key.length ?
+ &mrr_cur_range.start_key : 0,
+ mrr_cur_range.end_key.length ?
+ &mrr_cur_range.end_key : 0,
+ test(mrr_cur_range.range_flag & EQ_RANGE),
+ mrr_is_output_sorted);
if (result != HA_ERR_END_OF_FILE)
break;
}
}
- while ((result == HA_ERR_END_OF_FILE) &&
- (multi_range_curr < multi_range_end));
+ while ((result == HA_ERR_END_OF_FILE) && !range_res);
- *found_range_p= multi_range_curr;
- DBUG_PRINT("exit",("handler::read_multi_range_next: result %d", result));
+ *range_info= mrr_cur_range.ptr;
+ DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
DBUG_RETURN(result);
}
@@ -2889,7 +3070,8 @@
int handler::read_range_first(const key_range *start_key,
const key_range *end_key,
- bool eq_range_arg, bool sorted)
+ bool eq_range_arg,
+ bool sorted /* ignored */)
{
int result;
DBUG_ENTER("handler::read_range_first");
@@ -2922,7 +3104,7 @@
/*
- Read next row between two ranges.
+ Read next row between two endpoints.
SYNOPSIS
read_range_next()
diff -Nru a/sql/handler.h b/sql/handler.h
--- a/sql/handler.h 2006-03-18 14:54:11 +03:00
+++ b/sql/handler.h 2006-03-18 14:54:11 +03:00
@@ -700,6 +700,102 @@
typedef struct system_status_var SSV;
+
+typedef void *range_seq_t;
+
+typedef struct st_range_seq_if
+{
+ /*
+ Initialize the traversal of range sequence
+
+ SYNOPSIS
+ init()
+ init_params The seq_init_param parameter
+ n_ranges The number of ranges obtained
+ flags HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+ RETURN
+ An opaque value to be used as RANGE_SEQ_IF::next() parameter
+ */
+ range_seq_t (*init)(void *init_params, uint n_ranges, uint flags);
+
+
+ /*
+ Get the next range in the range sequence
+
+ SYNOPSIS
+ next()
+ seq The value returned by RANGE_SEQ_IF::init()
+ range OUT
+
+ RETURN
+ 0 - Ok, the range structure filled with info about the next range
+ 1 - No more ranges
+ */
+ uint (*next) (range_seq_t seq, KEY_MULTI_RANGE *range);
+} RANGE_SEQ_IF;
+
+uint16 &mrr_persistent_flag_storage(range_seq_t seq, uint idx);
+char* &mrr_get_ptr_by_idx(range_seq_t seq, uint idx);
+
+class COST_VECT
+{
+public:
+ double io_count; /* number of I/O */
+ double avg_io_cost; /* cost of an average I/O oper. */
+ double cpu_cost; /* cost of operations in CPU */
+ double mem_cost; /* cost of used memory */
+ double import_cost; /* cost of remote operations */
+
+ enum { IO_COEFF=1 };
+ enum { CPU_COEFF=1 };
+ enum { MEM_COEFF=1 };
+ enum { IMPORT_COEFF=1 };
+
+ double total_cost()
+ {
+ return IO_COEFF*io_count*avg_io_cost + CPU_COEFF * cpu_cost +
+ MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
+ }
+
+ void zero()
+ {
+ avg_io_cost= 1.0;
+ io_count= cpu_cost= mem_cost= import_cost= 0.0;
+ }
+};
+
+
+/*
+ The below two are not used (and not handled) in this milestone of this WL
+ entry because there seems to be no use for them at this stage of
+ implementation. (TODO is that so??)
+*/
+#define HA_MRR_SINGLE_POINT 1
+#define HA_MRR_FIXED_KEY 2
+
+/*
+ Indicates that RANGE_SEQ_IF::next(&range) doesn't need to fill in the
+ 'range' parameter.
+*/
+#define HA_MRR_NO_ASSOCIATION 4
+
+/*
+ The MRR user will provide ranges in key order, and MRR implementation
+ must return rows in key order.
+*/
+#define HA_MRR_SORTED 8
+
+/* MRR implementation doesn't have to retrieve full records */
+#define HA_MRR_INDEX_ONLY 16
+
+/*
+ The passed memory buffer is of maximum possible size, the caller can't
+ assume larger buffer.
+*/
+#define HA_MRR_LIMITS 32
+
+
/*
The handler class is the interface for dynamically loadable
storage engines. Do not add ifdefs and take care when adding or
@@ -746,12 +842,20 @@
time_t create_time; /* When table was created */
time_t check_time;
time_t update_time;
+
+ /* Multi range read implementation-used members: */
+ range_seq_t mrr_iter; /* MRR range sequence iterator being traversed */
+ RANGE_SEQ_IF mrr_funcs; /* Saved MRR range sequence traversal functions */
+ HANDLER_BUFFER *multi_range_buffer; /* Saved MRR buffer info */
+ uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
+ /* TRUE <=> source MRR ranges and the output are ordered */
+ bool mrr_is_output_sorted;
+
+ /* TRUE <=> we're currently traversing a range in mrr_cur_range. */
+ bool mrr_have_range;
+ /* Current range (the one we're now returning rows from) */
+ KEY_MULTI_RANGE mrr_cur_range;
- /* The following are for read_multi_range */
- bool multi_range_sorted;
- KEY_MULTI_RANGE *multi_range_curr;
- KEY_MULTI_RANGE *multi_range_end;
- HANDLER_BUFFER *multi_range_buffer;
/* The following are for read_range() */
key_range save_end_range, *end_range;
@@ -826,10 +930,26 @@
table= table_arg;
table_share= share;
}
+ /* Estimates calculation */
virtual double scan_time()
{ return ulonglong2double(data_file_length) / IO_SIZE + 2; }
virtual double read_time(uint index, uint ranges, ha_rows rows)
{ return rows2double(ranges+rows); }
+
+ virtual double index_only_read_time(uint keynr, double records);
+
+ virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags, COST_VECT *cost);
+ virtual int multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint *bufsz, uint *flags, COST_VECT *cost);
+ virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf);
+ virtual int multi_range_read_next(char **range_info);
+
+
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
virtual bool has_transactions(){ return 0;}
virtual uint extra_rec_buf_length() const { return 0; }
@@ -1119,10 +1239,6 @@
virtual int index_next_same(byte *buf, const byte *key, uint keylen);
virtual int index_read_last(byte * buf, const byte * key, uint key_len)
{ return (my_errno=HA_ERR_WRONG_COMMAND); }
- virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
- KEY_MULTI_RANGE *ranges, uint range_count,
- bool sorted, HANDLER_BUFFER *buffer);
- virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
virtual int read_range_first(const key_range *start_key,
const key_range *end_key,
bool eq_range, bool sorted);
diff -Nru a/sql/opt_range.cc b/sql/opt_range.cc
--- a/sql/opt_range.cc 2006-03-18 14:54:11 +03:00
+++ b/sql/opt_range.cc 2006-03-18 14:54:11 +03:00
@@ -35,7 +35,7 @@
The lists are returned in form of complicated structure of interlinked
SEL_TREE/SEL_IMERGE/SEL_ARG objects.
- See check_quick_keys, find_used_partitions for examples of how to walk
+ See quick_range_seq_next, find_used_partitions for examples of how to walk
this structure.
All direct "users" of this module are located within this file, too.
@@ -212,6 +212,7 @@
}
void store_min(uint length,char **min_key,uint min_key_flag)
{
+ /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
if ((min_flag & GEOM_FLAG) ||
(!(min_flag & NO_MIN_RANGE) &&
!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
@@ -226,6 +227,16 @@
(*min_key)+= length;
}
}
+
+ /*
+ SYNOPSIS
+ store()
+ length Key pack length
+ min_key INOUT store here and advance the pointer
+ min_key_flag prev-key-part
+ max_key INOUT store here and advance the pointer
+ max_key_flag prev-key-part
+ */
void store(uint length,char **min_key,uint min_key_flag,
char **max_key, uint max_key_flag)
{
@@ -244,12 +255,20 @@
}
}
+ /*
+ store_min_key()
+ key
+ range_key INOUT
+ range_key_flag INOUT
+ */
void store_min_key(KEY_PART *key,char **range_key, uint *range_key_flag)
{
- SEL_ARG *key_tree= first();
+ SEL_ARG *key_tree= first(); //find the left-most interval of this.
+ // left < X < +inf ... and amend with next key part(s) ???
key_tree->store(key[key_tree->part].store_length,
range_key,*range_key_flag,range_key,NO_MAX_RANGE);
*range_key_flag|= key_tree->min_flag;
+
if (key_tree->next_key_part &&
key_tree->next_key_part->part == key_tree->part+1 &&
!(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)) &&
@@ -471,9 +490,6 @@
static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree);
-static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
- char *min_key,uint min_key_flag,
- char *max_key, uint max_key_flag);
QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
SEL_ARG *key_tree,
@@ -497,8 +513,6 @@
static int get_index_merge_params(PARAM *param, key_map& needed_reg,
SEL_IMERGE *imerge, double *read_time,
ha_rows* imerge_rows);
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
- int keynr);
#ifndef DBUG_OFF
static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
@@ -1938,9 +1952,10 @@
if (!head->used_keys.is_clear_all())
{
int key_for_use= find_shortest_key(head, &head->used_keys);
- double key_read_time= (get_index_only_read_time(¶m, records,
- key_for_use) +
- (double) records / TIME_FOR_COMPARE);
+ double key_read_time=
+ param.table->file->index_only_read_time(key_for_use,
+ rows2double(records)) +
+ (double) records / TIME_FOR_COMPARE;
DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, "
"read time %g", key_for_use, key_read_time));
if (key_read_time < read_time)
@@ -2715,7 +2730,7 @@
{
/*
Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and
- we got "const1 CMP fieldX CMP const2" interval <-- psergey-todo: change
+ we got "const1 CMP fieldX CMP const2" interval
*/
DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
ppar->range_param.
@@ -3518,42 +3533,6 @@
}
-/*
- Calculate cost of 'index only' scan for given index and number of records.
-
- SYNOPSIS
- get_index_only_read_time()
- param parameters structure
- records #of records to read
- keynr key to read
-
- NOTES
- It is assumed that we will read trough the whole key range and that all
- key blocks are half full (normally things are much better). It is also
- assumed that each time we read the next key from the index, the handler
- performs a random seek, thus the cost is proportional to the number of
- blocks read.
-
- TODO:
- Move this to handler->read_time() by adding a flag 'index-only-read' to
- this call. The reason for doing this is that the current function doesn't
- handle the case when the row is stored in the b-tree (like in innodb
- clustered index)
-*/
-
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
- int keynr)
-{
- double read_time;
- uint keys_per_block= (param->table->file->block_size/2/
- (param->table->key_info[keynr].key_length+
- param->table->file->ref_length) + 1);
- read_time=((double) (records+keys_per_block-1)/
- (double) keys_per_block);
- return read_time;
-}
-
-
typedef struct st_ror_scan_info
{
uint idx; /* # of used key in param->keys */
@@ -3629,9 +3608,9 @@
if (bitmap_is_set(¶m->needed_fields, key_part->fieldnr))
bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr);
}
+ double rows= rows2double(param->table->quick_rows[ror_scan->keynr]);
ror_scan->index_read_cost=
- get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr],
- ror_scan->keynr);
+ param->table->file->index_only_read_time(ror_scan->keynr, rows);
DBUG_RETURN(ror_scan);
}
@@ -4398,6 +4377,8 @@
Best range read plan
NULL if no plan found or error occurred
*/
+ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, SEL_ARG *tree,
+ COST_VECT *cost);
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
bool index_read_must_be_used,
@@ -4419,9 +4400,7 @@
"tree scans"););
tree->ror_scans_map.clear_all();
tree->n_ror_scans= 0;
- for (idx= 0,key=tree->keys, end=key+param->keys;
- key != end ;
- key++,idx++)
+ for (idx= 0,key=tree->keys, end=key+param->keys; key != end; key++,idx++)
{
ha_rows found_records;
double found_read_time;
@@ -4435,41 +4414,15 @@
bool read_index_only= index_read_must_be_used ? TRUE :
(bool) param->table->used_keys.is_set(keynr);
- found_records= check_quick_select(param, idx, *key);
- if (param->is_ror_scan)
+ COST_VECT cost;
+ found_records= check_quick_select(param, idx, read_index_only, *key,
+ &cost);
+ found_read_time= cost.total_cost();
+ if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
{
tree->n_ror_scans++;
tree->ror_scans_map.set_bit(idx);
}
- double cpu_cost= (double) found_records / TIME_FOR_COMPARE;
- if (found_records != HA_POS_ERROR && found_records > 2 &&
- read_index_only &&
- (param->table->file->index_flags(keynr, param->max_key_part,1) &
- HA_KEYREAD_ONLY) &&
- !(pk_is_clustered && keynr == param->table->s->primary_key))
- {
- /*
- We can resolve this by only reading through this key.
- 0.01 is added to avoid races between range and 'index' scan.
- */
- found_read_time= get_index_only_read_time(param,found_records,keynr) +
- cpu_cost + 0.01;
- }
- else
- {
- /*
- cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
- The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
- */
- found_read_time= param->table->file->read_time(keynr,
- param->range_count,
- found_records) +
- cpu_cost + 0.01;
- }
- DBUG_PRINT("info",("key %s: found_read_time: %g (cur. read_time: %g)",
- param->table->key_info[keynr].name, found_read_time,
- read_time));
-
if (read_time > found_read_time && found_records != HA_POS_ERROR
/*|| read_time == DBL_MAX*/ )
{
@@ -4477,7 +4430,6 @@
best_records= found_records;
key_to_read= key;
}
-
}
}
@@ -6437,212 +6389,233 @@
#endif
+/****************************************************************************
+ Implementation of MRR range sequence interface that traverses the SEL_ARG*
+ tree
+ ****************************************************************************/
-/*
- Calculate estimate of number records that will be retrieved by a range
- scan on given index using given SEL_ARG intervals tree.
- SYNOPSIS
- check_quick_select
- param Parameter from test_quick_select
- idx Number of index to use in PARAM::key SEL_TREE::key
- tree Transformed selection condition, tree->key[idx] holds intervals
- tree to be used for scanning.
- NOTES
- param->is_ror_scan is set to reflect if the key scan is a ROR (see
- is_key_scan_ror function for more info)
- param->table->quick_*, param->range_count (and maybe others) are
- updated with data of given key scan, see check_quick_keys for details.
+/* MRR range sequence, SEL_ARG* implementation: stack entry */
+typedef struct st_range_seq_entry
+{
+ /*
+ pointers in min and max keys. They point to right-after-end of key
+ images. The 0-th entry has these pointing to key tuple start.
+ */
+ char *min_key, *max_key;
+
+ /* Flags (accumulated with prev. keypart) */
+ uint min_key_flag, max_key_flag;
- RETURN
- Estimate # of records to be retrieved.
- HA_POS_ERROR if estimate calculation failed due to table handler problems.
+ SEL_ARG *key_tree;
+} RANGE_SEQ_ENTRY;
-*/
-static ha_rows
-check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
+/* MRR range sequence, SEL_ARG* implementation: enumeration context */
+typedef struct st_sel_arg_range_seq
{
- ha_rows records;
- bool cpk_scan;
- uint key;
- DBUG_ENTER("check_quick_select");
+ uint keyno; /* index of used tree in SEL_TREE structure */
+ uint real_keyno; /* Number of the index in tables */
+ PARAM *param;
- param->is_ror_scan= FALSE;
+ SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
+
+ RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
+ int i; //todo:psergey:
+
+ bool at_start; /* TRUE <=> The traversal has just started */
+} SEL_ARG_RANGE_SEQ;
- if (!tree)
- DBUG_RETURN(HA_POS_ERROR); // Can't use it
- param->max_key_part=0;
- param->range_count=0;
- key= param->real_keynr[idx];
- if (tree->type == SEL_ARG::IMPOSSIBLE)
- DBUG_RETURN(0L); // Impossible select. return
- if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
- DBUG_RETURN(HA_POS_ERROR); // Don't use tree
+/*
+ Range sequence interface, SEL_ARG* implementation: Initialize the traversal
+ SYNOPSIS
+ init()
+ init_params SEL_ARG tree traversal context
+ n_ranges [ignored] The number of ranges obtained
+ flags [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
- enum ha_key_alg key_alg= param->table->key_info[key].algorithm;
- if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
- {
- /* Records are not ordered by rowid for other types of indexes. */
- cpk_scan= FALSE;
- }
- else
- {
- /*
- Clustered PK scan is a special case, check_quick_keys doesn't recognize
- CPK scans as ROR scans (while actually any CPK scan is a ROR scan).
- */
- cpk_scan= ((param->table->s->primary_key == param->real_keynr[idx]) &&
- param->table->file->primary_key_is_clustered());
- param->is_ror_scan= !cpk_scan;
- }
+ RETURN
+ Same as what has been passed
+*/
- records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0);
- if (records != HA_POS_ERROR)
- {
- param->table->quick_keys.set_bit(key);
- param->table->quick_rows[key]=records;
- param->table->quick_key_parts[key]=param->max_key_part+1;
+range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+ SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param;
+ seq->at_start= TRUE;
- if (cpk_scan)
- param->is_ror_scan= TRUE;
- }
- if (param->table->file->index_flags(key, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
- param->is_ror_scan= FALSE;
- DBUG_PRINT("exit", ("Records: %lu", (ulong) records));
- DBUG_RETURN(records);
+ seq->stack[0].key_tree= NULL;
+ seq->stack[0].min_key= seq->param->min_key;
+ seq->stack[0].min_key_flag= 0;
+ seq->stack[0].max_key= seq->param->max_key;
+ seq->stack[0].max_key_flag= 0;
+ seq->i= 0;
+ return init_param;
}
-/*
- Recursively calculate estimate of # rows that will be retrieved by
- key scan on key idx.
- SYNOPSIS
- check_quick_keys()
- param Parameter from test_quick select function.
- idx Number of key to use in PARAM::keys in list of used keys
- (param->real_keynr[idx] holds the key number in table)
- key_tree SEL_ARG tree being examined.
- min_key Buffer with partial min key value tuple
- min_key_flag
- max_key Buffer with partial max key value tuple
- max_key_flag
-
- NOTES
- The function does the recursive descent on the tree via SEL_ARG::left,
- SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates
- are calculated using records_in_range calls at the leaf nodes and then
- summed.
+static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree)
+{
+ RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1];
+ RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i];
+
+ cur->key_tree= key_tree;
+ cur->min_key= prev->min_key;
+ cur->max_key= prev->max_key;
+
+ key_tree->store(arg->param->key[arg->keyno][key_tree->part].store_length,
+ &cur->min_key, prev->min_key_flag,
+ &cur->max_key, prev->max_key_flag);
+ cur->min_key_flag= prev->min_key_flag | key_tree->min_flag;
+ cur->max_key_flag= prev->max_key_flag | key_tree->max_flag;
- param->min_key and param->max_key are used to hold prefixes of key value
- tuples.
+ (arg->i)++;
+}
- The side effects are:
- param->max_key_part is updated to hold the maximum number of key parts used
- in scan minus 1.
+/*
+ Range sequence interface, SEL_ARG* implementation: get the next interval
+
+ SYNOPSIS
+ sel_arg_range_seq_next()
+ rseq Value returned from sel_arg_range_seq_init
+ range OUT Store information about the range here
- param->range_count is incremented if the function finds a range that
- wasn't counted by the caller.
+ DESCRIPTION
+ This is "get_next" function for Range sequence interface implementation
+ for SEL_ARG* tree.
- param->is_ror_scan is cleared if the function detects that the key scan is
- not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
- function for description of which key scans are ROR scans)
+ RETURN
+ 0 Ok
+ 1 No more ranges in the sequence
*/
-static ha_rows
-check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
- char *min_key,uint min_key_flag, char *max_key,
- uint max_key_flag)
+uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
{
- ha_rows records=0, tmp;
- uint tmp_min_flag, tmp_max_flag, keynr, min_key_length, max_key_length;
- char *tmp_min_key, *tmp_max_key;
-
- param->max_key_part=max(param->max_key_part,key_tree->part);
- if (key_tree->left != &null_element)
+ SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq;
+ SEL_ARG *key_tree, *parent;
+ uint tmp_min_flag, tmp_max_flag;
+ if (seq->at_start)
{
- /*
- There are at least two intervals for current key part, i.e. condition
- was converted to something like
- (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
- This is not a ROR scan if the key is not Clustered Primary Key.
- */
- param->is_ror_scan= FALSE;
- records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag,
- max_key,max_key_flag);
- if (records == HA_POS_ERROR) // Impossible
- return records;
+ key_tree= seq->start;
+ seq->at_start= FALSE;
+ goto walk_up_right;
}
- tmp_min_key= min_key;
- tmp_max_key= max_key;
- key_tree->store(param->key[idx][key_tree->part].store_length,
- &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
- min_key_length= (uint) (tmp_min_key- param->min_key);
- max_key_length= (uint) (tmp_max_key- param->max_key);
+ key_tree= seq->stack[seq->i].key_tree;
+ /* Ok, we're at some "full tuple" position in the tree */
+
+ /* Step down if we can */
+ if (key_tree->next && key_tree->next != &null_element)
+ {
+ //step down; (update the tuple, we'll step right and stay there)
+ seq->i--;
+ step_down_to(seq, key_tree->next);
+ key_tree= key_tree->next;
+ seq->param->is_ror_scan= FALSE;
+ goto walk_right_up;
+ }
- if (param->is_ror_scan)
+ /* Ok, can't step down, walk left until we can step down */
+ while (1)
{
- /*
- If the index doesn't cover entire key, mark the scan as non-ROR scan.
- Actually we're cutting off some ROR scans here.
- */
- uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
- key_part[key_tree->part].fieldnr - 1;
- if (param->table->field[fieldnr]->key_length() !=
- param->key[idx][key_tree->part].length)
- param->is_ror_scan= FALSE;
+ if (seq->i == 1) // can't step left
+ return 1;
+
+ /* Step left */
+ seq->i--;
+ key_tree= seq->stack[seq->i].key_tree;
+
+ /* Step down if we can */
+ if (key_tree->next && key_tree->next != &null_element)
+ {
+ // Step down; update the tuple
+ seq->i--;
+ step_down_to(seq, key_tree->next);
+ key_tree= key_tree->next;
+ break;
+ }
}
- if (key_tree->next_key_part &&
- key_tree->next_key_part->part == key_tree->part+1 &&
- key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
- { // const key as prefix
- if (min_key_length == max_key_length &&
- !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
- !key_tree->min_flag && !key_tree->max_flag)
- {
- tmp=check_quick_keys(param,idx,key_tree->next_key_part,
- tmp_min_key, min_key_flag | key_tree->min_flag,
- tmp_max_key, max_key_flag | key_tree->max_flag);
- goto end; // Ugly, but efficient
+ // Ok, we've stepped down from the path to previous tuple.
+ // Walk right-up while we can.
+walk_right_up:
+ // stepping right...
+ while (key_tree->next_key_part && key_tree->next_key_part != &null_element &&
+ key_tree->next_key_part->part == key_tree->part + 1)
+ {
+ // Here: check if current is not "t.field = const"
+ // if it is not, store next keypart as "addon" and break;
+ {
+ uint min_key_length= seq->stack[seq->i].min_key - seq->param->min_key;
+ uint max_key_length= seq->stack[seq->i].max_key - seq->param->max_key;
+ uint len= seq->stack[seq->i].min_key - seq->stack[seq->i-1].min_key;
+ if (!(min_key_length == max_key_length &&
+ !memcmp(seq->stack[seq->i-1].min_key, seq->stack[seq->i-1].max_key, len) &&
+ !key_tree->min_flag && !key_tree->max_flag))
+ {
+ seq->param->is_ror_scan= FALSE;
+ if (!key_tree->min_flag)
+ key_tree->next_key_part->store_min_key(seq->param->key[seq->keyno],
+ &seq->stack[seq->i].min_key,
+ &seq->stack[seq->i].min_key_flag);
+ if (!key_tree->max_flag)
+ key_tree->next_key_part->store_max_key(seq->param->key[seq->keyno],
+ &seq->stack[seq->i].max_key,
+ &seq->stack[seq->i].max_key_flag);
+ break;
+ }
}
- else
+
+ // Ok, current is "t.field=const" and there is next_key_part.
+ // step right, and walk up there.
+ key_tree= key_tree->next_key_part;
+
+walk_up_right:
+ while (key_tree->prev && key_tree->prev != &null_element)
{
- /* The interval for current key part is not c1 <= keyXpartY <= c1 */
- param->is_ror_scan= FALSE;
+ /* Step "up" */
+ key_tree= key_tree->prev;
}
-
- tmp_min_flag=key_tree->min_flag;
- tmp_max_flag=key_tree->max_flag;
- if (!tmp_min_flag)
- key_tree->next_key_part->store_min_key(param->key[idx], &tmp_min_key,
- &tmp_min_flag);
- if (!tmp_max_flag)
- key_tree->next_key_part->store_max_key(param->key[idx], &tmp_max_key,
- &tmp_max_flag);
- min_key_length= (uint) (tmp_min_key- param->min_key);
- max_key_length= (uint) (tmp_max_key- param->max_key);
+ step_down_to(seq, key_tree);
}
- else
+
+ /* Ok, we are at full tuple */
+ RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+ uint min_key_length= cur->min_key - seq->param->min_key;
+ uint max_key_length= cur->max_key - seq->param->max_key;
+
+ range->ptr= (char*)(int)(key_tree->part);
+ if (cur->min_key_flag & GEOM_FLAG)
{
- tmp_min_flag=min_key_flag | key_tree->min_flag;
- tmp_max_flag=max_key_flag | key_tree->max_flag;
- }
+ range->range_flag= cur->min_key_flag;
- keynr=param->real_keynr[idx];
- param->range_count++;
- if (!tmp_min_flag && ! tmp_max_flag &&
- (uint) key_tree->part+1 == param->table->key_info[keynr].key_parts &&
- (param->table->key_info[keynr].flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
- HA_NOSAME &&
- min_key_length == max_key_length &&
- !memcmp(param->min_key,param->max_key,min_key_length))
- tmp=1; // Max one record
+ /* Here minimum contains also function code bits, and maximum is +inf */
+ range->start_key.key= (byte*)seq->param->min_key;
+ range->start_key.length= min_key_length;
+ range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
+ }
else
{
- if (param->is_ror_scan)
+ range->range_flag= cur->min_key_flag | cur->max_key_flag;
+
+ range->start_key.key= (byte*)seq->param->min_key;
+ range->start_key.length= cur->min_key - seq->param->min_key;
+ range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
+ HA_READ_KEY_EXACT);
+
+ range->end_key.key= (byte*)seq->param->max_key;
+ range->end_key.length= cur->max_key - seq->param->max_key;
+ range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
+ HA_READ_AFTER_KEY);
+ if (!cur->min_key_flag && !cur->max_key_flag &&
+ (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts &&
+ (seq->param->table->key_info[seq->real_keyno].flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
+ HA_NOSAME &&
+ range->start_key.length == range->end_key.length &&
+ !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length))
+ range->range_flag= UNIQUE_RANGE;
+
+ if (seq->param->is_ror_scan)
{
/*
If we get here, the condition on the key was converted to form
@@ -6653,63 +6626,108 @@
uncovered "tail" of KeyX parts is either empty or is identical to
first members of clustered primary key.
*/
- if (!(min_key_length == max_key_length &&
- !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
- !key_tree->min_flag && !key_tree->max_flag &&
- is_key_scan_ror(param, keynr, key_tree->part + 1)))
- param->is_ror_scan= FALSE;
- }
-
- if (tmp_min_flag & GEOM_FLAG)
- {
- key_range min_range;
- min_range.key= (byte*) param->min_key;
- min_range.length= min_key_length;
- /* In this case tmp_min_flag contains the handler-read-function */
- min_range.flag= (ha_rkey_function) (tmp_min_flag ^ GEOM_FLAG);
-
- tmp= param->table->file->records_in_range(keynr, &min_range,
- (key_range*) 0);
- }
- else
- {
- key_range min_range, max_range;
-
- min_range.key= (byte*) param->min_key;
- min_range.length= min_key_length;
- min_range.flag= (tmp_min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
- HA_READ_KEY_EXACT);
- max_range.key= (byte*) param->max_key;
- max_range.length= max_key_length;
- max_range.flag= (tmp_max_flag & NEAR_MAX ?
- HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
- tmp=param->table->file->records_in_range(keynr,
- (min_key_length ? &min_range :
- (key_range*) 0),
- (max_key_length ? &max_range :
- (key_range*) 0));
+ if (!(!cur->min_key_flag && !cur->max_key_flag &&
+ (range->start_key.length == range->end_key.length) &&
+ !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) &&
+ is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1)))
+ seq->param->is_ror_scan= FALSE;
}
}
- end:
- if (tmp == HA_POS_ERROR) // Impossible range
- return tmp;
- records+=tmp;
- if (key_tree->right != &null_element)
+ seq->param->range_count++;
+ seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part);
+ return 0;
+}
+
+
+/*
+ Calculate estimate of number records that will be retrieved by a range
+ scan on given index using given SEL_ARG intervals tree.
+ SYNOPSIS
+ check_quick_select
+ param Parameter from test_quick_select
+ idx Number of index to use in PARAM::key SEL_TREE::key
+ tree Transformed selection condition, tree->key[idx] holds intervals
+ tree to be used for scanning.
+ NOTES
+ param->is_ror_scan is set to reflect if the key scan is a ROR (see
+ is_key_scan_ror function for more info)
+ param->table->quick_*, param->range_count (and maybe others) are
+ updated with data of given key scan, see quick_range_seq_next for details.
+
+ RETURN
+ Estimate # of records to be retrieved.
+ HA_POS_ERROR if estimate calculation failed due to table handler problems.
+
+*/
+ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, SEL_ARG *tree,
+ COST_VECT *cost)
+{
+ DBUG_ENTER("check_quick_select");
+ SEL_ARG_RANGE_SEQ seq;
+ bool cpk_scan;
+
+ if (tree->type == SEL_ARG::IMPOSSIBLE)
+ DBUG_RETURN(0L); // Impossible select. return
+ if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
+ DBUG_RETURN(HA_POS_ERROR); // Don't use tree
+
+ seq.keyno= idx;
+ seq.real_keyno= param->real_keynr[idx];
+ seq.param= param;
+ seq.start= tree;
+
+ RANGE_SEQ_IF seq_if = {sel_arg_range_seq_init, sel_arg_range_seq_next};
+ uint bufsize= 0;
+ uint flags= 0;
+ ha_rows rows;
+
+ handler *file= param->table->file;
+ param->range_count=0;
+ param->max_key_part=0;
+ param->is_ror_scan= FALSE;
+
+ enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
+ if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
+ {
+ /* Records are not ordered by rowid for other types of indexes. */
+ cpk_scan= FALSE;
+ }
+ else
{
/*
- There are at least two intervals for current key part, i.e. condition
- was converted to something like
- (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
- This is not a ROR scan if the key is not Clustered Primary Key.
+ Clustered PK scan is a special case, quick_range_seq_next doesn't
+ recognize CPK scans as ROR scans (while actually any CPK scan is a ROR
+ scan).
*/
- param->is_ror_scan= FALSE;
- tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag,
- max_key,max_key_flag);
- if (tmp == HA_POS_ERROR)
- return tmp;
- records+=tmp;
+ cpk_scan= ((param->table->s->primary_key == param->real_keynr[idx]) &&
+ param->table->file->primary_key_is_clustered());
+ param->is_ror_scan= !cpk_scan;
}
- return records;
+
+ bool pk_is_clustered= file->primary_key_is_clustered();
+ uint keynr= param->real_keynr[idx];
+ if (index_only &&
+ (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
+ !(pk_is_clustered && keynr == param->table->s->primary_key))
+ flags |= HA_MRR_INDEX_ONLY;
+
+ rows= file->multi_range_read_info_const(param->real_keynr[idx], &seq_if,
+ (void*)&seq,
+ 0, // todo: n_ranges
+ &bufsize, &flags, cost);
+ if (rows != HA_POS_ERROR)
+ {
+ param->table->quick_keys.set_bit(seq.real_keyno);
+ param->table->quick_rows[seq.real_keyno]=rows;
+ param->table->quick_key_parts[seq.real_keyno]=param->max_key_part+1;
+
+ if (cpk_scan)
+ param->is_ror_scan= TRUE;
+ }
+ if (param->table->file->index_flags(seq.real_keyno, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
+ param->is_ror_scan= FALSE;
+ DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
+ DBUG_RETURN(rows);
}
@@ -6743,7 +6761,7 @@
Scans on HASH indexes are not ROR scans,
any range scan on clustered primary key is ROR scan (3)
- Check (1) is made in check_quick_keys()
+ Check (1) is made in quick_range_seq_next()
Check (3) is made check_quick_select()
Check (2) is made by this function.
@@ -6759,9 +6777,19 @@
KEY_PART_INFO *key_part_end= (table_key->key_part +
table_key->key_parts);
uint pk_number;
+
+ for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
+ {
+ uint16 fieldnr= param->table->key_info[keynr].
+ key_part[kp - table_key->key_part].fieldnr - 1;
+ if (param->table->field[fieldnr]->key_length() != kp->length)
+ return FALSE;
+ }
if (key_part == key_part_end)
return TRUE;
+
+ key_part= table_key->key_part + nparts;
pk_number= param->table->s->primary_key;
if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
return FALSE;
@@ -6971,7 +6999,20 @@
}
-/* Returns TRUE if any part of the key is NULL */
+
+/*
+ Return TRUE if any part of the key is NULL
+
+ SYNOPSIS
+ null_part_in_key()
+ key_part Array of key parts (index description)
+ key Key values tuple
+ length Length of key values tuple in bytes.
+
+ RETURN
+ TRUE The tuple has at least one "keypartX is NULL"
+ FALSE Otherwise
+*/
static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length)
{
@@ -7426,6 +7467,7 @@
DBUG_RETURN(error);
}
+
int QUICK_RANGE_SELECT::reset()
{
uint mrange_bufsiz;
@@ -7443,7 +7485,7 @@
if (multi_range_length)
{
DBUG_ASSERT(multi_range_length == min(multi_range_count, ranges.elements));
- DBUG_RETURN(0);
+ goto end;
}
/* Allocate the ranges array. */
@@ -7500,11 +7542,135 @@
bzero((char*) mrange_buff, mrange_bufsiz);
#endif
}
+end:
+ RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next};
+ file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
+ (sorted?HA_MRR_SORTED:0) | HA_MRR_NO_ASSOCIATION, multi_range_buff);
DBUG_RETURN(0);
}
/*
+ Range sequence interface implementation for array<QUICK_RANGE>: initialize
+
+ SYNOPSIS
+ quick_range_seq_init()
+ init_param Caller-opaque paramenter: QUICK_RANGE_SELECT* pointer
+ n_ranges Number of ranges in the sequence (ignored)
+ flags MRR flags (currently not used)
+
+ RETURN
+ Opaque value to be passed to quick_range_seq_next
+*/
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+ QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
+ quick->qr_traversal_ctx.first= (QUICK_RANGE**)quick->ranges.buffer;
+ quick->qr_traversal_ctx.cur= (QUICK_RANGE**)quick->ranges.buffer;
+ quick->qr_traversal_ctx.last= quick->qr_traversal_ctx.cur +
+ quick->ranges.elements;
+ return &quick->qr_traversal_ctx;
+}
+
+
+/*
+ Range sequence interface implementation for array<QUICK_RANGE>: get next
+
+ SYNOPSIS
+ quick_range_seq_next()
+ rseq Value returned from quick_range_seq_init
+ range OUT Store information about the range here
+
+ RETURN
+ 0 Ok
+ 1 No more ranges in the sequence
+*/
+
+uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+ QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
+
+ if (ctx->cur == ctx->last)
+ return 1; /* no more ranges */
+
+ QUICK_RANGE *cur= *(ctx->cur);
+ key_range *start_key= &range->start_key;
+ key_range *end_key= &range->end_key;
+
+ start_key->key= (const byte*)cur->min_key;
+ start_key->length= cur->min_length;
+ start_key->flag= ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
+ (cur->flag & EQ_RANGE) ?
+ HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
+ end_key->key= (const byte*)cur->max_key;
+ end_key->length= cur->max_length;
+ /*
+ We use HA_READ_AFTER_KEY here because if we are reading on a key
+ prefix. We want to find all keys with this prefix.
+ */
+ end_key->flag= (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
+ HA_READ_AFTER_KEY);
+ range->range_flag= cur->flag;
+ ctx->cur++;
+ return 0;
+}
+
+
+/*
+ MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
+
+ SYNOPSIS
+ mrr_persistent_flag_storage()
+ seq Range sequence being traversed
+ idx Number of range
+
+ DESCRIPTION
+ MRR/NDB implementation needs to store some bits for each range. This
+ function returns a reference to the "range_flag" associated with the
+ range number idx.
+
+ This function should be removed when we get a proper MRR/NDB
+ implementation.
+
+ RETURN
+ Reference to range_flag associated with range number #idx
+*/
+
+uint16 &mrr_persistent_flag_storage(range_seq_t seq, uint idx)
+{
+ QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)seq;
+ return ctx->first[idx]->flag;
+}
+
+
+/*
+ MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
+
+ SYNOPSIS
+ mrr_get_ptr_by_idx()
+ seq Range sequence bening traversed
+ idx Number of range
+
+ DESCRIPTION
+ Current MRR/NDB code traverses the sequence of ranges several times and
+ it accesses range-associated data on the second pass. Multiple range
+ sequence interface traversals are not allowed in the new MRR interface,
+ so this function retu
+
+ RETURN
+ Reference to range-associated data.
+*/
+
+char* &mrr_get_ptr_by_idx(range_seq_t seq, uint idx)
+{
+ static char *dummy;
+ QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)seq;
+ return dummy;
+}
+
+
+/*
Get next possible record using quick-struct.
SYNOPSIS
@@ -7521,71 +7687,8 @@
int QUICK_RANGE_SELECT::get_next()
{
- int result;
- KEY_MULTI_RANGE *mrange;
- key_range *start_key;
- key_range *end_key;
- DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
- DBUG_ASSERT(multi_range_length && multi_range &&
- (cur_range >= (QUICK_RANGE**) ranges.buffer) &&
- (cur_range <= (QUICK_RANGE**) ranges.buffer + ranges.elements));
-
- for (;;)
- {
- if (in_range)
- {
- /* We did already start to read this key. */
- result= file->read_multi_range_next(&mrange);
- if (result != HA_ERR_END_OF_FILE)
- {
- in_range= ! result;
- DBUG_RETURN(result);
- }
- }
-
- uint count= min(multi_range_length, ranges.elements -
- (cur_range - (QUICK_RANGE**) ranges.buffer));
- if (count == 0)
- {
- /* Ranges have already been used up before. None is left for read. */
- in_range= FALSE;
- DBUG_RETURN(HA_ERR_END_OF_FILE);
- }
- KEY_MULTI_RANGE *mrange_slot, *mrange_end;
- for (mrange_slot= multi_range, mrange_end= mrange_slot+count;
- mrange_slot < mrange_end;
- mrange_slot++)
- {
- start_key= &mrange_slot->start_key;
- end_key= &mrange_slot->end_key;
- range= *(cur_range++);
-
- start_key->key= (const byte*) range->min_key;
- start_key->length= range->min_length;
- start_key->flag= ((range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
- (range->flag & EQ_RANGE) ?
- HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
- end_key->key= (const byte*) range->max_key;
- end_key->length= range->max_length;
- /*
- We use HA_READ_AFTER_KEY here because if we are reading on a key
- prefix. We want to find all keys with this prefix.
- */
- end_key->flag= (range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
- HA_READ_AFTER_KEY);
-
- mrange_slot->range_flag= range->flag;
- }
-
- result= file->read_multi_range_first(&mrange, multi_range, count,
- sorted, multi_range_buff);
- if (result != HA_ERR_END_OF_FILE)
- {
- in_range= ! result;
- DBUG_RETURN(result);
- }
- in_range= FALSE; /* No matching rows; go to next set of ranges. */
- }
+ char *dummy;
+ return file->multi_range_read_next(&dummy);
}
@@ -8615,8 +8718,11 @@
cur_index_tree= get_index_range_tree(cur_index, tree, param,
&cur_param_idx);
/* Check if this range tree can be used for prefix retrieval. */
- cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
- cur_index_tree);
+ COST_VECT dummy;
+ cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
+ FALSE /*don't care*/,
+ cur_index_tree,
+ &dummy);
}
cost_group_min_max(table, cur_index_info, used_key_parts,
cur_group_key_parts, tree, cur_index_tree,
diff -Nru a/sql/opt_range.h b/sql/opt_range.h
--- a/sql/opt_range.h 2006-03-18 14:54:11 +03:00
+++ b/sql/opt_range.h 2006-03-18 14:54:11 +03:00
@@ -252,6 +252,22 @@
class PARAM;
class SEL_ARG;
+
+/*
+ MRR range sequence, array<QUICK_RANGE> implementation: sequence traversal
+ context.
+*/
+typedef struct st_quick_range_seq_ctx
+{
+ QUICK_RANGE **first;
+ QUICK_RANGE **cur;
+ QUICK_RANGE **last;
+} QUICK_RANGE_SEQ_CTX;
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags);
+uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+
+
/*
Quick select that does a range scan on a single key. The records are
returned in key order.
@@ -277,7 +293,7 @@
freed by QUICK_RANGE_SELECT) */
HANDLER_BUFFER *multi_range_buff; /* the handler buffer (allocated and
freed by QUICK_RANGE_SELECT) */
-
+ QUICK_RANGE_SEQ_CTX qr_traversal_ctx;
protected:
friend class TRP_ROR_INTERSECT;
friend
@@ -295,6 +311,9 @@
friend class QUICK_SELECT_DESC;
friend class QUICK_INDEX_MERGE_SELECT;
friend class QUICK_ROR_INTERSECT_SELECT;
+ friend uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+ friend range_seq_t quick_range_seq_init(void *init_param,
+ uint n_ranges, uint flags);
DYNAMIC_ARRAY ranges; /* ordered array of range ptrs */
QUICK_RANGE **cur_range; /* current element in ranges */
diff -Nru a/sql/sql_select.cc b/sql/sql_select.cc
--- a/sql/sql_select.cc 2006-03-18 14:54:10 +03:00
+++ b/sql/sql_select.cc 2006-03-18 14:54:11 +03:00
@@ -3456,12 +3456,10 @@
if (table->used_keys.is_set(key))
{
/* we can use only index tree */
- uint keys_per_block= table->file->block_size/2/
- (keyinfo->key_length+table->file->ref_length)+1;
- tmp = record_count*(tmp+keys_per_block-1)/keys_per_block;
+ tmp= record_count * table->file->index_only_read_time(key, tmp);
}
else
- tmp = record_count*min(tmp,s->worst_seeks);
+ tmp= record_count * min(tmp,s->worst_seeks);
}
}
else
@@ -3546,12 +3544,10 @@
if (table->used_keys.is_set(key))
{
/* we can use only index tree */
- uint keys_per_block= table->file->block_size/2/
- (keyinfo->key_length+table->file->ref_length)+1;
- tmp = record_count*(tmp+keys_per_block-1)/keys_per_block;
+ tmp= record_count * table->file->index_only_read_time(key, tmp);
}
else
- tmp = record_count*min(tmp,s->worst_seeks);
+ tmp = record_count * min(tmp,s->worst_seeks);
}
else
tmp = best_time; // Do nothing
@@ -4468,12 +4464,10 @@
if (table->used_keys.is_set(key))
{
/* we can use only index tree */
- uint keys_per_block= table->file->block_size/2/
- (keyinfo->key_length+table->file->ref_length)+1;
- tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
+ tmp= record_count*table->file->index_only_read_time(key, tmp);
}
else
- tmp=record_count*min(tmp,s->worst_seeks);
+ tmp= record_count*min(tmp,s->worst_seeks);
}
}
else
@@ -4554,12 +4548,11 @@
if (table->used_keys.is_set(key))
{
/* we can use only index tree */
- uint keys_per_block= table->file->block_size/2/
- (keyinfo->key_length+table->file->ref_length)+1;
- tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
+ tmp= record_count * table->file->index_only_read_time(key,
+ tmp);
}
else
- tmp=record_count*min(tmp,s->worst_seeks);
+ tmp=record_count * min(tmp,s->worst_seeks);
}
else
tmp=best_time; // Do nothing
BR
Sergey
--
Sergey Petrunia, Software Developer
MySQL AB, www.mysql.com
Office: N/A
| Thread |
|---|
| • bk commit - 5.2 tree (sergefp:1.2154) WL#2474 | Sergey Petrunia | 18 Mar |