Below is the list of changes that have just been committed into a local
6.0 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-01-12 02:03:55+03:00, sergefp@stripped +4 -0
BUG#33257 "range access on partitioned falcon table actually scans entire indexes"
- Make ha_partition to pass read_range_XXX() calls to partition handlers, without
converting
them to index_read/index_next-family calls.
This is needed for Falcon (which is terribly inefficient when it doesn't get the other
endpoint) and possibly other storage engines with similar characteristics.
This patch also fixes BUG#30573 (bk trigger: mark as fix for BUG#33257)
mysql-test/r/partition_range.result@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped
+20 -0
BUG#30573: Testcase
mysql-test/t/partition_range.test@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +18
-18
BUG#30573: Testcase
sql/ha_partition.cc@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +114 -65
BUG#33257 "range access on partitioned falcon table actually scans entire indexes"
- Make ha_partition to pass read_range_XXX() calls to partition handlers, without
converting
them to index_read/index_next-family calls.
sql/ha_partition.h@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +3 -6
BUG#33257 "range access on partitioned falcon table actually scans entire indexes"
- Make ha_partition to pass read_range_XXX() calls to partition handlers, without
converting
them to index_read/index_next-family calls.
diff -Nrup a/mysql-test/r/partition_range.result b/mysql-test/r/partition_range.result
--- a/mysql-test/r/partition_range.result 2007-12-13 20:34:03 +03:00
+++ b/mysql-test/r/partition_range.result 2008-01-12 02:03:44 +03:00
@@ -709,3 +709,23 @@ WHERE (a >= '2004-07-01' AND a <= '2004-
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p407,p408,p409,p507,p508,p509 ALL NULL NULL NULL NULL 18 Using where
DROP TABLE t1;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+CREATE TABLE t2 (
+defid int(10) unsigned NOT NULL,
+day int(10) unsigned NOT NULL,
+count int(10) unsigned NOT NULL,
+filler char(200),
+KEY (defid,day)
+)
+PARTITION BY RANGE (day) (
+PARTITION p7 VALUES LESS THAN (20070401) ,
+PARTITION p8 VALUES LESS THAN (20070501));
+insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
+insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
+insert into t2 values(52, 20070321, 123, 'filler') ;
+insert into t2 values(52, 20070322, 456, 'filler') ;
+select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and
20070401 group by defid;
+sum(count)
+579
+drop table t1, t2;
diff -Nrup a/mysql-test/t/partition_range.test b/mysql-test/t/partition_range.test
--- a/mysql-test/t/partition_range.test 2007-12-13 20:34:03 +03:00
+++ b/mysql-test/t/partition_range.test 2008-01-12 02:03:44 +03:00
@@ -762,24 +762,24 @@ DROP TABLE t1;
#
# BUG#30573: get wrong result with "group by" on PARTITIONed table
#
-#create table t1 (a int);
-#insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-#CREATE TABLE t2 (
-# defid int(10) unsigned NOT NULL,
-# day int(10) unsigned NOT NULL,
-# count int(10) unsigned NOT NULL,
-# filler char(200),
-# KEY (defid,day)
-#)
-#PARTITION BY RANGE (day) (
-# PARTITION p7 VALUES LESS THAN (20070401) ,
-# PARTITION p8 VALUES LESS THAN (20070501));
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+CREATE TABLE t2 (
+ defid int(10) unsigned NOT NULL,
+ day int(10) unsigned NOT NULL,
+ count int(10) unsigned NOT NULL,
+ filler char(200),
+ KEY (defid,day)
+)
+PARTITION BY RANGE (day) (
+ PARTITION p7 VALUES LESS THAN (20070401) ,
+ PARTITION p8 VALUES LESS THAN (20070501));
-#insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
-#insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
-#insert into t2 values(52, 20070321, 123, 'filler') ;
-#insert into t2 values(52, 20070322, 456, 'filler') ;
+insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
+insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
+insert into t2 values(52, 20070321, 123, 'filler') ;
+insert into t2 values(52, 20070322, 456, 'filler') ;
-#select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and
20070401 group by defid;
-#drop table t1, t2;
+select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and
20070401 group by defid;
+drop table t1, t2;
diff -Nrup a/sql/ha_partition.cc b/sql/ha_partition.cc
--- a/sql/ha_partition.cc 2007-12-13 20:40:36 +03:00
+++ b/sql/ha_partition.cc 2008-01-12 02:03:44 +03:00
@@ -3525,10 +3525,12 @@ int ha_partition::index_read_map(uchar *
enum ha_rkey_function find_flag)
{
DBUG_ENTER("ha_partition::index_read_map");
-
end_range= 0;
m_index_scan_type= partition_index_read;
- DBUG_RETURN(common_index_read(buf, key, keypart_map, find_flag));
+ m_start_key.key= key;
+ m_start_key.keypart_map= keypart_map;
+ m_start_key.flag= find_flag;
+ DBUG_RETURN(common_index_read(buf, TRUE));
}
@@ -3536,43 +3538,62 @@ int ha_partition::index_read_map(uchar *
Common routine for a number of index_read variants
SYNOPSIS
- common_index_read
-
- see index_read for rest
+ ha_partition::common_index_read()
+ buf Buffer where the record should be returned
+ have_start_key TRUE <=> the left endpoint is available, i.e.
+ we're in index_read call or in read_range_first
+ call and the range has left endpoint
+
+ FALSE <=> there is no left endpoint (we're in
+ read_range_first() call and the range has no left
+ endpoint)
+
+ DESCRIPTION
+ Start scanning the range (when invoked from read_range_first()) or doing
+ an index lookup (when invoked from index_read_XXX):
+ - If possible, perform partition selection
+ - Find the set of partitions we're going to use
+ - Depending on whether we need ordering:
+ NO: Get the first record from first used partition (see
+ handle_unordered_scan_next_partition)
+ YES: Fill the priority queue and get the record that is the first in
+ the ordering
+
+ RETURN
+ 0 OK
+ other HA_ERR_END_OF_FILE or other error code.
*/
-int ha_partition::common_index_read(uchar *buf, const uchar *key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag)
+int ha_partition::common_index_read(uchar *buf, bool have_start_key)
{
int error;
+ uint key_len;
bool reverse_order= FALSE;
- bool index_read_flag= !(key == NULL);
- uint key_len= calculate_key_len(table, active_index, key, keypart_map);
DBUG_ENTER("ha_partition::common_index_read");
+ LINT_INIT(key_len); /* used iff have_start_key==TRUE */
- if (index_read_flag)
+ if (have_start_key)
{
- memcpy((void*)m_start_key.key, key, key_len);
- m_start_key.keypart_map= keypart_map;
- m_start_key.length= key_len;
- m_start_key.flag= find_flag;
+ m_start_key.length= key_len= calculate_key_len(table, active_index,
+ m_start_key.key,
+ m_start_key.keypart_map);
}
- if ((error= partition_scan_set_up(buf, index_read_flag)))
+ if ((error= partition_scan_set_up(buf, have_start_key)))
{
DBUG_RETURN(error);
}
- if (find_flag == HA_READ_PREFIX_LAST ||
- find_flag == HA_READ_PREFIX_LAST_OR_PREV ||
- find_flag == HA_READ_BEFORE_KEY)
+
+ if (have_start_key &&
+ (m_start_key.flag == HA_READ_PREFIX_LAST ||
+ m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
+ m_start_key.flag == HA_READ_BEFORE_KEY))
{
reverse_order= TRUE;
m_ordered_scan_ongoing= TRUE;
}
if (!m_ordered_scan_ongoing ||
- (find_flag == HA_READ_KEY_EXACT &&
- (key_len >= m_curr_key_info->key_length ||
- key_len == 0)))
+ (have_start_key && m_start_key.flag == HA_READ_KEY_EXACT &&
+ (key_len >= m_curr_key_info->key_length || key_len == 0)))
{
/*
We use unordered index scan either when read_range is used and flag
@@ -3664,7 +3685,7 @@ int ha_partition::index_last(uchar * buf
Common routine for index_first/index_last
SYNOPSIS
- common_index_first_last
+ ha_partition::common_first_last()
see index_first for rest
*/
@@ -3708,7 +3729,10 @@ int ha_partition::index_read_last_map(uc
m_ordered= TRUE; // Safety measure
end_range= 0;
m_index_scan_type= partition_index_read_last;
- DBUG_RETURN(common_index_read(buf, key, keypart_map, HA_READ_PREFIX_LAST));
+ m_start_key.key= key;
+ m_start_key.keypart_map= keypart_map;
+ m_start_key.flag= HA_READ_PREFIX_LAST;
+ DBUG_RETURN(common_index_read(buf, TRUE));
}
@@ -3827,9 +3851,9 @@ int ha_partition::read_range_first(const
{
int error;
DBUG_ENTER("ha_partition::read_range_first");
-
m_ordered= sorted;
eq_range= eq_range_arg;
+
end_range= 0;
if (end_key)
{
@@ -3839,27 +3863,20 @@ int ha_partition::read_range_first(const
((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
(end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
}
- range_key_part= m_curr_key_info->key_part;
- if (!start_key) // Read first record
- {
- if (m_ordered)
- m_index_scan_type= partition_index_first;
- else
- m_index_scan_type= partition_index_first_unordered;
- error= common_first_last(m_rec0);
- }
+ range_key_part= m_curr_key_info->key_part;
+ if (start_key)
+ m_start_key= *start_key;
else
- {
- m_index_scan_type= partition_index_read;
- error= common_index_read(m_rec0,
- start_key->key,
- start_key->keypart_map, start_key->flag);
- }
+ m_start_key.key= NULL;
+
+ m_index_scan_type= partition_read_range;
+ error= common_index_read(m_rec0, test(start_key));
DBUG_RETURN(error);
}
+
/*
Read next record in read of a range with start and end key
@@ -3877,26 +3894,35 @@ int ha_partition::read_range_next()
if (m_ordered)
{
- DBUG_RETURN(handler::read_range_next());
+ DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
}
- DBUG_RETURN(handle_unordered_next(m_rec0, eq_range));
+ DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
}
/*
- Common routine to set up scans
+ Common routine to set up index scans scans
SYNOPSIS
- buf Buffer to later return record in
- idx_read_flag Is it index scan
+ ha_partition::partition_scan_set_up()
+ buf Buffer to later return record in (this function
+ needs it to calculate partitioning function values)
+
+ idx_read_flag TRUE <=> m_start_key has range start endpoint which
+ probably can be used to determine the set of partitions
+ to scan.
+ FALSE <=> there is no start endpoint.
+
+ DESCRIPTION
+ Find out which partitions we'll need to read when scanning the specified
+ range.
+
+ If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
+ as we will not need to do merge ordering.
RETURN VALUE
>0 Error code
0 Success
-
- DESCRIPTION
- This is where we check which partitions to actually scan if not all
- of them
*/
int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
@@ -3978,6 +4004,7 @@ int ha_partition::partition_scan_set_up(
all fields in the partition function is bound. In this case the index
scan is performed on only one partition and thus it isn't necessary to
perform any sort.
+
*/
int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
@@ -3987,10 +4014,19 @@ int ha_partition::handle_unordered_next(
DBUG_ENTER("ha_partition::handle_unordered_next");
/*
- We should consider if this should be split into two functions as
- next_same is alwas a local constant
+ We should consider if this should be split into three functions as
+ partition_read_range is_next_same are always local constants
*/
- if (is_next_same)
+
+ if (m_index_scan_type == partition_read_range)
+ {
+ if (!(error= file->read_range_next()))
+ {
+ m_last_part= m_part_spec.start_part;
+ DBUG_RETURN(0);
+ }
+ }
+ else if (is_next_same)
{
if (!(error= file->index_next_same(buf, m_start_key.key,
m_start_key.length)))
@@ -3999,15 +4035,13 @@ int ha_partition::handle_unordered_next(
DBUG_RETURN(0);
}
}
- else if (!(error= file->index_next(buf)))
+ else
{
- if (!(file->table_flags() & HA_READ_ORDER) ||
- compare_key(end_range) <= 0)
+ if (!(error= file->index_next(buf)))
{
m_last_part= m_part_spec.start_part;
DBUG_RETURN(0); // Row was in range
}
- error= HA_ERR_END_OF_FILE;
}
if (error == HA_ERR_END_OF_FILE)
@@ -4051,6 +4085,10 @@ int ha_partition::handle_unordered_scan_
file= m_file[i];
m_part_spec.start_part= i;
switch (m_index_scan_type) {
+ case partition_read_range:
+ DBUG_PRINT("info", ("read_range_first on partition %d", i));
+ error= file->read_range_first(&m_start_key, end_range, eq_range, FALSE);
+ break;
case partition_index_read:
DBUG_PRINT("info", ("index_read on partition %d", i));
error= file->index_read_map(buf, m_start_key.key,
@@ -4079,13 +4117,8 @@ int ha_partition::handle_unordered_scan_
}
if (!error)
{
- if (!(file->table_flags() & HA_READ_ORDER) ||
- compare_key(end_range) <= 0)
- {
- m_last_part= i;
- DBUG_RETURN(0);
- }
- error= HA_ERR_END_OF_FILE;
+ m_last_part= i;
+ DBUG_RETURN(0);
}
if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
DBUG_RETURN(error);
@@ -4164,6 +4197,17 @@ int ha_partition::handle_ordered_index_s
m_start_key.keypart_map);
reverse_order= TRUE;
break;
+ case partition_read_range:
+ {
+ /*
+ This can only read record to table->record[0], as it was set when
+ the table was being opened. We have to memcpy data ourselves.
+ */
+ error= file->read_range_first(&m_start_key, end_range, eq_range, TRUE);
+ memcpy(rec_buf_ptr, table->record[0], m_rec_length);
+ reverse_order= FALSE;
+ break;
+ }
default:
DBUG_ASSERT(FALSE);
DBUG_RETURN(HA_ERR_END_OF_FILE);
@@ -4244,8 +4288,13 @@ int ha_partition::handle_ordered_next(uc
uint part_id= m_top_entry;
handler *file= m_file[part_id];
DBUG_ENTER("ha_partition::handle_ordered_next");
-
- if (!is_next_same)
+
+ if (m_index_scan_type == partition_read_range)
+ {
+ error= file->read_range_next();
+ memcpy(rec_buf(part_id), table->record[0], m_rec_length);
+ }
+ else if (!is_next_same)
error= file->index_next(rec_buf(part_id));
else
error= file->index_next_same(rec_buf(part_id), m_start_key.key,
diff -Nrup a/sql/ha_partition.h b/sql/ha_partition.h
--- a/sql/ha_partition.h 2007-10-30 16:09:49 +03:00
+++ b/sql/ha_partition.h 2008-01-12 02:03:44 +03:00
@@ -49,7 +49,8 @@ private:
partition_index_first_unordered= 2,
partition_index_last= 3,
partition_index_read_last= 4,
- partition_no_index_scan= 5
+ partition_read_range = 5,
+ partition_no_index_scan= 6
};
/* Data for the partition handler */
int m_mode; // Open mode
@@ -63,8 +64,6 @@ private:
handler **m_reorged_file; // Reorganised partitions
handler **m_added_file; // Added parts kept for errors
partition_info *m_part_info; // local reference to partition
- uchar *m_start_key_ref; // Reference of start key in current
- // index scan info
Field **m_part_field_array; // Part field array locally to save acc
uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan
KEY *m_curr_key_info; // Current index
@@ -429,9 +428,7 @@ public:
virtual int read_range_next();
private:
- int common_index_read(uchar * buf, const uchar * key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag);
+ int common_index_read(uchar * buf, bool have_start_key);
int common_first_last(uchar * buf);
int partition_scan_set_up(uchar * buf, bool idx_read_flag);
int handle_unordered_next(uchar * buf, bool next_same);
| Thread |
|---|
| • bk commit into 6.0 tree (sergefp:1.2766) BUG#33257 | Sergey Petrunia | 12 Jan |