#At file:///Users/mattiasj/clones/bzrroot/b30573-5129rc/
2688 Mattias Jonsson 2008-09-18
Bug#30573: Ordered range scan over partitioned tables returns some rows twice
and
Bug#33555: Group By Query does not correctly aggregate partitions
Backport of bug-33257 which is the same bug.
read_range_*() calls was not passed to the partition handlers,
but was translated to index_read/next family calls.
Resulting in duplicates rows and wrong aggregations.
modified:
mysql-test/r/partition_range.result
mysql-test/t/partition_range.test
sql/ha_partition.cc
sql/ha_partition.h
per-file messages:
mysql-test/r/partition_range.result
Bug#30573: Ordered range scan over partitioned tables returns some rows twice
Updated result file
mysql-test/t/partition_range.test
Bug#30573: Ordered range scan over partitioned tables returns some rows twice
Re-enabled the test
sql/ha_partition.cc
Bug#30573: Ordered range scan over partitioned tables returns some rows twice
backport of bug-33257, correct handling of read_range_* calls,
without converting them to index_read/next calls
sql/ha_partition.h
Bug#30573: Ordered range scan over partitioned tables returns some rows twice
backport of bug-33257, correct handling of read_range_* calls,
without converting them to index_read/next calls
=== modified file 'mysql-test/r/partition_range.result'
--- a/mysql-test/r/partition_range.result 2008-07-07 20:42:19 +0000
+++ b/mysql-test/r/partition_range.result 2008-09-18 19:49:34 +0000
@@ -742,3 +742,23 @@ WHERE (a >= '2004-07-01' AND a <= '2004-
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p407,p408,p409,p507,p508,p509 ALL NULL NULL NULL NULL 18 Using where
DROP TABLE t1;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+CREATE TABLE t2 (
+defid int(10) unsigned NOT NULL,
+day int(10) unsigned NOT NULL,
+count int(10) unsigned NOT NULL,
+filler char(200),
+KEY (defid,day)
+)
+PARTITION BY RANGE (day) (
+PARTITION p7 VALUES LESS THAN (20070401) ,
+PARTITION p8 VALUES LESS THAN (20070501));
+insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
+insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
+insert into t2 values(52, 20070321, 123, 'filler') ;
+insert into t2 values(52, 20070322, 456, 'filler') ;
+select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
+sum(count)
+579
+drop table t1, t2;
=== modified file 'mysql-test/t/partition_range.test'
--- a/mysql-test/t/partition_range.test 2008-02-13 10:29:50 +0000
+++ b/mysql-test/t/partition_range.test 2008-09-18 19:49:34 +0000
@@ -807,24 +807,24 @@ DROP TABLE t1;
#
# BUG#30573: get wrong result with "group by" on PARTITIONed table
#
-#create table t1 (a int);
-#insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-#CREATE TABLE t2 (
-# defid int(10) unsigned NOT NULL,
-# day int(10) unsigned NOT NULL,
-# count int(10) unsigned NOT NULL,
-# filler char(200),
-# KEY (defid,day)
-#)
-#PARTITION BY RANGE (day) (
-# PARTITION p7 VALUES LESS THAN (20070401) ,
-# PARTITION p8 VALUES LESS THAN (20070501));
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+CREATE TABLE t2 (
+ defid int(10) unsigned NOT NULL,
+ day int(10) unsigned NOT NULL,
+ count int(10) unsigned NOT NULL,
+ filler char(200),
+ KEY (defid,day)
+)
+PARTITION BY RANGE (day) (
+ PARTITION p7 VALUES LESS THAN (20070401) ,
+ PARTITION p8 VALUES LESS THAN (20070501));
-#insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
-#insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
-#insert into t2 values(52, 20070321, 123, 'filler') ;
-#insert into t2 values(52, 20070322, 456, 'filler') ;
+insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
+insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
+insert into t2 values(52, 20070321, 123, 'filler') ;
+insert into t2 values(52, 20070322, 456, 'filler') ;
-#select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
-#drop table t1, t2;
+select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
+drop table t1, t2;
=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc 2008-08-20 15:29:14 +0000
+++ b/sql/ha_partition.cc 2008-09-18 19:49:34 +0000
@@ -3679,10 +3679,12 @@ int ha_partition::index_read_map(uchar *
enum ha_rkey_function find_flag)
{
DBUG_ENTER("ha_partition::index_read_map");
-
end_range= 0;
m_index_scan_type= partition_index_read;
- DBUG_RETURN(common_index_read(buf, key, keypart_map, find_flag));
+ m_start_key.key= key;
+ m_start_key.keypart_map= keypart_map;
+ m_start_key.flag= find_flag;
+ DBUG_RETURN(common_index_read(buf, TRUE));
}
@@ -3690,41 +3692,63 @@ int ha_partition::index_read_map(uchar *
Common routine for a number of index_read variants
SYNOPSIS
- common_index_read
-
- see index_read for rest
+ ha_partition::common_index_read()
+ buf Buffer where the record should be returned
+ have_start_key TRUE <=> the left endpoint is available, i.e.
+ we're in index_read call or in read_range_first
+ call and the range has left endpoint
+
+ FALSE <=> there is no left endpoint (we're in
+ read_range_first() call and the range has no left
+ endpoint)
+
+ DESCRIPTION
+ Start scanning the range (when invoked from read_range_first()) or doing
+ an index lookup (when invoked from index_read_XXX):
+ - If possible, perform partition selection
+ - Find the set of partitions we're going to use
+ - Depending on whether we need ordering:
+ NO: Get the first record from first used partition (see
+ handle_unordered_scan_next_partition)
+ YES: Fill the priority queue and get the record that is the first in
+ the ordering
+
+ RETURN
+ 0 OK
+ other HA_ERR_END_OF_FILE or other error code.
*/
-int ha_partition::common_index_read(uchar *buf, const uchar *key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag)
+int ha_partition::common_index_read(uchar *buf, bool have_start_key)
{
int error;
+ uint key_len;
bool reverse_order= FALSE;
- uint key_len= calculate_key_len(table, active_index, key, keypart_map);
DBUG_ENTER("ha_partition::common_index_read");
+ LINT_INIT(key_len); /* used if have_start_key==TRUE */
- memcpy((void*)m_start_key.key, key, key_len);
- m_start_key.keypart_map= keypart_map;
- m_start_key.length= key_len;
- m_start_key.flag= find_flag;
-
- if ((error= partition_scan_set_up(buf, TRUE)))
+ if (have_start_key)
+ {
+ m_start_key.length= key_len= calculate_key_len(table, active_index,
+ m_start_key.key,
+ m_start_key.keypart_map);
+ }
+ if ((error= partition_scan_set_up(buf, have_start_key)))
{
DBUG_RETURN(error);
}
- if (find_flag == HA_READ_PREFIX_LAST ||
- find_flag == HA_READ_PREFIX_LAST_OR_PREV ||
- find_flag == HA_READ_BEFORE_KEY)
+
+ if (have_start_key &&
+ (m_start_key.flag == HA_READ_PREFIX_LAST ||
+ m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
+ m_start_key.flag == HA_READ_BEFORE_KEY))
{
reverse_order= TRUE;
m_ordered_scan_ongoing= TRUE;
}
if (!m_ordered_scan_ongoing ||
- (find_flag == HA_READ_KEY_EXACT &&
- (key_len >= m_curr_key_info->key_length ||
- key_len == 0)))
- {
+ (have_start_key && m_start_key.flag == HA_READ_KEY_EXACT &&
+ (key_len >= m_curr_key_info->key_length || key_len == 0)))
+ {
/*
We use unordered index scan either when read_range is used and flag
is set to not use ordered or when an exact key is used and in this
@@ -3815,7 +3839,7 @@ int ha_partition::index_last(uchar * buf
Common routine for index_first/index_last
SYNOPSIS
- common_index_first_last
+ ha_partition::common_first_last()
see index_first for rest
*/
@@ -3859,7 +3883,10 @@ int ha_partition::index_read_last_map(uc
m_ordered= TRUE; // Safety measure
end_range= 0;
m_index_scan_type= partition_index_read_last;
- DBUG_RETURN(common_index_read(buf, key, keypart_map, HA_READ_PREFIX_LAST));
+ m_start_key.key= key;
+ m_start_key.keypart_map= keypart_map;
+ m_start_key.flag= HA_READ_PREFIX_LAST;
+ DBUG_RETURN(common_index_read(buf, TRUE));
}
@@ -3990,23 +4017,15 @@ int ha_partition::read_range_first(const
((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
(end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
}
- range_key_part= m_curr_key_info->key_part;
- if (!start_key) // Read first record
- {
- if (m_ordered)
- m_index_scan_type= partition_index_first;
- else
- m_index_scan_type= partition_index_first_unordered;
- error= common_first_last(m_rec0);
- }
+ range_key_part= m_curr_key_info->key_part;
+ if (start_key)
+ m_start_key= *start_key;
else
- {
- m_index_scan_type= partition_index_read;
- error= common_index_read(m_rec0,
- start_key->key,
- start_key->keypart_map, start_key->flag);
- }
+ m_start_key.key= NULL;
+
+ m_index_scan_type= partition_read_range;
+ error= common_index_read(m_rec0, test(start_key));
DBUG_RETURN(error);
}
@@ -4028,26 +4047,36 @@ int ha_partition::read_range_next()
if (m_ordered)
{
- DBUG_RETURN(handler::read_range_next());
+ DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
}
- DBUG_RETURN(handle_unordered_next(m_rec0, eq_range));
+ DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
}
/*
- Common routine to set up scans
+ Common routine to set up index scans
SYNOPSIS
- buf Buffer to later return record in
- idx_read_flag Is it index scan
+ ha_partition::partition_scan_set_up()
+ buf Buffer to later return record in (this function
+ needs it to calculcate partitioning function
+ values)
+
+ idx_read_flag TRUE <=> m_start_key has range start endpoint which
+ probably can be used to determine the set of partitions
+ to scan.
+ FALSE <=> there is no start endpoint.
+
+ DESCRIPTION
+ Find out which partitions we'll need to read when scanning the specified
+ range.
+
+ If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
+ as we will not need to do merge ordering.
RETURN VALUE
>0 Error code
0 Success
-
- DESCRIPTION
- This is where we check which partitions to actually scan if not all
- of them
*/
int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
@@ -4138,10 +4167,19 @@ int ha_partition::handle_unordered_next(
DBUG_ENTER("ha_partition::handle_unordered_next");
/*
- We should consider if this should be split into two functions as
- next_same is alwas a local constant
+ We should consider if this should be split into three functions as
+ partition_read_range is_next_same are always local constants
*/
- if (is_next_same)
+
+ if (m_index_scan_type == partition_read_range)
+ {
+ if (!(error= file->read_range_next()))
+ {
+ m_last_part= m_part_spec.start_part;
+ DBUG_RETURN(0);
+ }
+ }
+ else if (is_next_same)
{
if (!(error= file->index_next_same(buf, m_start_key.key,
m_start_key.length)))
@@ -4150,15 +4188,13 @@ int ha_partition::handle_unordered_next(
DBUG_RETURN(0);
}
}
- else if (!(error= file->index_next(buf)))
+ else
{
- if (!(file->index_flags(active_index, 0, 1) & HA_READ_ORDER) ||
- compare_key(end_range) <= 0)
+ if (!(error= file->index_next(buf)))
{
m_last_part= m_part_spec.start_part;
DBUG_RETURN(0); // Row was in range
}
- error= HA_ERR_END_OF_FILE;
}
if (error == HA_ERR_END_OF_FILE)
@@ -4202,6 +4238,11 @@ int ha_partition::handle_unordered_scan_
file= m_file[i];
m_part_spec.start_part= i;
switch (m_index_scan_type) {
+ case partition_read_range:
+ DBUG_PRINT("info", ("read_range_first on partition %d", i));
+ error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
+ end_range, eq_range, FALSE);
+ break;
case partition_index_read:
DBUG_PRINT("info", ("index_read on partition %d", i));
error= file->index_read_map(buf, m_start_key.key,
@@ -4230,13 +4271,8 @@ int ha_partition::handle_unordered_scan_
}
if (!error)
{
- if (!(file->index_flags(active_index, 0, 1) & HA_READ_ORDER) ||
- compare_key(end_range) <= 0)
- {
- m_last_part= i;
- DBUG_RETURN(0);
- }
- error= HA_ERR_END_OF_FILE;
+ m_last_part= i;
+ DBUG_RETURN(0);
}
if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
DBUG_RETURN(error);
@@ -4315,6 +4351,17 @@ int ha_partition::handle_ordered_index_s
m_start_key.keypart_map);
reverse_order= TRUE;
break;
+ case partition_read_range:
+ {
+ /*
+ This can only read record to table->record[0], as it was set when
+ the table was being opened. We have to memcpy data ourselves.
+ */
+ error= file->read_range_first(&m_start_key, end_range, eq_range, TRUE);
+ memcpy(rec_buf_ptr, table->record[0], m_rec_length);
+ reverse_order= FALSE;
+ break;
+ }
default:
DBUG_ASSERT(FALSE);
DBUG_RETURN(HA_ERR_END_OF_FILE);
@@ -4395,8 +4442,13 @@ int ha_partition::handle_ordered_next(uc
uint part_id= m_top_entry;
handler *file= m_file[part_id];
DBUG_ENTER("ha_partition::handle_ordered_next");
-
- if (!is_next_same)
+
+ if (m_index_scan_type == partition_read_range)
+ {
+ error= file->read_range_next();
+ memcpy(rec_buf(part_id), table->record[0], m_rec_length);
+ }
+ else if (!is_next_same)
error= file->index_next(rec_buf(part_id));
else
error= file->index_next_same(rec_buf(part_id), m_start_key.key,
=== modified file 'sql/ha_partition.h'
--- a/sql/ha_partition.h 2008-08-11 18:06:08 +0000
+++ b/sql/ha_partition.h 2008-09-18 19:49:34 +0000
@@ -49,7 +49,8 @@ private:
partition_index_first_unordered= 2,
partition_index_last= 3,
partition_index_read_last= 4,
- partition_no_index_scan= 5
+ partition_read_range = 5,
+ partition_no_index_scan= 6
};
/* Data for the partition handler */
int m_mode; // Open mode
@@ -63,8 +64,6 @@ private:
handler **m_reorged_file; // Reorganised partitions
handler **m_added_file; // Added parts kept for errors
partition_info *m_part_info; // local reference to partition
- uchar *m_start_key_ref; // Reference of start key in current
- // index scan info
Field **m_part_field_array; // Part field array locally to save acc
uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan
KEY *m_curr_key_info; // Current index
@@ -429,9 +428,7 @@ public:
virtual int read_range_next();
private:
- int common_index_read(uchar * buf, const uchar * key,
- key_part_map keypart_map,
- enum ha_rkey_function find_flag);
+ int common_index_read(uchar * buf, bool have_start_key);
int common_first_last(uchar * buf);
int partition_scan_set_up(uchar * buf, bool idx_read_flag);
int handle_unordered_next(uchar * buf, bool next_same);
| Thread |
|---|
| • bzr commit into mysql-5.1 branch (mattias.jonsson:2688) Bug#30573Bug#33555 | Mattias Jonsson | 19 Sep |