From: Date: January 12 2008 12:04am Subject: bk commit into 6.0 tree (sergefp:1.2766) BUG#33257 List-Archive: http://lists.mysql.com/commits/40948 X-Bug: 33257 Message-Id: <20080111230408.6D3DD14C64A@foxhole.localdomain> Below is the list of changes that have just been committed into a local 6.0 repository of psergey. When psergey does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet@stripped, 2008-01-12 02:03:55+03:00, sergefp@stripped +4 -0 BUG#33257 "range access on partitioned falcon table actually scans entire indexes" - Make ha_partition to pass read_range_XXX() calls to partition handlers, without converting them to index_read/index_next-family calls. This is needed for Falcon (which is terribly inefficient when it doesn't get the other endpoint) and possibly other storage engines with similar characteristics. This patch also fixes BUG#30573 (bk trigger: mark as fix for BUG#33257) mysql-test/r/partition_range.result@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +20 -0 BUG#30573: Testcase mysql-test/t/partition_range.test@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +18 -18 BUG#30573: Testcase sql/ha_partition.cc@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +114 -65 BUG#33257 "range access on partitioned falcon table actually scans entire indexes" - Make ha_partition to pass read_range_XXX() calls to partition handlers, without converting them to index_read/index_next-family calls. sql/ha_partition.h@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +3 -6 BUG#33257 "range access on partitioned falcon table actually scans entire indexes" - Make ha_partition to pass read_range_XXX() calls to partition handlers, without converting them to index_read/index_next-family calls. diff -Nrup a/mysql-test/r/partition_range.result b/mysql-test/r/partition_range.result --- a/mysql-test/r/partition_range.result 2007-12-13 20:34:03 +03:00 +++ b/mysql-test/r/partition_range.result 2008-01-12 02:03:44 +03:00 @@ -709,3 +709,23 @@ WHERE (a >= '2004-07-01' AND a <= '2004- id select_type table partitions type possible_keys key key_len ref rows Extra 1 SIMPLE t1 p407,p408,p409,p507,p508,p509 ALL NULL NULL NULL NULL 18 Using where DROP TABLE t1; +create table t1 (a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +CREATE TABLE t2 ( +defid int(10) unsigned NOT NULL, +day int(10) unsigned NOT NULL, +count int(10) unsigned NOT NULL, +filler char(200), +KEY (defid,day) +) +PARTITION BY RANGE (day) ( +PARTITION p7 VALUES LESS THAN (20070401) , +PARTITION p8 VALUES LESS THAN (20070501)); +insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B; +insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B; +insert into t2 values(52, 20070321, 123, 'filler') ; +insert into t2 values(52, 20070322, 456, 'filler') ; +select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid; +sum(count) +579 +drop table t1, t2; diff -Nrup a/mysql-test/t/partition_range.test b/mysql-test/t/partition_range.test --- a/mysql-test/t/partition_range.test 2007-12-13 20:34:03 +03:00 +++ b/mysql-test/t/partition_range.test 2008-01-12 02:03:44 +03:00 @@ -762,24 +762,24 @@ DROP TABLE t1; # # BUG#30573: get wrong result with "group by" on PARTITIONed table # -#create table t1 (a int); -#insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -#CREATE TABLE t2 ( -# defid int(10) unsigned NOT NULL, -# day int(10) unsigned NOT NULL, -# count int(10) unsigned NOT NULL, -# filler char(200), -# KEY (defid,day) -#) -#PARTITION BY RANGE (day) ( -# PARTITION p7 VALUES LESS THAN (20070401) , -# PARTITION p8 VALUES LESS THAN (20070501)); +create table t1 (a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +CREATE TABLE t2 ( + defid int(10) unsigned NOT NULL, + day int(10) unsigned NOT NULL, + count int(10) unsigned NOT NULL, + filler char(200), + KEY (defid,day) +) +PARTITION BY RANGE (day) ( + PARTITION p7 VALUES LESS THAN (20070401) , + PARTITION p8 VALUES LESS THAN (20070501)); -#insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B; -#insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B; -#insert into t2 values(52, 20070321, 123, 'filler') ; -#insert into t2 values(52, 20070322, 456, 'filler') ; +insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B; +insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B; +insert into t2 values(52, 20070321, 123, 'filler') ; +insert into t2 values(52, 20070322, 456, 'filler') ; -#select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid; -#drop table t1, t2; +select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid; +drop table t1, t2; diff -Nrup a/sql/ha_partition.cc b/sql/ha_partition.cc --- a/sql/ha_partition.cc 2007-12-13 20:40:36 +03:00 +++ b/sql/ha_partition.cc 2008-01-12 02:03:44 +03:00 @@ -3525,10 +3525,12 @@ int ha_partition::index_read_map(uchar * enum ha_rkey_function find_flag) { DBUG_ENTER("ha_partition::index_read_map"); - end_range= 0; m_index_scan_type= partition_index_read; - DBUG_RETURN(common_index_read(buf, key, keypart_map, find_flag)); + m_start_key.key= key; + m_start_key.keypart_map= keypart_map; + m_start_key.flag= find_flag; + DBUG_RETURN(common_index_read(buf, TRUE)); } @@ -3536,43 +3538,62 @@ int ha_partition::index_read_map(uchar * Common routine for a number of index_read variants SYNOPSIS - common_index_read - - see index_read for rest + ha_partition::common_index_read() + buf Buffer where the record should be returned + have_start_key TRUE <=> the left endpoint is available, i.e. + we're in index_read call or in read_range_first + call and the range has left endpoint + + FALSE <=> there is no left endpoint (we're in + read_range_first() call and the range has no left + endpoint) + + DESCRIPTION + Start scanning the range (when invoked from read_range_first()) or doing + an index lookup (when invoked from index_read_XXX): + - If possible, perform partition selection + - Find the set of partitions we're going to use + - Depending on whether we need ordering: + NO: Get the first record from first used partition (see + handle_unordered_scan_next_partition) + YES: Fill the priority queue and get the record that is the first in + the ordering + + RETURN + 0 OK + other HA_ERR_END_OF_FILE or other error code. */ -int ha_partition::common_index_read(uchar *buf, const uchar *key, - key_part_map keypart_map, - enum ha_rkey_function find_flag) +int ha_partition::common_index_read(uchar *buf, bool have_start_key) { int error; + uint key_len; bool reverse_order= FALSE; - bool index_read_flag= !(key == NULL); - uint key_len= calculate_key_len(table, active_index, key, keypart_map); DBUG_ENTER("ha_partition::common_index_read"); + LINT_INIT(key_len); /* used iff have_start_key==TRUE */ - if (index_read_flag) + if (have_start_key) { - memcpy((void*)m_start_key.key, key, key_len); - m_start_key.keypart_map= keypart_map; - m_start_key.length= key_len; - m_start_key.flag= find_flag; + m_start_key.length= key_len= calculate_key_len(table, active_index, + m_start_key.key, + m_start_key.keypart_map); } - if ((error= partition_scan_set_up(buf, index_read_flag))) + if ((error= partition_scan_set_up(buf, have_start_key))) { DBUG_RETURN(error); } - if (find_flag == HA_READ_PREFIX_LAST || - find_flag == HA_READ_PREFIX_LAST_OR_PREV || - find_flag == HA_READ_BEFORE_KEY) + + if (have_start_key && + (m_start_key.flag == HA_READ_PREFIX_LAST || + m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV || + m_start_key.flag == HA_READ_BEFORE_KEY)) { reverse_order= TRUE; m_ordered_scan_ongoing= TRUE; } if (!m_ordered_scan_ongoing || - (find_flag == HA_READ_KEY_EXACT && - (key_len >= m_curr_key_info->key_length || - key_len == 0))) + (have_start_key && m_start_key.flag == HA_READ_KEY_EXACT && + (key_len >= m_curr_key_info->key_length || key_len == 0))) { /* We use unordered index scan either when read_range is used and flag @@ -3664,7 +3685,7 @@ int ha_partition::index_last(uchar * buf Common routine for index_first/index_last SYNOPSIS - common_index_first_last + ha_partition::common_first_last() see index_first for rest */ @@ -3708,7 +3729,10 @@ int ha_partition::index_read_last_map(uc m_ordered= TRUE; // Safety measure end_range= 0; m_index_scan_type= partition_index_read_last; - DBUG_RETURN(common_index_read(buf, key, keypart_map, HA_READ_PREFIX_LAST)); + m_start_key.key= key; + m_start_key.keypart_map= keypart_map; + m_start_key.flag= HA_READ_PREFIX_LAST; + DBUG_RETURN(common_index_read(buf, TRUE)); } @@ -3827,9 +3851,9 @@ int ha_partition::read_range_first(const { int error; DBUG_ENTER("ha_partition::read_range_first"); - m_ordered= sorted; eq_range= eq_range_arg; + end_range= 0; if (end_key) { @@ -3839,27 +3863,20 @@ int ha_partition::read_range_first(const ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 : (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0); } - range_key_part= m_curr_key_info->key_part; - if (!start_key) // Read first record - { - if (m_ordered) - m_index_scan_type= partition_index_first; - else - m_index_scan_type= partition_index_first_unordered; - error= common_first_last(m_rec0); - } + range_key_part= m_curr_key_info->key_part; + if (start_key) + m_start_key= *start_key; else - { - m_index_scan_type= partition_index_read; - error= common_index_read(m_rec0, - start_key->key, - start_key->keypart_map, start_key->flag); - } + m_start_key.key= NULL; + + m_index_scan_type= partition_read_range; + error= common_index_read(m_rec0, test(start_key)); DBUG_RETURN(error); } + /* Read next record in read of a range with start and end key @@ -3877,26 +3894,35 @@ int ha_partition::read_range_next() if (m_ordered) { - DBUG_RETURN(handler::read_range_next()); + DBUG_RETURN(handle_ordered_next(table->record[0], eq_range)); } - DBUG_RETURN(handle_unordered_next(m_rec0, eq_range)); + DBUG_RETURN(handle_unordered_next(table->record[0], eq_range)); } /* - Common routine to set up scans + Common routine to set up index scans scans SYNOPSIS - buf Buffer to later return record in - idx_read_flag Is it index scan + ha_partition::partition_scan_set_up() + buf Buffer to later return record in (this function + needs it to calculate partitioning function values) + + idx_read_flag TRUE <=> m_start_key has range start endpoint which + probably can be used to determine the set of partitions + to scan. + FALSE <=> there is no start endpoint. + + DESCRIPTION + Find out which partitions we'll need to read when scanning the specified + range. + + If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE + as we will not need to do merge ordering. RETURN VALUE >0 Error code 0 Success - - DESCRIPTION - This is where we check which partitions to actually scan if not all - of them */ int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag) @@ -3978,6 +4004,7 @@ int ha_partition::partition_scan_set_up( all fields in the partition function is bound. In this case the index scan is performed on only one partition and thus it isn't necessary to perform any sort. + */ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same) @@ -3987,10 +4014,19 @@ int ha_partition::handle_unordered_next( DBUG_ENTER("ha_partition::handle_unordered_next"); /* - We should consider if this should be split into two functions as - next_same is alwas a local constant + We should consider if this should be split into three functions as + partition_read_range is_next_same are always local constants */ - if (is_next_same) + + if (m_index_scan_type == partition_read_range) + { + if (!(error= file->read_range_next())) + { + m_last_part= m_part_spec.start_part; + DBUG_RETURN(0); + } + } + else if (is_next_same) { if (!(error= file->index_next_same(buf, m_start_key.key, m_start_key.length))) @@ -3999,15 +4035,13 @@ int ha_partition::handle_unordered_next( DBUG_RETURN(0); } } - else if (!(error= file->index_next(buf))) + else { - if (!(file->table_flags() & HA_READ_ORDER) || - compare_key(end_range) <= 0) + if (!(error= file->index_next(buf))) { m_last_part= m_part_spec.start_part; DBUG_RETURN(0); // Row was in range } - error= HA_ERR_END_OF_FILE; } if (error == HA_ERR_END_OF_FILE) @@ -4051,6 +4085,10 @@ int ha_partition::handle_unordered_scan_ file= m_file[i]; m_part_spec.start_part= i; switch (m_index_scan_type) { + case partition_read_range: + DBUG_PRINT("info", ("read_range_first on partition %d", i)); + error= file->read_range_first(&m_start_key, end_range, eq_range, FALSE); + break; case partition_index_read: DBUG_PRINT("info", ("index_read on partition %d", i)); error= file->index_read_map(buf, m_start_key.key, @@ -4079,13 +4117,8 @@ int ha_partition::handle_unordered_scan_ } if (!error) { - if (!(file->table_flags() & HA_READ_ORDER) || - compare_key(end_range) <= 0) - { - m_last_part= i; - DBUG_RETURN(0); - } - error= HA_ERR_END_OF_FILE; + m_last_part= i; + DBUG_RETURN(0); } if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND)) DBUG_RETURN(error); @@ -4164,6 +4197,17 @@ int ha_partition::handle_ordered_index_s m_start_key.keypart_map); reverse_order= TRUE; break; + case partition_read_range: + { + /* + This can only read record to table->record[0], as it was set when + the table was being opened. We have to memcpy data ourselves. + */ + error= file->read_range_first(&m_start_key, end_range, eq_range, TRUE); + memcpy(rec_buf_ptr, table->record[0], m_rec_length); + reverse_order= FALSE; + break; + } default: DBUG_ASSERT(FALSE); DBUG_RETURN(HA_ERR_END_OF_FILE); @@ -4244,8 +4288,13 @@ int ha_partition::handle_ordered_next(uc uint part_id= m_top_entry; handler *file= m_file[part_id]; DBUG_ENTER("ha_partition::handle_ordered_next"); - - if (!is_next_same) + + if (m_index_scan_type == partition_read_range) + { + error= file->read_range_next(); + memcpy(rec_buf(part_id), table->record[0], m_rec_length); + } + else if (!is_next_same) error= file->index_next(rec_buf(part_id)); else error= file->index_next_same(rec_buf(part_id), m_start_key.key, diff -Nrup a/sql/ha_partition.h b/sql/ha_partition.h --- a/sql/ha_partition.h 2007-10-30 16:09:49 +03:00 +++ b/sql/ha_partition.h 2008-01-12 02:03:44 +03:00 @@ -49,7 +49,8 @@ private: partition_index_first_unordered= 2, partition_index_last= 3, partition_index_read_last= 4, - partition_no_index_scan= 5 + partition_read_range = 5, + partition_no_index_scan= 6 }; /* Data for the partition handler */ int m_mode; // Open mode @@ -63,8 +64,6 @@ private: handler **m_reorged_file; // Reorganised partitions handler **m_added_file; // Added parts kept for errors partition_info *m_part_info; // local reference to partition - uchar *m_start_key_ref; // Reference of start key in current - // index scan info Field **m_part_field_array; // Part field array locally to save acc uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan KEY *m_curr_key_info; // Current index @@ -429,9 +428,7 @@ public: virtual int read_range_next(); private: - int common_index_read(uchar * buf, const uchar * key, - key_part_map keypart_map, - enum ha_rkey_function find_flag); + int common_index_read(uchar * buf, bool have_start_key); int common_first_last(uchar * buf); int partition_scan_set_up(uchar * buf, bool idx_read_flag); int handle_unordered_next(uchar * buf, bool next_same);