List:Commits« Previous MessageNext Message »
From:Mattias Jonsson Date:September 18 2008 7:49pm
Subject:bzr commit into mysql-5.1 branch (mattias.jonsson:2688) Bug#30573
Bug#33555
View as plain text  
#At file:///Users/mattiasj/clones/bzrroot/b30573-5129rc/

 2688 Mattias Jonsson	2008-09-18
      Bug#30573: Ordered range scan over partitioned tables returns some rows twice
      and
      Bug#33555: Group By Query does not correctly aggregate partitions
      
      Backport of bug-33257 which is the same bug.
      
      read_range_*() calls was not passed to the partition handlers,
      but was translated to index_read/next family calls.
      Resulting in duplicates rows and wrong aggregations.
modified:
  mysql-test/r/partition_range.result
  mysql-test/t/partition_range.test
  sql/ha_partition.cc
  sql/ha_partition.h

per-file messages:
  mysql-test/r/partition_range.result
    Bug#30573: Ordered range scan over partitioned tables returns some rows twice
    
    Updated result file
  mysql-test/t/partition_range.test
    Bug#30573: Ordered range scan over partitioned tables returns some rows twice
    
    Re-enabled the test
  sql/ha_partition.cc
    Bug#30573: Ordered range scan over partitioned tables returns some rows twice
    
    backport of bug-33257, correct handling of read_range_* calls,
    without converting them to index_read/next calls
  sql/ha_partition.h
    Bug#30573: Ordered range scan over partitioned tables returns some rows twice
    
    backport of bug-33257, correct handling of read_range_* calls,
    without converting them to index_read/next calls
=== modified file 'mysql-test/r/partition_range.result'
--- a/mysql-test/r/partition_range.result	2008-07-07 20:42:19 +0000
+++ b/mysql-test/r/partition_range.result	2008-09-18 19:49:34 +0000
@@ -742,3 +742,23 @@ WHERE (a >= '2004-07-01' AND a <= '2004-
 id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	p407,p408,p409,p507,p508,p509	ALL	NULL	NULL	NULL	NULL	18	Using where
 DROP TABLE t1;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+CREATE TABLE t2 (
+defid int(10) unsigned NOT NULL,
+day int(10) unsigned NOT NULL,
+count int(10) unsigned NOT NULL,
+filler char(200),
+KEY (defid,day)
+) 
+PARTITION BY RANGE (day) (
+PARTITION p7 VALUES LESS THAN (20070401) , 
+PARTITION p8 VALUES LESS THAN (20070501));
+insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
+insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
+insert into t2 values(52, 20070321, 123, 'filler') ;
+insert into t2 values(52, 20070322, 456, 'filler') ;
+select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
+sum(count)
+579
+drop table t1, t2;

=== modified file 'mysql-test/t/partition_range.test'
--- a/mysql-test/t/partition_range.test	2008-02-13 10:29:50 +0000
+++ b/mysql-test/t/partition_range.test	2008-09-18 19:49:34 +0000
@@ -807,24 +807,24 @@ DROP TABLE t1;
 #
 # BUG#30573: get wrong result with "group by" on PARTITIONed table
 #
-#create table t1 (a int);
-#insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-#CREATE TABLE t2 (
-#  defid int(10) unsigned NOT NULL,
-#  day int(10) unsigned NOT NULL,
-#  count int(10) unsigned NOT NULL,
-#  filler char(200),
-#  KEY (defid,day)
-#) 
-#PARTITION BY RANGE (day) (
-#  PARTITION p7 VALUES LESS THAN (20070401) , 
-#  PARTITION p8 VALUES LESS THAN (20070501));
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+CREATE TABLE t2 (
+  defid int(10) unsigned NOT NULL,
+  day int(10) unsigned NOT NULL,
+  count int(10) unsigned NOT NULL,
+  filler char(200),
+  KEY (defid,day)
+) 
+PARTITION BY RANGE (day) (
+  PARTITION p7 VALUES LESS THAN (20070401) , 
+  PARTITION p8 VALUES LESS THAN (20070501));
 
-#insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
-#insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
-#insert into t2 values(52, 20070321, 123, 'filler') ;
-#insert into t2 values(52, 20070322, 456, 'filler') ;
+insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
+insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
+insert into t2 values(52, 20070321, 123, 'filler') ;
+insert into t2 values(52, 20070322, 456, 'filler') ;
 
-#select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
-#drop table t1, t2;
+select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
+drop table t1, t2;
 

=== modified file 'sql/ha_partition.cc'
--- a/sql/ha_partition.cc	2008-08-20 15:29:14 +0000
+++ b/sql/ha_partition.cc	2008-09-18 19:49:34 +0000
@@ -3679,10 +3679,12 @@ int ha_partition::index_read_map(uchar *
                                  enum ha_rkey_function find_flag)
 {
   DBUG_ENTER("ha_partition::index_read_map");
-
   end_range= 0;
   m_index_scan_type= partition_index_read;
-  DBUG_RETURN(common_index_read(buf, key, keypart_map, find_flag));
+  m_start_key.key= key;
+  m_start_key.keypart_map= keypart_map;
+  m_start_key.flag= find_flag;
+  DBUG_RETURN(common_index_read(buf, TRUE));
 }
 
 
@@ -3690,41 +3692,63 @@ int ha_partition::index_read_map(uchar *
   Common routine for a number of index_read variants
 
   SYNOPSIS
-    common_index_read
-  
-  see index_read for rest
+    ha_partition::common_index_read()
+      buf             Buffer where the record should be returned
+      have_start_key  TRUE <=> the left endpoint is available, i.e. 
+                      we're in index_read call or in read_range_first
+                      call and the range has left endpoint
+
+                      FALSE <=> there is no left endpoint (we're in
+                      read_range_first() call and the range has no left
+                      endpoint)
+ 
+  DESCRIPTION
+    Start scanning the range (when invoked from read_range_first()) or doing 
+    an index lookup (when invoked from index_read_XXX):
+     - If possible, perform partition selection
+     - Find the set of partitions we're going to use
+     - Depending on whether we need ordering:
+        NO:  Get the first record from first used partition (see 
+             handle_unordered_scan_next_partition)
+        YES: Fill the priority queue and get the record that is the first in
+             the ordering
+
+  RETURN
+    0      OK 
+    other  HA_ERR_END_OF_FILE or other error code.
 */
 
-int ha_partition::common_index_read(uchar *buf, const uchar *key,
-                                    key_part_map keypart_map,
-				    enum ha_rkey_function find_flag)
+int ha_partition::common_index_read(uchar *buf, bool have_start_key)
 {
   int error;
+  uint key_len;
   bool reverse_order= FALSE;
-  uint key_len= calculate_key_len(table, active_index, key, keypart_map);
   DBUG_ENTER("ha_partition::common_index_read");
+  LINT_INIT(key_len); /* used if have_start_key==TRUE */
 
-  memcpy((void*)m_start_key.key, key, key_len);
-  m_start_key.keypart_map= keypart_map;
-  m_start_key.length= key_len;
-  m_start_key.flag= find_flag;
-
-  if ((error= partition_scan_set_up(buf, TRUE)))
+  if (have_start_key)
+  {
+    m_start_key.length= key_len= calculate_key_len(table, active_index, 
+                                                   m_start_key.key,
+                                                   m_start_key.keypart_map);
+  }
+  if ((error= partition_scan_set_up(buf, have_start_key)))
   {
     DBUG_RETURN(error);
   }
-  if (find_flag == HA_READ_PREFIX_LAST ||
-      find_flag == HA_READ_PREFIX_LAST_OR_PREV ||
-      find_flag == HA_READ_BEFORE_KEY)
+
+  if (have_start_key && 
+      (m_start_key.flag == HA_READ_PREFIX_LAST ||
+       m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
+       m_start_key.flag == HA_READ_BEFORE_KEY))
   {
     reverse_order= TRUE;
     m_ordered_scan_ongoing= TRUE;
   }
   if (!m_ordered_scan_ongoing ||
-      (find_flag == HA_READ_KEY_EXACT &&
-       (key_len >= m_curr_key_info->key_length ||
-	key_len == 0)))
-  {
+      (have_start_key && m_start_key.flag == HA_READ_KEY_EXACT &&
+       (key_len >= m_curr_key_info->key_length || key_len == 0)))
+   {
     /*
       We use unordered index scan either when read_range is used and flag
       is set to not use ordered or when an exact key is used and in this
@@ -3815,7 +3839,7 @@ int ha_partition::index_last(uchar * buf
   Common routine for index_first/index_last
 
   SYNOPSIS
-    common_index_first_last
+    ha_partition::common_first_last()
   
   see index_first for rest
 */
@@ -3859,7 +3883,10 @@ int ha_partition::index_read_last_map(uc
   m_ordered= TRUE;				// Safety measure
   end_range= 0;
   m_index_scan_type= partition_index_read_last;
-  DBUG_RETURN(common_index_read(buf, key, keypart_map, HA_READ_PREFIX_LAST));
+  m_start_key.key= key;
+  m_start_key.keypart_map= keypart_map;
+  m_start_key.flag= HA_READ_PREFIX_LAST;
+  DBUG_RETURN(common_index_read(buf, TRUE));
 }
 
 
@@ -3990,23 +4017,15 @@ int ha_partition::read_range_first(const
       ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
        (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
   }
-  range_key_part= m_curr_key_info->key_part;
 
-  if (!start_key)				// Read first record
-  {
-    if (m_ordered)
-      m_index_scan_type= partition_index_first;
-    else
-      m_index_scan_type= partition_index_first_unordered;
-    error= common_first_last(m_rec0);
-  }
+  range_key_part= m_curr_key_info->key_part;
+  if (start_key)
+    m_start_key= *start_key;
   else
-  {
-    m_index_scan_type= partition_index_read;
-    error= common_index_read(m_rec0,
-			     start_key->key,
-                             start_key->keypart_map, start_key->flag);
-  }
+    m_start_key.key= NULL;
+
+  m_index_scan_type= partition_read_range;
+  error= common_index_read(m_rec0, test(start_key));
   DBUG_RETURN(error);
 }
 
@@ -4028,26 +4047,36 @@ int ha_partition::read_range_next()
 
   if (m_ordered)
   {
-    DBUG_RETURN(handler::read_range_next());
+    DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
   }
-  DBUG_RETURN(handle_unordered_next(m_rec0, eq_range));
+  DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
 }
 
 
 /*
-  Common routine to set up scans
+  Common routine to set up index scans
 
   SYNOPSIS
-    buf                  Buffer to later return record in
-    idx_read_flag        Is it index scan
+    ha_partition::partition_scan_set_up()
+      buf            Buffer to later return record in (this function
+                     needs it to calculcate partitioning function
+                     values)
+
+      idx_read_flag  TRUE <=> m_start_key has range start endpoint which 
+                     probably can be used to determine the set of partitions
+                     to scan.
+                     FALSE <=> there is no start endpoint.
+
+  DESCRIPTION
+    Find out which partitions we'll need to read when scanning the specified
+    range.
+
+    If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
+    as we will not need to do merge ordering.
 
   RETURN VALUE
     >0                    Error code
     0                     Success
-
-  DESCRIPTION
-    This is where we check which partitions to actually scan if not all
-    of them
 */
 
 int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
@@ -4138,10 +4167,19 @@ int ha_partition::handle_unordered_next(
   DBUG_ENTER("ha_partition::handle_unordered_next");
 
   /*
-    We should consider if this should be split into two functions as
-    next_same is alwas a local constant
+    We should consider if this should be split into three functions as
+    partition_read_range is_next_same are always local constants
   */
-  if (is_next_same)
+
+  if (m_index_scan_type == partition_read_range)
+  {
+    if (!(error= file->read_range_next()))
+    {
+      m_last_part= m_part_spec.start_part;
+      DBUG_RETURN(0);
+    }
+  }
+  else if (is_next_same)
   {
     if (!(error= file->index_next_same(buf, m_start_key.key,
                                        m_start_key.length)))
@@ -4150,15 +4188,13 @@ int ha_partition::handle_unordered_next(
       DBUG_RETURN(0);
     }
   }
-  else if (!(error= file->index_next(buf)))
+  else 
   {
-    if (!(file->index_flags(active_index, 0, 1) & HA_READ_ORDER) ||
-        compare_key(end_range) <= 0)
+    if (!(error= file->index_next(buf)))
     {
       m_last_part= m_part_spec.start_part;
       DBUG_RETURN(0);                           // Row was in range
     }
-    error= HA_ERR_END_OF_FILE;
   }
 
   if (error == HA_ERR_END_OF_FILE)
@@ -4202,6 +4238,11 @@ int ha_partition::handle_unordered_scan_
     file= m_file[i];
     m_part_spec.start_part= i;
     switch (m_index_scan_type) {
+    case partition_read_range:
+      DBUG_PRINT("info", ("read_range_first on partition %d", i));
+      error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
+                                    end_range, eq_range, FALSE);
+      break;
     case partition_index_read:
       DBUG_PRINT("info", ("index_read on partition %d", i));
       error= file->index_read_map(buf, m_start_key.key,
@@ -4230,13 +4271,8 @@ int ha_partition::handle_unordered_scan_
     }
     if (!error)
     {
-      if (!(file->index_flags(active_index, 0, 1) & HA_READ_ORDER) ||
-          compare_key(end_range) <= 0)
-      {
-        m_last_part= i;
-        DBUG_RETURN(0);
-      }
-      error= HA_ERR_END_OF_FILE;
+      m_last_part= i;
+      DBUG_RETURN(0);
     }
     if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
       DBUG_RETURN(error);
@@ -4315,6 +4351,17 @@ int ha_partition::handle_ordered_index_s
                                        m_start_key.keypart_map);
       reverse_order= TRUE;
       break;
+    case partition_read_range:
+    {
+      /* 
+        This can only read record to table->record[0], as it was set when
+        the table was being opened. We have to memcpy data ourselves.
+      */
+      error= file->read_range_first(&m_start_key, end_range, eq_range, TRUE);
+      memcpy(rec_buf_ptr, table->record[0], m_rec_length);
+      reverse_order= FALSE;
+      break;
+    }
     default:
       DBUG_ASSERT(FALSE);
       DBUG_RETURN(HA_ERR_END_OF_FILE);
@@ -4395,8 +4442,13 @@ int ha_partition::handle_ordered_next(uc
   uint part_id= m_top_entry;
   handler *file= m_file[part_id];
   DBUG_ENTER("ha_partition::handle_ordered_next");
-
-  if (!is_next_same)
+  
+  if (m_index_scan_type == partition_read_range)
+  {
+    error= file->read_range_next();
+    memcpy(rec_buf(part_id), table->record[0], m_rec_length);
+  }
+  else if (!is_next_same)
     error= file->index_next(rec_buf(part_id));
   else
     error= file->index_next_same(rec_buf(part_id), m_start_key.key,

=== modified file 'sql/ha_partition.h'
--- a/sql/ha_partition.h	2008-08-11 18:06:08 +0000
+++ b/sql/ha_partition.h	2008-09-18 19:49:34 +0000
@@ -49,7 +49,8 @@ private:
     partition_index_first_unordered= 2,
     partition_index_last= 3,
     partition_index_read_last= 4,
-    partition_no_index_scan= 5
+    partition_read_range = 5,
+    partition_no_index_scan= 6
   };
   /* Data for the partition handler */
   int  m_mode;                          // Open mode
@@ -63,8 +64,6 @@ private:
   handler **m_reorged_file;             // Reorganised partitions
   handler **m_added_file;               // Added parts kept for errors
   partition_info *m_part_info;          // local reference to partition
-  uchar *m_start_key_ref;                // Reference of start key in current
-                                        // index scan info
   Field **m_part_field_array;           // Part field array locally to save acc
   uchar *m_ordered_rec_buffer;           // Row and key buffer for ord. idx scan
   KEY *m_curr_key_info;                 // Current index
@@ -429,9 +428,7 @@ public:
   virtual int read_range_next();
 
 private:
-  int common_index_read(uchar * buf, const uchar * key,
-                        key_part_map keypart_map,
-                        enum ha_rkey_function find_flag);
+  int common_index_read(uchar * buf, bool have_start_key);
   int common_first_last(uchar * buf);
   int partition_scan_set_up(uchar * buf, bool idx_read_flag);
   int handle_unordered_next(uchar * buf, bool next_same);

Thread
bzr commit into mysql-5.1 branch (mattias.jonsson:2688) Bug#30573Bug#33555Mattias Jonsson19 Sep