List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:January 11 2008 11:04pm
Subject:bk commit into 6.0 tree (sergefp:1.2766) BUG#33257
View as plain text  
Below is the list of changes that have just been committed into a local
6.0 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2008-01-12 02:03:55+03:00, sergefp@stripped +4 -0
  BUG#33257 "range access on partitioned falcon table actually scans entire indexes"
  - Make ha_partition to pass read_range_XXX() calls to partition handlers, without converting 
    them to index_read/index_next-family calls. 
  
    This is needed for Falcon (which is terribly inefficient when it doesn't get the other 
     endpoint) and possibly other storage engines with similar characteristics.
  
  This patch also fixes BUG#30573 (bk trigger: mark as fix for BUG#33257)

  mysql-test/r/partition_range.result@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +20 -0
    BUG#30573: Testcase

  mysql-test/t/partition_range.test@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +18 -18
    BUG#30573: Testcase

  sql/ha_partition.cc@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +114 -65
    BUG#33257 "range access on partitioned falcon table actually scans entire indexes"
    - Make ha_partition to pass read_range_XXX() calls to partition handlers, without converting 
      them to index_read/index_next-family calls.

  sql/ha_partition.h@stripped, 2008-01-12 02:03:44+03:00, sergefp@stripped +3 -6
    BUG#33257 "range access on partitioned falcon table actually scans entire indexes"
    - Make ha_partition to pass read_range_XXX() calls to partition handlers, without converting 
      them to index_read/index_next-family calls.

diff -Nrup a/mysql-test/r/partition_range.result b/mysql-test/r/partition_range.result
--- a/mysql-test/r/partition_range.result	2007-12-13 20:34:03 +03:00
+++ b/mysql-test/r/partition_range.result	2008-01-12 02:03:44 +03:00
@@ -709,3 +709,23 @@ WHERE (a >= '2004-07-01' AND a <= '2004-
 id	select_type	table	partitions	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	p407,p408,p409,p507,p508,p509	ALL	NULL	NULL	NULL	NULL	18	Using where
 DROP TABLE t1;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+CREATE TABLE t2 (
+defid int(10) unsigned NOT NULL,
+day int(10) unsigned NOT NULL,
+count int(10) unsigned NOT NULL,
+filler char(200),
+KEY (defid,day)
+) 
+PARTITION BY RANGE (day) (
+PARTITION p7 VALUES LESS THAN (20070401) , 
+PARTITION p8 VALUES LESS THAN (20070501));
+insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
+insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
+insert into t2 values(52, 20070321, 123, 'filler') ;
+insert into t2 values(52, 20070322, 456, 'filler') ;
+select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
+sum(count)
+579
+drop table t1, t2;
diff -Nrup a/mysql-test/t/partition_range.test b/mysql-test/t/partition_range.test
--- a/mysql-test/t/partition_range.test	2007-12-13 20:34:03 +03:00
+++ b/mysql-test/t/partition_range.test	2008-01-12 02:03:44 +03:00
@@ -762,24 +762,24 @@ DROP TABLE t1;
 #
 # BUG#30573: get wrong result with "group by" on PARTITIONed table
 #
-#create table t1 (a int);
-#insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-#CREATE TABLE t2 (
-#  defid int(10) unsigned NOT NULL,
-#  day int(10) unsigned NOT NULL,
-#  count int(10) unsigned NOT NULL,
-#  filler char(200),
-#  KEY (defid,day)
-#) 
-#PARTITION BY RANGE (day) (
-#  PARTITION p7 VALUES LESS THAN (20070401) , 
-#  PARTITION p8 VALUES LESS THAN (20070501));
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+CREATE TABLE t2 (
+  defid int(10) unsigned NOT NULL,
+  day int(10) unsigned NOT NULL,
+  count int(10) unsigned NOT NULL,
+  filler char(200),
+  KEY (defid,day)
+) 
+PARTITION BY RANGE (day) (
+  PARTITION p7 VALUES LESS THAN (20070401) , 
+  PARTITION p8 VALUES LESS THAN (20070501));
 
-#insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
-#insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
-#insert into t2 values(52, 20070321, 123, 'filler') ;
-#insert into t2 values(52, 20070322, 456, 'filler') ;
+insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
+insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
+insert into t2 values(52, 20070321, 123, 'filler') ;
+insert into t2 values(52, 20070322, 456, 'filler') ;
 
-#select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
-#drop table t1, t2;
+select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
+drop table t1, t2;
 
diff -Nrup a/sql/ha_partition.cc b/sql/ha_partition.cc
--- a/sql/ha_partition.cc	2007-12-13 20:40:36 +03:00
+++ b/sql/ha_partition.cc	2008-01-12 02:03:44 +03:00
@@ -3525,10 +3525,12 @@ int ha_partition::index_read_map(uchar *
                                  enum ha_rkey_function find_flag)
 {
   DBUG_ENTER("ha_partition::index_read_map");
-
   end_range= 0;
   m_index_scan_type= partition_index_read;
-  DBUG_RETURN(common_index_read(buf, key, keypart_map, find_flag));
+  m_start_key.key= key;
+  m_start_key.keypart_map= keypart_map;
+  m_start_key.flag= find_flag;
+  DBUG_RETURN(common_index_read(buf, TRUE));
 }
 
 
@@ -3536,43 +3538,62 @@ int ha_partition::index_read_map(uchar *
   Common routine for a number of index_read variants
 
   SYNOPSIS
-    common_index_read
-  
-  see index_read for rest
+    ha_partition::common_index_read()
+      buf             Buffer where the record should be returned
+      have_start_key  TRUE <=> the left endpoint is available, i.e. 
+                      we're in index_read call or in read_range_first
+                      call and the range has left endpoint
+
+                      FALSE <=> there is no left endpoint (we're in
+                      read_range_first() call and the range has no left
+                      endpoint)
+ 
+  DESCRIPTION
+    Start scanning the range (when invoked from read_range_first()) or doing 
+    an index lookup (when invoked from index_read_XXX):
+     - If possible, perform partition selection
+     - Find the set of partitions we're going to use
+     - Depending on whether we need ordering:
+        NO:  Get the first record from first used partition (see 
+             handle_unordered_scan_next_partition)
+        YES: Fill the priority queue and get the record that is the first in
+             the ordering
+
+  RETURN
+    0      OK 
+    other  HA_ERR_END_OF_FILE or other error code.
 */
 
-int ha_partition::common_index_read(uchar *buf, const uchar *key,
-                                    key_part_map keypart_map,
-				    enum ha_rkey_function find_flag)
+int ha_partition::common_index_read(uchar *buf, bool have_start_key)
 {
   int error;
+  uint key_len;
   bool reverse_order= FALSE;
-  bool index_read_flag= !(key == NULL);
-  uint key_len= calculate_key_len(table, active_index, key, keypart_map);
   DBUG_ENTER("ha_partition::common_index_read");
+  LINT_INIT(key_len); /* used iff have_start_key==TRUE */
 
-  if (index_read_flag)
+  if (have_start_key)
   {
-    memcpy((void*)m_start_key.key, key, key_len);
-    m_start_key.keypart_map= keypart_map;
-    m_start_key.length= key_len;
-    m_start_key.flag= find_flag;
+    m_start_key.length= key_len= calculate_key_len(table, active_index, 
+                                                   m_start_key.key,
+                                                   m_start_key.keypart_map);
   }
-  if ((error= partition_scan_set_up(buf, index_read_flag)))
+  if ((error= partition_scan_set_up(buf, have_start_key)))
   {
     DBUG_RETURN(error);
   }
-  if (find_flag == HA_READ_PREFIX_LAST ||
-      find_flag == HA_READ_PREFIX_LAST_OR_PREV ||
-      find_flag == HA_READ_BEFORE_KEY)
+
+  if (have_start_key && 
+      (m_start_key.flag == HA_READ_PREFIX_LAST ||
+       m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
+       m_start_key.flag == HA_READ_BEFORE_KEY))
   {
     reverse_order= TRUE;
     m_ordered_scan_ongoing= TRUE;
   }
   if (!m_ordered_scan_ongoing ||
-      (find_flag == HA_READ_KEY_EXACT &&
-       (key_len >= m_curr_key_info->key_length ||
-	key_len == 0)))
+      (have_start_key && m_start_key.flag == HA_READ_KEY_EXACT &&
+       (key_len >= m_curr_key_info->key_length || key_len == 0)))
   {
     /*
       We use unordered index scan either when read_range is used and flag
@@ -3664,7 +3685,7 @@ int ha_partition::index_last(uchar * buf
   Common routine for index_first/index_last
 
   SYNOPSIS
-    common_index_first_last
+    ha_partition::common_first_last()
   
   see index_first for rest
 */
@@ -3708,7 +3729,10 @@ int ha_partition::index_read_last_map(uc
   m_ordered= TRUE;				// Safety measure
   end_range= 0;
   m_index_scan_type= partition_index_read_last;
-  DBUG_RETURN(common_index_read(buf, key, keypart_map, HA_READ_PREFIX_LAST));
+  m_start_key.key= key;
+  m_start_key.keypart_map= keypart_map;
+  m_start_key.flag= HA_READ_PREFIX_LAST;
+  DBUG_RETURN(common_index_read(buf, TRUE));
 }
 
 
@@ -3827,9 +3851,9 @@ int ha_partition::read_range_first(const
 {
   int error;
   DBUG_ENTER("ha_partition::read_range_first");
-
   m_ordered= sorted;
   eq_range= eq_range_arg;
+
   end_range= 0;
   if (end_key)
   {
@@ -3839,27 +3863,20 @@ int ha_partition::read_range_first(const
       ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
        (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
   }
-  range_key_part= m_curr_key_info->key_part;
 
-  if (!start_key)				// Read first record
-  {
-    if (m_ordered)
-      m_index_scan_type= partition_index_first;
-    else
-      m_index_scan_type= partition_index_first_unordered;
-    error= common_first_last(m_rec0);
-  }
+  range_key_part= m_curr_key_info->key_part;
+  if (start_key)
+    m_start_key= *start_key;
   else
-  {
-    m_index_scan_type= partition_index_read;
-    error= common_index_read(m_rec0,
-			     start_key->key,
-                             start_key->keypart_map, start_key->flag);
-  }
+    m_start_key.key= NULL;
+
+  m_index_scan_type= partition_read_range;
+  error= common_index_read(m_rec0, test(start_key));
   DBUG_RETURN(error);
 }
 
 
+
 /*
   Read next record in read of a range with start and end key
 
@@ -3877,26 +3894,35 @@ int ha_partition::read_range_next()
 
   if (m_ordered)
   {
-    DBUG_RETURN(handler::read_range_next());
+    DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
   }
-  DBUG_RETURN(handle_unordered_next(m_rec0, eq_range));
+  DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
 }
 
 
 /*
-  Common routine to set up scans
+  Common routine to set up index scans scans
 
   SYNOPSIS
-    buf                  Buffer to later return record in
-    idx_read_flag        Is it index scan
+    ha_partition::partition_scan_set_up()
+      buf            Buffer to later return record in (this function
+                     needs it to calculate partitioning function values)
+
+      idx_read_flag  TRUE <=> m_start_key has range start endpoint which 
+                     probably can be used to determine the set of partitions
+                     to scan.
+                     FALSE <=> there is no start endpoint.
+
+  DESCRIPTION
+    Find out which partitions we'll need to read when scanning the specified
+    range.
+
+    If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
+    as we will not need to do merge ordering.
 
   RETURN VALUE
     >0                    Error code
     0                     Success
-
-  DESCRIPTION
-    This is where we check which partitions to actually scan if not all
-    of them
 */
 
 int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
@@ -3978,6 +4004,7 @@ int ha_partition::partition_scan_set_up(
     all fields in the partition function is bound. In this case the index
     scan is performed on only one partition and thus it isn't necessary to
     perform any sort.
+
 */
 
 int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
@@ -3987,10 +4014,19 @@ int ha_partition::handle_unordered_next(
   DBUG_ENTER("ha_partition::handle_unordered_next");
 
   /*
-    We should consider if this should be split into two functions as
-    next_same is alwas a local constant
+    We should consider if this should be split into three functions as
+    partition_read_range is_next_same are always local constants
   */
-  if (is_next_same)
+
+  if (m_index_scan_type == partition_read_range)
+  {
+    if (!(error= file->read_range_next()))
+    {
+      m_last_part= m_part_spec.start_part;
+      DBUG_RETURN(0);
+    }
+  }
+  else if (is_next_same)
   {
     if (!(error= file->index_next_same(buf, m_start_key.key,
                                        m_start_key.length)))
@@ -3999,15 +4035,13 @@ int ha_partition::handle_unordered_next(
       DBUG_RETURN(0);
     }
   }
-  else if (!(error= file->index_next(buf)))
+  else 
   {
-    if (!(file->table_flags() & HA_READ_ORDER) ||
-        compare_key(end_range) <= 0)
+    if (!(error= file->index_next(buf)))
     {
       m_last_part= m_part_spec.start_part;
       DBUG_RETURN(0);                           // Row was in range
     }
-    error= HA_ERR_END_OF_FILE;
   }
 
   if (error == HA_ERR_END_OF_FILE)
@@ -4051,6 +4085,10 @@ int ha_partition::handle_unordered_scan_
     file= m_file[i];
     m_part_spec.start_part= i;
     switch (m_index_scan_type) {
+    case partition_read_range:
+      DBUG_PRINT("info", ("read_range_first on partition %d", i));
+      error= file->read_range_first(&m_start_key, end_range, eq_range, FALSE);
+      break;
     case partition_index_read:
       DBUG_PRINT("info", ("index_read on partition %d", i));
       error= file->index_read_map(buf, m_start_key.key,
@@ -4079,13 +4117,8 @@ int ha_partition::handle_unordered_scan_
     }
     if (!error)
     {
-      if (!(file->table_flags() & HA_READ_ORDER) ||
-          compare_key(end_range) <= 0)
-      {
-        m_last_part= i;
-        DBUG_RETURN(0);
-      }
-      error= HA_ERR_END_OF_FILE;
+      m_last_part= i;
+      DBUG_RETURN(0);
     }
     if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
       DBUG_RETURN(error);
@@ -4164,6 +4197,17 @@ int ha_partition::handle_ordered_index_s
                                        m_start_key.keypart_map);
       reverse_order= TRUE;
       break;
+    case partition_read_range:
+    {
+      /* 
+        This can only read record to table->record[0], as it was set when
+        the table was being opened. We have to memcpy data ourselves.
+      */
+      error= file->read_range_first(&m_start_key, end_range, eq_range, TRUE);
+      memcpy(rec_buf_ptr, table->record[0], m_rec_length);
+      reverse_order= FALSE;
+      break;
+    }
     default:
       DBUG_ASSERT(FALSE);
       DBUG_RETURN(HA_ERR_END_OF_FILE);
@@ -4244,8 +4288,13 @@ int ha_partition::handle_ordered_next(uc
   uint part_id= m_top_entry;
   handler *file= m_file[part_id];
   DBUG_ENTER("ha_partition::handle_ordered_next");
-
-  if (!is_next_same)
+  
+  if (m_index_scan_type == partition_read_range)
+  {
+    error= file->read_range_next();
+    memcpy(rec_buf(part_id), table->record[0], m_rec_length);
+  }
+  else if (!is_next_same)
     error= file->index_next(rec_buf(part_id));
   else
     error= file->index_next_same(rec_buf(part_id), m_start_key.key,
diff -Nrup a/sql/ha_partition.h b/sql/ha_partition.h
--- a/sql/ha_partition.h	2007-10-30 16:09:49 +03:00
+++ b/sql/ha_partition.h	2008-01-12 02:03:44 +03:00
@@ -49,7 +49,8 @@ private:
     partition_index_first_unordered= 2,
     partition_index_last= 3,
     partition_index_read_last= 4,
-    partition_no_index_scan= 5
+    partition_read_range = 5,
+    partition_no_index_scan= 6
   };
   /* Data for the partition handler */
   int  m_mode;                          // Open mode
@@ -63,8 +64,6 @@ private:
   handler **m_reorged_file;             // Reorganised partitions
   handler **m_added_file;               // Added parts kept for errors
   partition_info *m_part_info;          // local reference to partition
-  uchar *m_start_key_ref;                // Reference of start key in current
-                                        // index scan info
   Field **m_part_field_array;           // Part field array locally to save acc
   uchar *m_ordered_rec_buffer;           // Row and key buffer for ord. idx scan
   KEY *m_curr_key_info;                 // Current index
@@ -429,9 +428,7 @@ public:
   virtual int read_range_next();
 
 private:
-  int common_index_read(uchar * buf, const uchar * key,
-                        key_part_map keypart_map,
-                        enum ha_rkey_function find_flag);
+  int common_index_read(uchar * buf, bool have_start_key);
   int common_first_last(uchar * buf);
   int partition_scan_set_up(uchar * buf, bool idx_read_flag);
   int handle_unordered_next(uchar * buf, bool next_same);
Thread
bk commit into 6.0 tree (sergefp:1.2766) BUG#33257Sergey Petrunia12 Jan