List:Commits« Previous MessageNext Message »
From:Martin Hansson Date:May 7 2010 3:07pm
Subject:bzr commit into mysql-5.1-bugteam branch (martin.hansson:3373) Bug#50939
View as plain text  
#At file:///data0/martin/bzr/bug50939/5.1bt/ based on revid:martin.hansson@stripped

 3373 Martin Hansson	2010-05-07
      Bug#50939: Loose Index Scan unduly relies on engine to
      remember range endpoints
      
      The Loose Index Scan optimization keeps track of a sequence
      of intervals. For the current interval it maintains the
      current interval's endpoints. But the maximum endpoint was
      not stored in the SQL layer; rather, it relied on the
      storage engine to retain this value in-between reads. By
      coincidence this holds for MyISAM and InnoDB. Not for the
      partitioning engine, however.
      
      Fixed by making the key values iterator 
      (QUICK_RANGE_SELECT) keep track of the current maximum endpoint.
      This is also more efficient as we save a call through the
      handler API in case of open-ended intervals.
      
      The code to calculate endpoints was extracted into 
      separate methods in QUICK_RANGE_SELECT, and it was possible to
      get rid of some code duplication as part of fix.

    modified:
      mysql-test/r/range.result
      mysql-test/t/range.test
      sql/opt_range.cc
      sql/opt_range.h
=== modified file 'mysql-test/r/range.result'
--- a/mysql-test/r/range.result	2009-12-08 09:26:11 +0000
+++ b/mysql-test/r/range.result	2010-05-07 15:07:55 +0000
@@ -1653,4 +1653,48 @@ a	b
 0	0
 1	1
 DROP TABLE t1;
+#
+# Bug#50939: Loose Index Scan unduly relies on engine to remember range 
+# endpoints
+#
+CREATE TABLE t1 (
+a INT,
+b INT,
+KEY ( a, b )
+) PARTITION BY HASH (a) PARTITIONS 1;
+CREATE TABLE t2 (
+a INT,
+b INT,
+KEY ( a, b )
+);
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+INSERT INTO t1 SELECT a +  5, b +  5 FROM t1;
+INSERT INTO t1 SELECT a + 10, b + 10 FROM t1;
+INSERT INTO t1 SELECT a + 20, b + 20 FROM t1;
+INSERT INTO t1 SELECT a + 40, b + 40 FROM t1;
+INSERT INTO t2 SELECT * FROM t1;
+# plans should be identical
+EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10,100) GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	5	NULL	1	Using where; Using index for group-by
+EXPLAIN SELECT a, MAX(b) FROM t2 WHERE a IN (10,100) GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	range	a	a	5	NULL	2	Using where; Using index for group-by
+FLUSH status;
+SELECT a, MAX(b) FROM t1 WHERE a IN (10, 100) GROUP BY a;
+a	MAX(b)
+10	10
+# Should be no more than 4 reads.
+SHOW status LIKE 'handler_read_key';
+Variable_name	Value
+Handler_read_key	4
+FLUSH status;
+SELECT a, MAX(b) FROM t2 WHERE a IN (10, 100) GROUP BY a;
+a	MAX(b)
+10	10
+# Should be no more than 4 reads.
+SHOW status LIKE 'handler_read_key';
+Variable_name	Value
+Handler_read_key	4
+DROP TABLE t1, t2;
 End of 5.1 tests

=== modified file 'mysql-test/t/range.test'
--- a/mysql-test/t/range.test	2009-12-08 09:26:11 +0000
+++ b/mysql-test/t/range.test	2010-05-07 15:07:55 +0000
@@ -1313,4 +1313,45 @@ SELECT * FROM t1 FORCE INDEX (PRIMARY) 
 
 DROP TABLE t1;
 
+--echo #
+--echo # Bug#50939: Loose Index Scan unduly relies on engine to remember range 
+--echo # endpoints
+--echo #
+CREATE TABLE t1 (
+ a INT,
+ b INT,
+ KEY ( a, b )
+) PARTITION BY HASH (a) PARTITIONS 1;
+
+CREATE TABLE t2 (
+ a INT,
+ b INT,
+ KEY ( a, b )
+);
+
+INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+
+INSERT INTO t1 SELECT a +  5, b +  5 FROM t1;
+INSERT INTO t1 SELECT a + 10, b + 10 FROM t1;
+INSERT INTO t1 SELECT a + 20, b + 20 FROM t1;
+INSERT INTO t1 SELECT a + 40, b + 40 FROM t1;
+
+INSERT INTO t2 SELECT * FROM t1;
+
+--echo # plans should be identical
+EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10,100) GROUP BY a;
+EXPLAIN SELECT a, MAX(b) FROM t2 WHERE a IN (10,100) GROUP BY a;
+
+FLUSH status;
+SELECT a, MAX(b) FROM t1 WHERE a IN (10, 100) GROUP BY a;
+--echo # Should be no more than 4 reads.
+SHOW status LIKE 'handler_read_key';
+
+FLUSH status;
+SELECT a, MAX(b) FROM t2 WHERE a IN (10, 100) GROUP BY a;
+--echo # Should be no more than 4 reads.
+SHOW status LIKE 'handler_read_key';
+
+DROP TABLE t1, t2;
+
 --echo End of 5.1 tests

=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc	2010-02-26 13:16:46 +0000
+++ b/sql/opt_range.cc	2010-05-07 15:07:55 +0000
@@ -8532,8 +8532,6 @@ int QUICK_RANGE_SELECT::get_next()
 {
   int             result;
   KEY_MULTI_RANGE *mrange;
-  key_range       *start_key;
-  key_range       *end_key;
   DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
   DBUG_ASSERT(multi_range_length && multi_range &&
               (cur_range >= (QUICK_RANGE**) ranges.buffer) &&
@@ -8573,26 +8571,9 @@ int QUICK_RANGE_SELECT::get_next()
          mrange_slot < mrange_end;
          mrange_slot++)
     {
-      start_key= &mrange_slot->start_key;
-      end_key= &mrange_slot->end_key;
       last_range= *(cur_range++);
-
-      start_key->key=    (const uchar*) last_range->min_key;
-      start_key->length= last_range->min_length;
-      start_key->flag=   ((last_range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
-                          (last_range->flag & EQ_RANGE) ?
-                          HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
-      start_key->keypart_map= last_range->min_keypart_map;
-      end_key->key=      (const uchar*) last_range->max_key;
-      end_key->length=   last_range->max_length;
-      /*
-        We use HA_READ_AFTER_KEY here because if we are reading on a key
-        prefix. We want to find all keys with this prefix.
-      */
-      end_key->flag=     (last_range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
-                          HA_READ_AFTER_KEY);
-      end_key->keypart_map= last_range->max_keypart_map;
-
+      last_range->make_min_endpoint(&mrange_slot->start_key);
+      last_range->make_max_endpoint(&mrange_slot->end_key);
       mrange_slot->range_flag= last_range->flag;
     }
 
@@ -8616,49 +8597,52 @@ end:
 /*
   Get the next record with a different prefix.
 
-  SYNOPSIS
-    QUICK_RANGE_SELECT::get_next_prefix()
-    prefix_length  length of cur_prefix
-    cur_prefix     prefix of a key to be searched for
+  @param prefix_length   length of cur_prefix
+  @param group_key_parts The number of key parts in the group prefix
+  @param cur_prefix      prefix of a key to be searched for
+
+  Each subsequent call to the method retrieves the first record that has a
+  prefix with length prefix_length and which is different from cur_prefix,
+  such that the record with the new prefix is within the ranges described by
+  this->ranges. The record found is stored into the buffer pointed by
+  this->record. The method is useful for GROUP-BY queries with range
+  conditions to discover the prefix of the next group that satisfies the range
+  conditions.
 
-  DESCRIPTION
-    Each subsequent call to the method retrieves the first record that has a
-    prefix with length prefix_length different from cur_prefix, such that the
-    record with the new prefix is within the ranges described by
-    this->ranges. The record found is stored into the buffer pointed by
-    this->record.
-    The method is useful for GROUP-BY queries with range conditions to
-    discover the prefix of the next group that satisfies the range conditions.
+  @todo
 
-  TODO
     This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
     methods should be unified into a more general one to reduce code
     duplication.
 
-  RETURN
-    0                  on success
-    HA_ERR_END_OF_FILE if returned all keys
-    other              if some error occurred
+  @retval 0                  on success
+  @retval HA_ERR_END_OF_FILE if returned all keys
+  @retval other              if some error occurred
 */
 
 int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length,
-                                        key_part_map keypart_map,
+                                        uint group_key_parts,
                                         uchar *cur_prefix)
 {
   DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");
+  const key_part_map keypart_map= make_prev_keypart_map(group_key_parts);
 
   for (;;)
   {
     int result;
-    key_range start_key, end_key;
     if (last_range)
     {
       /* Read the next record in the same range with prefix after cur_prefix. */
-      DBUG_ASSERT(cur_prefix != 0);
+      DBUG_ASSERT(cur_prefix != NULL);
       result= file->index_read_map(record, cur_prefix, keypart_map,
                                    HA_READ_AFTER_KEY);
-      if (result || (file->compare_key(file->end_range) <= 0))
+      if (result || last_range->max_keypart_map == 0)
         DBUG_RETURN(result);
+
+      key_range previous_endpoint;
+      last_range->make_max_endpoint(&previous_endpoint, prefix_length, keypart_map);
+      if (file->compare_key(&previous_endpoint) <= 0)
+        DBUG_RETURN(0);
     }
 
     uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
@@ -8670,21 +8654,9 @@ int QUICK_RANGE_SELECT::get_next_prefix(
     }
     last_range= *(cur_range++);
 
-    start_key.key=    (const uchar*) last_range->min_key;
-    start_key.length= min(last_range->min_length, prefix_length);
-    start_key.keypart_map= last_range->min_keypart_map & keypart_map;
-    start_key.flag=   ((last_range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
-		       (last_range->flag & EQ_RANGE) ?
-		       HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
-    end_key.key=      (const uchar*) last_range->max_key;
-    end_key.length=   min(last_range->max_length, prefix_length);
-    end_key.keypart_map= last_range->max_keypart_map & keypart_map;
-    /*
-      We use READ_AFTER_KEY here because if we are reading on a key
-      prefix we want to find all keys with this prefix
-    */
-    end_key.flag=     (last_range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
-		       HA_READ_AFTER_KEY);
+    key_range start_key, end_key;
+    last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
+    last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);
 
     result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
 				   last_range->max_keypart_map ? &end_key : 0,
@@ -8779,9 +8751,9 @@ bool QUICK_RANGE_SELECT::row_in_ranges()
 }
 
 /*
-  This is a hack: we inherit from QUICK_SELECT so that we can use the
+  This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the
   get_next() interface, but we have to hold a pointer to the original
-  QUICK_SELECT because its data are used all over the place.  What
+  QUICK_RANGE_SELECT because its data are used all over the place. What
   should be done is to factor out the data that is needed into a base
   class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
   which handle the ranges and implement the get_next() function.  But
@@ -10903,7 +10875,8 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_pre
   {
     uchar *cur_prefix= seen_first_key ? group_prefix : NULL;
     if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
-                         make_prev_keypart_map(group_key_parts), cur_prefix)))
+                                                      group_key_parts, 
+                                                      cur_prefix)))
       DBUG_RETURN(result);
     seen_first_key= TRUE;
   }

=== modified file 'sql/opt_range.h'
--- a/sql/opt_range.h	2008-08-25 17:18:22 +0000
+++ b/sql/opt_range.h	2010-05-07 15:07:55 +0000
@@ -65,6 +65,85 @@ class QUICK_RANGE :public Sql_alloc {
       dummy=0;
 #endif
     }
+
+  /**
+     Initalizes a key_range object for communication with storage engine. 
+
+     This function facilitates communication with the Storage Engine API by
+     translating the minimum endpoint of the interval represented by this
+     QUICK_RANGE into an index range endpoint specifier for the engine.
+
+     @param Pointer to an uninitialized key_range C struct.
+
+     @param prefix_length The length of the search key prefix to be used for
+     lookup.
+     
+     @param keypart_map A set (bitmap) of keyparts to be used.
+  */
+  void make_min_endpoint(key_range *kr, uint prefix_length, 
+                         key_part_map keypart_map) {
+    make_min_endpoint(kr);
+    kr->length= min(kr->length, prefix_length);
+    kr->keypart_map&= keypart_map;
+  }
+  
+  /**
+     Initalizes a key_range object for communication with storage engine. 
+
+     This function facilitates communication with the Storage Engine API by
+     translating the minimum endpoint of the interval represented by this
+     QUICK_RANGE into an index range endpoint specifier for the engine.
+
+     @param Pointer to an uninitialized key_range C struct.
+  */
+  void make_min_endpoint(key_range *kr) {
+    kr->key= (const uchar*)min_key;
+    kr->length= min_length;
+    kr->keypart_map= min_keypart_map;
+    kr->flag= ((flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
+               (flag & EQ_RANGE) ? HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
+  }
+
+  /**
+     Initalizes a key_range object for communication with storage engine. 
+
+     This function facilitates communication with the Storage Engine API by
+     translating the maximum endpoint of the interval represented by this
+     QUICK_RANGE into an index range endpoint specifier for the engine.
+
+     @param Pointer to an uninitialized key_range C struct.
+
+     @param prefix_length The length of the search key prefix to be used for
+     lookup.
+     
+     @param keypart_map A set (bitmap) of keyparts to be used.
+  */
+  void make_max_endpoint(key_range *kr, uint prefix_length, 
+                         key_part_map keypart_map) {
+    make_max_endpoint(kr);
+    kr->length= min(kr->length, prefix_length);
+    kr->keypart_map&= keypart_map;
+  }
+
+  /**
+     Initalizes a key_range object for communication with storage engine. 
+
+     This function facilitates communication with the Storage Engine API by
+     translating the maximum endpoint of the interval represented by this
+     QUICK_RANGE into an index range endpoint specifier for the engine.
+
+     @param Pointer to an uninitialized key_range C struct.
+  */
+  void make_max_endpoint(key_range *kr) {
+    kr->key= (const uchar*)max_key;
+    kr->length= max_length;
+    kr->keypart_map= max_keypart_map;
+    /*
+      We use READ_AFTER_KEY here because if we are reading on a key
+      prefix we want to find all keys with this prefix
+    */
+    kr->flag= (flag & NEAR_MAX ? HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
+  }
 };
 
 
@@ -331,7 +410,7 @@ public:
   int reset(void);
   int get_next();
   void range_end();
-  int get_next_prefix(uint prefix_length, key_part_map keypart_map,
+  int get_next_prefix(uint prefix_length, uint group_key_parts, 
                       uchar *cur_prefix);
   bool reverse_sorted() { return 0; }
   bool unique_key_range();
@@ -611,7 +690,7 @@ private:
   uchar *record;          /* Buffer where the next record is returned. */
   uchar *tmp_record;      /* Temporary storage for next_min(), next_max(). */
   uchar *group_prefix;    /* Key prefix consisting of the GROUP fields. */
-  uint group_prefix_len; /* Length of the group prefix. */
+  const uint group_prefix_len; /* Length of the group prefix. */
   uint group_key_parts;  /* A number of keyparts in the group prefix */
   uchar *last_prefix;     /* Prefix of the last group for detecting EOF. */
   bool have_min;         /* Specify whether we are computing */


Attachment: [text/bzr-bundle] bzr/martin.hansson@sun.com-20100507150755-v9caeainkmfqojz7.bundle
Thread
bzr commit into mysql-5.1-bugteam branch (martin.hansson:3373) Bug#50939Martin Hansson7 May