List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:September 12 2006 10:49am
Subject:bk commit into 5.2 tree (sergefp:1.2154)
View as plain text  
Below is the list of changes that have just been committed into a local
5.2 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-09-12 13:49:46+03:00, sergefp@stripped +29 -0
  WL#2474 Multi Range Read: Changed the default MRR implementation to implement new MRR interface
  WL#2475 "Batched range read functions for MyISAM/InnoDb"
  combined together into one commit.

  include/my_base.h@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +26 -0
    WL#2474 Multi Range Read: added comments

  include/myisam.h@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +1 -1
    Fixed comment

  mysql-test/include/mrr_tests.inc@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +106 -0
    New BitKeeper file ``mysql-test/include/mrr_tests.inc''

  mysql-test/include/mrr_tests.inc@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +0 -0

  mysql-test/r/innodb_mrr.result@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +309 -0
    New BitKeeper file ``mysql-test/r/innodb_mrr.result''

  mysql-test/r/innodb_mrr.result@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +0 -0

  mysql-test/r/myisam_mrr.result@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +279 -0
    New BitKeeper file ``mysql-test/r/myisam_mrr.result''

  mysql-test/r/myisam_mrr.result@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +0 -0

  mysql-test/r/ndb_blob.result@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +2 -2
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    updated test results

  mysql-test/r/ndb_condition_pushdown.result@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +11 -11
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    updated test results

  mysql-test/t/innodb_mrr.test@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +49 -0
    New BitKeeper file ``mysql-test/t/innodb_mrr.test''

  mysql-test/t/innodb_mrr.test@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +0 -0

  mysql-test/t/myisam_mrr.test@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +14 -0
    New BitKeeper file ``mysql-test/t/myisam_mrr.test''

  mysql-test/t/myisam_mrr.test@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +0 -0

  sql/ha_innodb.cc@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +42 -2
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    Make InnoDB handler use DS-MRR implementation

  sql/ha_innodb.h@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +14 -0
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    Make InnoDB handler use DS-MRR implementation

  sql/ha_myisam.cc@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +43 -2
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    Make MyISAM handler use DS-MRR implementation

  sql/ha_myisam.h@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +15 -0
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    Make MyISAM handler use DS-MRR implementation

  sql/ha_ndbcluster.cc@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +154 -107
    WL#2474 Multi Range Read: 
     - Changed MRR/NDB to provide the new interface, albeit with a help of interface non-compliant
       mrr_persistent_flag_storage() function.
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Update the NDB table handler to use new interface (to make "Using MRR" show up in EXPLAIN)

  sql/ha_ndbcluster.h@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +16 -7
    WL#2474 Multi Range Read: 
     - Changed MRR/NDB to provide the new interface, albeit with a help of interface non-compliant
       mrr_persistent_flag_storage() function.
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Update the NDB table handler to use new interface (to make "Using MRR" show up in EXPLAIN)

  sql/handler.cc@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +803 -72
    WL#2474 Multi Range Read: Changed the default MRR implementation to implement new MRR interface
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
     - Add DS-MRR implementation functions: cost estimates and execution engine.

  sql/handler.h@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +202 -12
    WL#2474 Multi Range Read: Changed the default MRR implementation to implement new MRR interface
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Add DS-MRR implementation functions: cost estimates and execution engine.
    - Add needed functions into class COST_VECT

  sql/key.cc@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +24 -0
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
     - Add key_zero_nulls() utility function.

  sql/mysql_priv.h@stripped, 2006-09-12 13:49:39+03:00, sergefp@stripped +1 -0
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
     - Add key_zero_nulls() utility function.

  sql/mysqld.cc@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +2 -2
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Change minimum value of @@read_rnd_buff_size 

  sql/opt_range.cc@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +549 -379
    WL#2474 Multi Range Read: Changed the range optimizer to use new MRR interface
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Update range optimizer and QUICK_RANGE_SELECT to use new MRR interface functions
    - Added comments

  sql/opt_range.h@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +22 -1
    WL#2474 Multi Range Read: Changed the range optimizer to use new MRR interface
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Update range optimizer and QUICK_RANGE_SELECT to use new MRR interface functions

  sql/set_var.cc@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +14 -1
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Add @@optimizer_use_mrr system variable

  sql/sql_class.h@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +7 -0
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Add @@optimizer_use_mrr system variable

  sql/sql_select.cc@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +45 -16
    WL#2474 Multi Range Read: Changed the default MRR implementation to implement new MRR interface
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Make EXPLAIN show "Using MRR" if native MRR implementation is used
    - Add comments

  sql/structs.h@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +3 -1
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Added comments

  sql/table.h@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +3 -1
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Added comments

  storage/myisam/mi_key.c@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +27 -3
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Make MyISAM to be able to use "restore scan" calls:
      index_read({key_tuple, rowid})

  storage/myisam/mi_rkey.c@stripped, 2006-09-12 13:49:40+03:00, sergefp@stripped +16 -2
    WL#2475 "Batched range read functions for MyISAM/InnoDb":
    - Make MyISAM to be able to use "restore scan" calls:
      index_read({key_tuple, rowid})

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sergefp
# Host:	pslp.mylan
# Root:	/home/psergey/mysql-5.2-mymrr-03-clean

--- 1.84/include/my_base.h	2006-09-12 13:49:53 +03:00
+++ 1.85/include/my_base.h	2006-09-12 13:49:53 +03:00
@@ -439,14 +439,40 @@
 
 /* For key ranges */
 
+/* from - inf */
 #define NO_MIN_RANGE	1
+
+/* to +inf */
 #define NO_MAX_RANGE	2
+
+/*  X < key (i.e. not X <= key */
 #define NEAR_MIN	4
+
+/* X > key (i.e. not X>= key */
 #define NEAR_MAX	8
+
+/* 
+  This flag means that index is a unique index, and the interval is 
+  equivalent to "AND(keypart_i = const_i)", where all of const_i are not NULLs.
+*/
 #define UNIQUE_RANGE	16
+
+/* 
+  This flag means that the interval is equivalent to 
+  "AND(keypart_i = const_i)", where not all key parts may be used but all of 
+  const_i are not NULLs.
+*/
 #define EQ_RANGE	32
+
+/*
+  This flag has the same meaning as UNIQUE_RANGE, except that for at least
+  one keypart the condition is "keypart IS NULL". 
+*/
 #define NULL_RANGE	64
+
 #define GEOM_FLAG      128
+
+/* Used only in NDB, at row retrieval stage */
 #define SKIP_RANGE     256
 
 typedef struct st_key_range

--- 1.76/include/myisam.h	2006-09-12 13:49:53 +03:00
+++ 1.77/include/myisam.h	2006-09-12 13:49:53 +03:00
@@ -129,7 +129,7 @@
                             (_to_)= (mi_get_mask_all_keys_active(_maxkeys_) & \
                                      (_from_))
 
-	/* Param to/from mi_info */
+	/* Param to/from mi_status */
 
 typedef struct st_mi_isaminfo		/* Struct from h_info */
 {

--- 1.46/storage/myisam/mi_key.c	2006-09-12 13:49:53 +03:00
+++ 1.47/storage/myisam/mi_key.c	2006-09-12 13:49:53 +03:00
@@ -225,16 +225,40 @@
   my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT;
   DBUG_ENTER("_mi_pack_key");
 
-  for (keyseg=info->s->keyinfo[keynr].seg ;
-       keyseg->type && (int) k_length > 0;
+  for (keyseg=info->s->keyinfo[keynr].seg; (int) k_length > 0;
        old+=keyseg->length, keyseg++)
   {
     enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type;
     uint length=min((uint) keyseg->length,(uint) k_length);
     uint char_length;
     uchar *pos;
-    CHARSET_INFO *cs=keyseg->charset;
+    CHARSET_INFO *cs;
+    
+    if (!type)
+    {
+      /* Decode the ROWID value back to to file pointer */
+      if (k_length == length)
+      {
+        pos=old;
+        if (!(info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
+        {
+          my_off_t rowid;
+          rowid= my_get_ptr(pos, length);
+          rowid= rowid / info->s->base.pack_reclength;
+          my_store_ptr(key, length, rowid);
+        }
+        else
+          memcpy(key, pos, length);
 
+        pos+=length;
+        k_length-=length;
+        key+= length;
+        keyseg++;
+      }
+      break;
+    }
+
+    cs=keyseg->charset;
     if (keyseg->null_bit)
     {
       k_length--;

--- 1.21/storage/myisam/mi_rkey.c	2006-09-12 13:49:53 +03:00
+++ 1.22/storage/myisam/mi_rkey.c	2006-09-12 13:49:53 +03:00
@@ -30,6 +30,8 @@
   MI_KEYDEF *keyinfo;
   HA_KEYSEG *last_used_keyseg;
   uint pack_key_length, use_key_length, nextflag;
+  uint myisam_search_flag;
+  my_bool key_tuple_has_rowid= FALSE;
   DBUG_ENTER("mi_rkey");
   DBUG_PRINT("enter", ("base: %lx  buf: %lx  inx: %d  search_flag: %d",
                        (long) info, (long) buf, inx, search_flag));
@@ -60,9 +62,15 @@
     key_buff=info->lastkey+info->s->base.max_key_length;
     pack_key_length=_mi_pack_key(info,(uint) inx, key_buff, (uchar*) key,
 				 key_len, &last_used_keyseg);
+    if (last_used_keyseg > (info->s->keyinfo[inx].seg +
+                           info->s->keyinfo[inx].keysegs))
+    {
+      key_tuple_has_rowid= TRUE;
+      last_used_keyseg--;
+    }
     /* Save packed_key_length for use by the MERGE engine. */
     info->pack_key_length= pack_key_length;
-    DBUG_EXECUTE("key",_mi_print_key(DBUG_FILE, keyinfo->seg,
+    DBUG_EXECUTE("info",_mi_print_key(DBUG_FILE, keyinfo->seg,
 				     key_buff, pack_key_length););
   }
 
@@ -89,8 +97,14 @@
 #endif
   case HA_KEY_ALG_BTREE:
   default:
+    myisam_search_flag= myisam_read_vec[search_flag];
+    if (key_tuple_has_rowid)
+    {
+      /* Fiddle with flags so ha_key_cmp compares the rowid, too */
+      myisam_search_flag &= ~(SEARCH_FIND | SEARCH_NO_FIND);
+    }
     if (!_mi_search(info, keyinfo, key_buff, use_key_length,
-		  myisam_read_vec[search_flag], info->s->state.key_root[inx]))
+		  myisam_search_flag, info->s->state.key_root[inx]))
     {
       while (info->lastpos >= info->state->data_file_length)
       {

--- 1.174/sql/ha_myisam.cc	2006-09-12 13:49:53 +03:00
+++ 1.175/sql/ha_myisam.cc	2006-09-12 13:49:53 +03:00
@@ -187,8 +187,11 @@
   int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
                   HA_DUPP_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
                   HA_FILE_BASED | HA_CAN_GEOMETRY | HA_READ_RND_SAME |
-                  HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD),
-  can_enable_indexes(1)
+                  HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | 
+                  HA_NEED_READ_RANGE_BUFFER
+                  ),
+  can_enable_indexes(1),
+  ds_mrr(this)
 {}
 
 
@@ -1786,3 +1789,41 @@
     return COMPATIBLE_DATA_NO;
   return COMPATIBLE_DATA_YES;
 }
+
+
+/****************************************************************************
+ * DS-MRR implementation 
+ ***************************************************************************/
+
+/**
+ * Multi Range Read interface, DS-MRR calls
+ */
+
+int ha_myisam::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+                                     uint n_ranges, uint mode, 
+                                     HANDLER_BUFFER *buf)
+{
+  return ds_mrr.dsmrr_init(this, &table->key_info[active_index], 
+                           seq, seq_init_param, n_ranges, mode, buf);
+}
+
+int ha_myisam::multi_range_read_next(char **range_info)
+{
+  return ds_mrr.dsmrr_next(this, range_info);
+}
+
+ha_rows ha_myisam::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                               void *seq_init_param, 
+                                               uint n_ranges, uint *bufsz,
+                                               uint *flags, COST_VECT *cost)
+{
+  return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
+                                 flags, cost);
+}
+
+int ha_myisam::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                                     uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
+}
+

--- 1.72/sql/ha_myisam.h	2006-09-12 13:49:53 +03:00
+++ 1.73/sql/ha_myisam.h	2006-09-12 13:49:53 +03:00
@@ -132,4 +132,19 @@
   int dump(THD* thd, int fd);
   int net_read_dump(NET* net);
 #endif
+public:
+  /**
+   * Multi Range Read interface
+   */
+  int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+                            uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+  int multi_range_read_next(char **range_info);
+  ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                      void *seq_init_param, 
+                                      uint n_ranges, uint *bufsz,
+                                      uint *flags, COST_VECT *cost);
+  int multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                            uint *bufsz, uint *flags, COST_VECT *cost);
+private:
+  DsMrr_impl ds_mrr;
 };

--- 1.220/sql/handler.cc	2006-09-12 13:49:53 +03:00
+++ 1.221/sql/handler.cc	2006-09-12 13:49:53 +03:00
@@ -2742,93 +2742,294 @@
 }
 #endif
 
+
 /*
-  Read the first row of a multi-range set.
+  Calculate cost of 'index only' scan for given index and number of records.
 
   SYNOPSIS
-    read_multi_range_first()
-    found_range_p       Returns a pointer to the element in 'ranges' that
-                        corresponds to the returned row.
-    ranges              An array of KEY_MULTI_RANGE range descriptions.
-    range_count         Number of ranges in 'ranges'.
-    sorted		If result should be sorted per key.
-    buffer              A HANDLER_BUFFER for internal handler usage.
+    index_only_read_time()
+      keynr    key to read
+      records  #of records to read
 
   NOTES
-    Record is read into table->record[0].
-    *found_range_p returns a valid value only if read_multi_range_first()
-    returns 0.
-    Sorting is done within each range. If you want an overall sort, enter
-    'ranges' with sorted ranges.
+    It is assumed that we will read trough the whole key range and that all
+    key blocks are half full (normally things are much better). It is also
+    assumed that each time we read the next key from the index, the handler
+    performs a random seek, thus the cost is proportional to the number of
+    blocks read.
+
+  TODO:
+    Move this to handler->read_time() by adding a flag 'index-only-read' to
+    this call. The reason for doing this is that the current function doesn't
+    handle the case when the row is stored in the b-tree (like in innodb
+    clustered index)
+*/
+
+double handler::index_only_read_time(uint keynr, double records)
+{
+  double read_time;
+  uint keys_per_block= (block_size/2/
+			(table->key_info[keynr].key_length + ref_length) + 1);
+  read_time=((double) (records + keys_per_block-1) /
+             (double) keys_per_block);
+  return read_time;
+}
+
+/****************************************************************************
+ * Default MRR implementation (MRR to non-MRR converter)
+ ***************************************************************************/
+
+/*
+  Get cost and other information about MRR scan over a known list of ranges
+
+  SYNOPSIS
+    multi_range_read_info_const()
+      keyno           Index number
+      seq             Range sequence to be traversed
+      seq_init_param  First parameter for seq->init()
+      n_ranges        Number of ranges in the sequence
+      bufsz    INOUT  IN:  Size of the buffer available for use
+                      OUT: Size of the buffer that is expected to be actually
+                           used, or 0 if buffer is not needed.
+      flags    INOUT  A combination of HA_MRR_* flags
+      cost     OUT    Estimated cost of MRR access
+
+  DESCRIPTION
+    Obtain the cost and other information about an MRR scan when the list of
+    ranges to be scanned is known.
+
+  NOTE
+    Reasons for having this function:
+    * Let high-roundtrip-cost handlers, like NDB/Federated produce the 
+      estimates in one roundtrip. (One might ask, why don't we use 
+      condition-pushdown-based system for the purpose of estimating number of
+      records that match the table condition?... good question... in any case
+      there is another reason)
+    * We need a specialized cost-of-MRR-over-constant-ranges function.
+      = It must be different from non-MRR-cost function (since MRR is supposed
+        to be better and have a smaller cost)
+      = It must be different from multi_range_read_info() because here we know
+        the ranges.
+    Therefore, we have this function.
 
   RETURN
-    0			OK, found a row
-    HA_ERR_END_OF_FILE	No rows in range
-    #			Error code
+    HA_POS_ERROR -  Error or can't perform the requested scan. Values of OUT
+                    parameters are undefined.
+    other        -  OK, *cost contains cost of the scan, *bufsz and *flags
+                    contain scan parameters.
 */
 
-int handler::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                                    KEY_MULTI_RANGE *ranges, uint range_count,
-                                    bool sorted, HANDLER_BUFFER *buffer)
-{
-  int result= HA_ERR_END_OF_FILE;
-  DBUG_ENTER("handler::read_multi_range_first");
-  multi_range_sorted= sorted;
-  multi_range_buffer= buffer;
-
-  for (multi_range_curr= ranges, multi_range_end= ranges + range_count;
-       multi_range_curr < multi_range_end;
-       multi_range_curr++)
-  {
-    result= read_range_first(multi_range_curr->start_key.length ?
-                             &multi_range_curr->start_key : 0,
-                             multi_range_curr->end_key.length ?
-                             &multi_range_curr->end_key : 0,
-                             test(multi_range_curr->range_flag & EQ_RANGE),
-                             multi_range_sorted);
-    if (result != HA_ERR_END_OF_FILE)
-      break;
+ha_rows 
+handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                     void *seq_init_param, uint n_ranges,
+                                     uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  KEY_MULTI_RANGE range;
+  range_seq_t seq_it;
+  ha_rows rows, total_rows;
+  *bufsz= 0;
+  total_rows= 0;
+  uint part, max_used_part= 0;
+
+  seq_it= seq->init(seq_init_param, n_ranges, *flags);
+  n_ranges=0;
+  while (!seq->next(seq_it, &range))
+  {
+    n_ranges++;
+    key_range *min_endp= range.start_key.length? &range.start_key : NULL;
+    key_range *max_endp= range.end_key.length? &range.end_key : NULL;
+    if (!(range.range_flag & UNIQUE_RANGE))
+    {
+      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, 
+                                                        max_endp)))
+      {
+        /* Can't scan one range => can't do MRR scan at all */
+        total_rows= HA_POS_ERROR;
+        break;
+      }
+    }
+    else
+      rows= 1; // UNIQUE_RANGE
+
+    total_rows += rows;
+    part= (int)range.ptr;
+    if (part > max_used_part)
+      max_used_part= part;
   }
+  
+  /* The following code is a copy of multi_range_read_info(): */
+  *flags |= HA_MRR_USE_DEFAULT_IMPL;
+  cost->zero();
+  cost->avg_io_cost= 1; // Assume random seeks
+  if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
+    cost->io_count= index_only_read_time(keyno, total_rows);
+  else
+    cost->io_count= read_time(keyno, n_ranges, total_rows);
+  cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
 
-  *found_range_p= multi_range_curr;
-  DBUG_PRINT("exit",("result %d", result));
-  DBUG_RETURN(result);
+  return total_rows;
 }
 
 
 /*
-  Read the next row of a multi-range set.
+  Get cost and other information about MRR scan over currently unknown ranges
 
   SYNOPSIS
-    read_multi_range_next()
-    found_range_p       Returns a pointer to the element in 'ranges' that
-                        corresponds to the returned row.
+    multi_range_read_info()
+      keyno           Index number
+      n_ranges        Estimated number of ranges (i.e. intervals) in the 
+                      range sequence.
+      n_rows          Estimated total number of records contained within all 
+                      of the ranges
+      bufsz    INOUT  IN:  Size of the buffer available for use
+                      OUT: Size of the buffer that will be actually used, or 
+                           0 if buffer is not needed.
+      flags    INOUT  A combination of HA_MRR_* flags
+      cost     OUT    Estimated cost of MRR access
+
+  DESCRIPTION
+    This function obtains the cost and other info about about the MRR scan.
+    The caller has some information about what kinds of ranges will be scanned
+    (e.g. "1000 {keypart1=const} ranges"}, but the constant themselves are
+    assumed not to be known when this function is called.
+
+    The flags parameter is a combination of those flags: HA_MRR_SORTED, 
+    HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
+
+  RETURN
+    0     - OK, *cost contains cost of the scan, *bufsz and *flags contain scan
+            parameters.
+    other - Error or can't perform the requested scan
+*/
+
+int handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
+                                   uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  *bufsz= 0;  // No need for buffer
+
+  *flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+  cost->zero();
+  cost->avg_io_cost= 1; // Assume random seeks
+
+  /* Produce the same cost as non-MRR code does */
+  if (*flags & HA_MRR_INDEX_ONLY)
+    cost->io_count= index_only_read_time(keyno, n_rows);
+  else
+    cost->io_count= read_time(keyno, n_ranges, n_rows);
+  return 0;
+}
+
+
+/*
+  Initialize the MRR scan
+
+  SYNOPSIS
+    multi_range_read_init()
+      seq             Range sequence to be traversed
+      seq_init_param  First parameter for seq->init()
+      n_ranges        Number of ranges in the sequence
+      mode            Flags, see the description for details
+      buf             INOUT, memory buffer to be used.
+
+  DESCRIPTION
+    Initialize the MRR scan. This function may do heavyweight scan 
+    initialization like row prefetching/sorting/etc (NOTE: but better not do
+    it here as we may not need it, e.g. if we never satisfy WHERE clause on
+    previous tables. For many implementations it would be natural to do such
+    initializations in _next() call)
+
+    mode is a combination of the following flags: HA_MRR_SORTED,
+    HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION 
 
   NOTES
-    Record is read into table->record[0].
-    *found_range_p returns a valid value only if read_multi_range_next()
-    returns 0.
+    One must have called index_init() before calling this function. Several
+    multi_range_read_init() calls may be made in course of one query.
+
+    Until WL#2623 is done (see section 3.2), the following will also hold:
+    The caller will guarantee that if "seq->init == mrr_ranges_array_init"
+    then seq_init_param will be an array of KEY_MULTI_RANGE structures (with
+    n_ranges elements). This property will only be used by NDB handler until
+    WL#2623 is done (see section 3.2 again).
+     
+    Buffer memory management is done according to the following scenario:
+    The caller allocates the buffer and provides it to the callee by filling
+    in the members of HANDLER_BUFFER structure (see current code for
+    definition of HANDLER_BUFFER).
+    The callee consumes all or some fraction of the provided buffer space, and
+    sets the HANDLER_BUFFER members accordingly.
+    The callee may use the buffer memory until the next multi_range_read_init()
+    call is made, all records have been read, or until index_end() call is
+    made, whichever comes first.
 
   RETURN
-    0			OK, found a row
-    HA_ERR_END_OF_FILE	No (more) rows in range
-    #			Error code
+    0 - OK
+    1 - Error
+*/
+
+int
+handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+                               uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+{
+  DBUG_ENTER("handler::multi_range_read_init");
+  /* For now, do nothing here... just initialize the ranges traversal */
+  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+  mrr_funcs= *seq_funcs;
+  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
+  mrr_have_range= FALSE;
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Get next record in MRR scan
+
+  SYNOPSIS
+    multi_range_read_next()
+       range_info  OUT  If HA_MRR_NO_ASSOCIATION flag is in effect or an error
+                        occured, nothing (but the caller is still allowed to 
+                        store an arbitrary value there)
+                        Otherwise, the opaque value associated with the range
+                        that contains the returned record.
+
+  NOTE
+    It should be possible to process range lists of arbitrary length without
+    any kind of overflows.
+    
+  RETURN
+    0      OK
+    other  Error code
 */
 
-int handler::read_multi_range_next(KEY_MULTI_RANGE **found_range_p)
+int handler::multi_range_read_next(char **range_info)
 {
   int result;
-  DBUG_ENTER("handler::read_multi_range_next");
+  int range_res;
+  DBUG_ENTER("handler::multi_range_read_next");
 
-  /* We should not be called after the last call returned EOF. */
-  DBUG_ASSERT(multi_range_curr < multi_range_end);
+  if (!mrr_have_range)
+  {
+    mrr_have_range= TRUE;
+    goto start;
+  }
 
   do
   {
     /* Save a call if there can be only one row in range. */
-    if (multi_range_curr->range_flag != (UNIQUE_RANGE | EQ_RANGE))
+    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
     {
-      result= read_range_next();
+      if (mrr_restore_scan)
+      {
+        result= mrr_restore_scan_res;
+        if (!result)
+          result= (compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
+        mrr_restore_scan= FALSE;
+        if (!result)
+          eq_range= FALSE;
+
+      }
+      else
+        result= read_range_next();
 
       /* On success or non-EOF errors jump to the end. */
       if (result != HA_ERR_END_OF_FILE)
@@ -2843,30 +3044,559 @@
       result= HA_ERR_END_OF_FILE;
     }
 
+start:
     /* Try the next range(s) until one matches a record. */
-    for (multi_range_curr++;
-         multi_range_curr < multi_range_end;
-         multi_range_curr++)
-    {
-      result= read_range_first(multi_range_curr->start_key.length ?
-                               &multi_range_curr->start_key : 0,
-                               multi_range_curr->end_key.length ?
-                               &multi_range_curr->end_key : 0,
-                               test(multi_range_curr->range_flag & EQ_RANGE),
-                               multi_range_sorted);
+    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
+    {
+      result= read_range_first(mrr_cur_range.start_key.length ?
+                                 &mrr_cur_range.start_key : 0,
+                               mrr_cur_range.end_key.length ?
+                                 &mrr_cur_range.end_key : 0,
+                               test(mrr_cur_range.range_flag & EQ_RANGE),
+                               mrr_is_output_sorted);
       if (result != HA_ERR_END_OF_FILE)
         break;
     }
   }
-  while ((result == HA_ERR_END_OF_FILE) &&
-         (multi_range_curr < multi_range_end));
+  while ((result == HA_ERR_END_OF_FILE) && !range_res);
 
-  *found_range_p= multi_range_curr;
-  DBUG_PRINT("exit",("handler::read_multi_range_next: result %d", result));
+  *range_info= mrr_cur_range.ptr;
+  DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
   DBUG_RETURN(result);
 }
 
 
+/****************************************************************************
+ * DS-MRR implementation 
+ ***************************************************************************/
+
+/*
+  DS-MRR: Initialize and start MRR scan
+
+  SYNOPSIS
+    DsMrr_impl::dsmrr_init()
+      h               Table handler to be used
+      key             Index to be used
+      seq_funcs       Interval sequence enumeration functions
+      seq_init_param  Interval sequence enumeration parameter
+      n_ranges        Number of ranges in the sequence.
+      mode            HA_MRR_* modes to use
+      buf             INOUT Buffer to use
+
+  DESCRIPTION
+    Initialize and start the MRR scan. Depending on the mode parameter, this
+    may use default or DS-MRR implementation.
+
+  RETURN   
+    0     - Ok, Scan started.
+    other - Error
+*/
+
+int DsMrr_impl::dsmrr_init(handler *h, KEY *key,
+                           RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
+                           uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+{
+  int res;
+  uint elem_size;
+
+  if (mode & HA_MRR_USE_DEFAULT_IMPL)
+  {
+    use_default_impl= TRUE;
+    return h->handler::multi_range_read_init(seq_funcs, seq_init_param,
+                                             n_ranges, mode, buf);
+  }
+  use_default_impl= FALSE;
+  
+  rowids_buf= (byte*)buf->buffer;
+  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+  rowids_buf_end= (byte*)buf->buffer_end;
+  
+  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
+  rowids_buf_last= rowids_buf + 
+                      ((rowids_buf_end - rowids_buf)/ elem_size)*
+                      elem_size;
+  rowids_buf_end= rowids_buf_last;
+  res= h->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
+                                         mode, buf);
+  DBUG_ASSERT(!res);
+
+  mrr_key= key;
+  mrr_keyno= h->active_index;
+  
+  if (h->index_end() || h->extra(HA_EXTRA_KEYREAD) || 
+      h->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) ||
+      h->extra(HA_EXTRA_RETRIEVE_ALL_COLS) ||
+      h->index_init(mrr_keyno, FALSE))
+    return 1;
+  
+  if (!(last_idx_tuple= sql_alloc(key->key_length + h->ref_length)))
+    return 1;
+
+  h->mrr_restore_scan= FALSE;
+  /* h->mrr_restore_scan_res needs no initialization */
+
+  if ((res= dsmrr_fill_buffer(h)))
+    return res;
+ 
+  if (h->index_end() || h->rnd_init(FALSE))
+    return 1;
+
+  /*
+    If the above call has scanned through all intervals in *seq, then
+    adjust *buf to indicate that the remaining buffer space will not be used.
+  */
+  if (dsmrr_eof)
+    buf->end_of_used_area= rowids_buf_last;
+  return 0;
+}
+
+
+static int rowid_cmp(void *h, byte *a, byte *b)
+{
+  return ((handler*)h)->cmp_ref(a, b);
+}
+
+
+/*
+  DS-MRR: Fill the buffer with rowids and sort it by rowid
+  
+  SYNOPSIS
+    DsMrr_impl::dsmrr_fill_buffer()
+      h  Table handler
+
+  DESCRIPTION
+    {This is an internal function of DiskSweep MRR implementation}
+    Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
+    buffer. When the buffer is full or scan is completed, sort the buffer by 
+    rowid and return.
+    
+    The function assumes that rowids buffer is empty when it is invoked. 
+
+  RETURN 
+    0     - Some rowids are in the rowids buffer, properly ordered
+    other - Error
+*/
+
+int DsMrr_impl::dsmrr_fill_buffer(handler *h)
+{
+  char *range_info;
+  int res;
+
+  rowids_buf_cur= rowids_buf;
+  while ((rowids_buf_cur < rowids_buf_end) && 
+         !(res= h->handler::multi_range_read_next(&range_info)))
+  {
+    /* Put rowid, or {rowid, range_id} pair into the buffer */
+    h->position((const byte*)(h->table->record[0]));
+    memcpy(rowids_buf_cur, h->ref, h->ref_length);
+    rowids_buf_cur += h->ref_length;
+
+    if (is_mrr_assoc)
+    {
+      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
+      rowids_buf_cur += sizeof(void*);
+    }
+  }
+
+  if (res && res != HA_ERR_END_OF_FILE)
+    return res; 
+  dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
+
+  if (!res && (h->mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE)))
+  {
+    /* Save the index position: search tuple + rowid */
+    key_copy(last_idx_tuple, h->table->record[0], mrr_key,
+             mrr_key->key_length);
+    key_zero_nulls(last_idx_tuple, mrr_key);
+
+    memcpy(last_idx_tuple + mrr_key->key_length, h->ref, h->ref_length);
+    h->mrr_restore_scan= TRUE;
+  }
+
+  /* Sort the buffer contents by rowid */
+  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
+  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
+  
+  qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
+         (void*)h);
+  rowids_buf_last= rowids_buf_cur;
+  rowids_buf_cur=  rowids_buf;
+  return 0;
+}
+
+
+/*
+  DS-MRR implementation: multi_range_read_next() function
+*/
+
+int DsMrr_impl::dsmrr_next(handler *h, char **range_info)
+{
+  int res;
+  
+  if (use_default_impl)
+    return h->handler::multi_range_read_next(range_info);
+    
+  if ((rowids_buf_cur == rowids_buf_last))
+  {
+    h->rnd_end();
+    if (h->extra(HA_EXTRA_KEYREAD) ||
+        h->extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) ||
+        h->extra(HA_EXTRA_RETRIEVE_ALL_COLS) ||
+        h->index_init(mrr_keyno, FALSE))
+      return HA_ERR_KEY_NOT_FOUND;
+    /* 
+      The following check is made here after index_init call so that we
+      leave the table handler in "scanning the index" state after returning
+      EOF. De-initialization code depends on this.
+    */
+    if (dsmrr_eof)
+      return HA_ERR_END_OF_FILE;
+
+    if (h->mrr_restore_scan)
+    {
+      h->mrr_restore_scan_res= 
+        h->index_read(h->table->record[0], last_idx_tuple,
+                      mrr_key->key_length + h->ref_length, 
+                      HA_READ_AFTER_KEY);
+    }
+    res= dsmrr_fill_buffer(h);
+
+    h->index_end();
+    h->rnd_init(FALSE);
+
+    if (res)
+      return res;
+  }
+  
+  /* Return EOF if there are no rowids in the buffer after re-fill attempt */
+  if (rowids_buf_cur == rowids_buf_last)
+    return HA_ERR_END_OF_FILE;
+
+  res= h->rnd_pos((byte*)h->table->record[0], rowids_buf_cur);
+  rowids_buf_cur += h->ref_length;
+  
+  if (is_mrr_assoc)
+  {
+    memcpy(range_info, rowids_buf_cur, sizeof(void*));
+    rowids_buf_cur += sizeof(void*);
+  }
+  return res;
+}
+
+
+/*
+  DS-MRR implementation: multi_range_read_info() function
+*/
+int DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, uint *bufsz,
+                           uint *flags, COST_VECT *cost)
+{  
+  int res;
+  uint def_flags= *flags;
+  uint def_bufsz= *bufsz;
+
+  /* Get cost/flags/mem_usage of default MRR implementation */
+  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
+                                         &def_flags, cost);
+  DBUG_ASSERT(!res);
+
+  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || 
+      choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost))
+  {
+    /* Default implementation is choosen */
+    DBUG_PRINT("info", ("Default MRR implementation choosen"));
+    *flags= def_flags;
+    *bufsz= def_bufsz;
+  }
+  else
+  {
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+  }
+
+  return 0;
+}
+
+
+/*
+  DS-MRR Implementation: multi_range_read_info_const() function
+*/
+
+ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                 void *seq_init_param, uint n_ranges, 
+                                 uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  ha_rows rows;
+  uint def_flags= *flags;
+  uint def_bufsz= *bufsz;
+  /* Get cost/flags/mem_usage of default MRR implementation */
+  rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
+                                                n_ranges, &def_bufsz, 
+                                                &def_flags, cost);
+  if (rows == HA_POS_ERROR)
+  {
+    /* Default implementation can't perform MRR scan => we can't either */
+    return rows;
+  }
+
+  /*
+    If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
+    use the default MRR implementation (we need it for UPDATE/DELETE).
+    Otherwise, make a choice based on cost and @@optimizer_use_mrr.
+  */
+  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
+      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
+  {
+    DBUG_PRINT("info", ("Default MRR implementation choosen"));
+    *flags= def_flags;
+    *bufsz= def_bufsz;
+  }
+  else
+  {
+    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;
+    DBUG_PRINT("info", ("DS-MRR implementation choosen"));
+  }
+  return rows;
+}
+
+
+/*
+  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
+
+  SYNOPSIS
+    choose_mrr_impl()
+      keyno       Index number
+      rows        E(full rows to be retrieved)
+      flags  IN   MRR flags provided by the MRR user
+             OUT  If DS-MRR is choosen, flags of DS-MRR implementation
+                  else the value is not modified
+      bufsz  IN   If DS-MRR is choosen, buffer use of DS-MRR implementation
+                  else the value is not modified
+      cost   IN   Cost of default MRR implementation
+             OUT  If DS-MRR is choosen, cost of DS-MRR scan 
+                  else the value is not modified
+
+  DESCRIPTION
+    Make the choice between using Default MRR implementation and DS-MRR.
+    This function contains common functionality factored out of dsmrr_info()
+    and dsmrr_info_const(). The function assumes that the default MRR
+    implementation's applicability requirements are satisfied.
+
+  RETURN
+    TRUE   Default MRR implementation should be used
+    FALSE  DS-MRR implementation should be used
+*/
+
+bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
+                                 uint *bufsz, COST_VECT *cost)
+{
+  COST_VECT dsmrr_cost;
+  bool res;
+  THD *thd= current_thd;
+  if ((thd->variables.optimizer_use_mrr == 2) || 
+      (*flags & HA_MRR_INDEX_ONLY) || (*flags & HA_MRR_SORTED) ||
+      (keyno == h->table_share->primary_key && 
+       h->primary_key_is_clustered()))
+  {
+    /* Use the default implementation */
+    return TRUE;
+  }
+
+  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
+    return TRUE;
+  
+  bool force_dsmrr;
+  /* 
+    If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of
+    DS-MRR and Default implementations cost. This allows one to force use of
+    DS-MRR whenever it is applicable without affecting other cost-based
+    choices.
+  */
+  if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) &&
+      dsmrr_cost.total_cost() > cost->total_cost())
+    dsmrr_cost= *cost;
+
+  if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
+  {
+    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
+    *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
+    *cost= dsmrr_cost;
+    res= FALSE;
+  }
+  else
+  {
+    /* Use the default MRR implementation */
+    res= TRUE;
+  }
+  return res;
+}
+
+
+static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, COST_VECT *cost);
+
+/*
+  Get cost of DS-MRR scan
+
+  SYNOPSIS
+    get_disk_sweep_mrr_cost()
+      keynr              Index to be used
+      rows               E(Number of rows to be scanned)
+      flags              Scan parameters (HA_MRR_* flags)
+      buffer_size INOUT  Buffer size
+      cost        OUT    The cost
+
+  RETURN
+    FALSE  OK
+    TRUE   Error, DS-MRR cannot be used (the buffer is too small for even 1
+           rowid)
+*/
+
+bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
+                                         uint *buffer_size, COST_VECT *cost)
+{
+  ulong max_buff_entries, elem_size;
+  ha_rows rows_in_full_step, rows_in_last_step;
+  uint n_full_steps;
+  double index_read_cost;
+
+  elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
+  max_buff_entries = *buffer_size / elem_size;
+
+  if (!max_buff_entries)
+    return TRUE; /* Buffer has not enough space for even 1 rowid */
+
+  /* Number of iterations we'll make with full buffer */
+  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
+  
+  /* 
+    Get numbers of rows we'll be processing in 
+     - non-last sweep, with full buffer 
+     - last iteration, with non-full buffer
+  */
+  rows_in_full_step= max_buff_entries;
+  rows_in_last_step= rows % max_buff_entries;
+  
+  /* Adjust buffer size if we expect to use only part of the buffer */
+  if (n_full_steps)
+  {
+    get_sort_and_sweep_cost(h->table, rows, cost);
+    cost->multiply(n_full_steps);
+  }
+  else
+  {
+    cost->zero();
+    *buffer_size= rows_in_last_step * elem_size;
+  }
+  
+  COST_VECT last_step_cost;
+  get_sort_and_sweep_cost(h->table, rows_in_last_step, &last_step_cost);
+  cost->add(&last_step_cost);
+ 
+  if (n_full_steps != 0)
+    cost->mem_cost= *buffer_size;
+  else
+    cost->mem_cost= rows_in_last_step * elem_size;
+  
+  /* Total cost of all index accesses */
+  index_read_cost= h->index_only_read_time(keynr, rows);
+  cost->add_io(index_read_cost, 1 /* Random seeks */);
+  return FALSE;
+}
+
+
+/* 
+  Get cost of one sort-and-sweep step
+
+  SYNOPSIS
+    get_sort_and_sweep_cost()
+      table       Table being accessed
+      nrows       Number of rows to be sorted and retrieved
+      cost   OUT  The cost
+
+  DESCRIPTION
+    Get cost of these operations:
+     - sort an array of #nrows ROWIDs using qsort
+     - read #nrows records from table in a sweep.
+*/
+
+static 
+void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
+{
+  if (nrows)
+  {
+    get_sweep_read_cost(table, nrows, FALSE, cost);
+    /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
+    double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
+    if (cmp_op < 3)
+      cmp_op= 3;
+    cost->cpu_cost += cmp_op * log2(cmp_op);
+  }
+  else
+    cost->zero();
+}
+
+
+/*
+  Get cost of reading nrows table records in a "disk sweep"
+
+  SYNOPSIS
+    get_sweep_read_cost()
+      table             Table to be accessed
+      nrows             Number of rows to retrieve
+      interrupted       TRUE <=> Assume that the disk sweep will be
+                        interrupted by other disk IO. FALSE - otherwise.
+      cost         OUT  The cost.
+
+  DESCRIPTION
+    Get cost of reading nrows table records in a "disk sweep" (This is done
+    by making a sequence of handler->rnd_pos() calls for ordered sequence of
+    rowids).
+*/
+
+void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, COST_VECT *cost)
+{
+  double result;
+  DBUG_ENTER("get_sweep_read_cost");
+
+  cost->zero();
+  if (table->file->primary_key_is_clustered())
+  {
+    cost->io_count= table->file->read_time(table->s->primary_key, nrows, 
+                                           nrows);
+  }
+  else
+  {
+    double n_blocks=
+      ceil(ulonglong2double(table->file->data_file_length) / IO_SIZE);
+    double busy_blocks=
+      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
+    if (busy_blocks < 1.0)
+      busy_blocks= 1.0;
+    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
+                       busy_blocks));
+    /*
+      Disabled: Bail out if # of blocks to read is bigger than # of blocks in
+      table data file.
+    if (max_cost != DBL_MAX  && (busy_blocks+index_reads_cost) >= n_blocks)
+      return 1;
+    */
+    cost->io_count= busy_blocks;
+    if (!interrupted)
+    {
+      /* Assume reading is done in one 'sweep' */
+      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
+                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
+    }
+  }
+  DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
+  DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+ * DS-MRR implementation ends
+ ***************************************************************************/
+
+
 /*
   Read first row between two ranges.
   Store ranges for future calls to read_range_next
@@ -2889,7 +3619,8 @@
 
 int handler::read_range_first(const key_range *start_key,
 			      const key_range *end_key,
-			      bool eq_range_arg, bool sorted)
+			      bool eq_range_arg,
+                              bool sorted /* ignored */)
 {
   int result;
   DBUG_ENTER("handler::read_range_first");
@@ -2922,7 +3653,7 @@
 
 
 /*
-  Read next row between two ranges.
+  Read next row between two endpoints.
 
   SYNOPSIS
     read_range_next()

--- 1.198/sql/handler.h	2006-09-12 13:49:53 +03:00
+++ 1.199/sql/handler.h	2006-09-12 13:49:53 +03:00
@@ -700,6 +700,133 @@
 
 typedef struct system_status_var SSV;
 
+
+typedef void *range_seq_t;
+
+typedef struct st_range_seq_if
+{
+  /*
+    Initialize the traversal of range sequence
+    
+    SYNOPSIS
+      init()
+        init_params  The seq_init_param parameter 
+        n_ranges     The number of ranges obtained 
+        flags        HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
+
+    RETURN
+      An opaque value to be used as RANGE_SEQ_IF::next() parameter
+  */
+  range_seq_t (*init)(void *init_params, uint n_ranges, uint flags);
+
+
+  /*
+    Get the next range in the range sequence
+
+    SYNOPSIS
+      next()
+        seq    The value returned by RANGE_SEQ_IF::init()
+        range  OUT
+    
+    RETURN
+      0 - Ok, the range structure filled with info about the next range
+      1 - No more ranges
+  */
+  uint (*next) (range_seq_t seq, KEY_MULTI_RANGE *range);
+} RANGE_SEQ_IF;
+
+uint16 &mrr_persistent_flag_storage(range_seq_t seq, uint idx);
+char* &mrr_get_ptr_by_idx(range_seq_t seq, uint idx);
+
+class COST_VECT
+{ 
+public:
+  double io_count;     /* number of I/O                 */
+  double avg_io_cost;  /* cost of an average I/O oper.  */
+  double cpu_cost;     /* cost of operations in CPU     */
+  double mem_cost;     /* cost of used memory           */ 
+  double import_cost;  /* cost of remote operations     */
+  
+  enum { IO_COEFF=1 };
+  enum { CPU_COEFF=1 };
+  enum { MEM_COEFF=1 };
+  enum { IMPORT_COEFF=1 };
+
+  double total_cost() 
+  {
+    return IO_COEFF*io_count*avg_io_cost + CPU_COEFF * cpu_cost +
+           MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
+  }
+
+  void zero()
+  {
+    avg_io_cost= 1.0;
+    io_count= cpu_cost= mem_cost= import_cost= 0.0;
+  }
+
+  void multiply(double m)
+  {
+    io_count *= m;
+    cpu_cost *= m;
+    import_cost *= m;
+    /* Don't multiply mem_cost */
+  }
+
+  void add(const COST_VECT* cost)
+  {
+    double io_count_sum= io_count + cost->io_count;
+    add_io(cost->io_count, cost->avg_io_cost);
+    io_count= io_count_sum;
+    cpu_cost += cost->cpu_cost;
+  }
+  void add_io(double add_io_cnt, double add_avg_cost)
+  {
+    double io_count_sum= io_count + add_io_cnt;
+    avg_io_cost= (io_count * avg_io_cost + 
+                  add_io_cnt * add_avg_cost) / io_count_sum;
+    io_count= io_count_sum;
+  }
+};
+
+
+/*
+  The below two are not used (and not handled) in this milestone of this WL
+  entry because there seems to be no use for them at this stage of
+  implementation. (TODO is that so??)
+*/
+#define HA_MRR_SINGLE_POINT 1
+#define HA_MRR_FIXED_KEY  2
+
+/* 
+  Indicates that RANGE_SEQ_IF::next(&range) doesn't need to fill in the
+  'range' parameter.
+*/
+#define HA_MRR_NO_ASSOCIATION 4
+
+/* 
+  The MRR user will provide ranges in key order, and MRR implementation
+  must return rows in key order.
+*/
+#define HA_MRR_SORTED 8
+
+/* MRR implementation doesn't have to retrieve full records */
+#define HA_MRR_INDEX_ONLY 16
+
+/* 
+  The passed memory buffer is of maximum possible size, the caller can't
+  assume larger buffer.
+*/
+#define HA_MRR_LIMITS 32
+
+
+/*
+  Default MRR implementation is used
+  (The choice is made by **_info[_const]() function which may set this
+   flag. SQL layer remembers the flag value and then passes it to
+   multi_read_range_init().
+*/
+#define HA_MRR_USE_DEFAULT_IMPL 64
+
 /*
   The handler class is the interface for dynamically loadable
   storage engines. Do not add ifdefs and take care when adding or
@@ -708,6 +835,7 @@
 class handler :public Sql_alloc
 {
   friend class ha_partition;
+  friend class DsMrr_impl;
 
  protected:
   struct st_table_share *table_share;   /* The table definition */
@@ -746,12 +874,23 @@
   time_t create_time;			/* When table was created */
   time_t check_time;
   time_t update_time;
-
-  /* The following are for read_multi_range */
-  bool multi_range_sorted;
-  KEY_MULTI_RANGE *multi_range_curr;
-  KEY_MULTI_RANGE *multi_range_end;
-  HANDLER_BUFFER *multi_range_buffer;
+  
+  /* Multi range read implementation-used members: */
+  range_seq_t mrr_iter;    /* MRR range sequence iterator being traversed */
+  RANGE_SEQ_IF mrr_funcs;  /* Saved MRR range sequence traversal functions */
+  HANDLER_BUFFER *multi_range_buffer; /* Saved MRR buffer info */
+  uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
+  /* TRUE <=> source MRR ranges and the output are ordered */
+  bool mrr_is_output_sorted;
+  
+  /* TRUE <=> we're currently traversing a range in mrr_cur_range. */
+  bool mrr_have_range;
+  /* Current range (the one we're now returning rows from) */
+  KEY_MULTI_RANGE mrr_cur_range;
+  
+  /* Default MRR implementation: */
+  bool mrr_restore_scan; /* TRUE <=> we're restoring the scan */
+  int mrr_restore_scan_res; /* iff mrr_restore_scan: return value of next call */
 
   /* The following are for read_range() */
   key_range save_end_range, *end_range;
@@ -778,7 +917,7 @@
     ref(0), data_file_length(0), max_data_file_length(0), index_file_length(0),
     delete_length(0), auto_increment_value(0),
     records(0), deleted(0), mean_rec_length(0),
-    create_time(0), check_time(0), update_time(0),
+    create_time(0), check_time(0), update_time(0), mrr_restore_scan(FALSE),
     key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
     ref_length(sizeof(my_off_t)), block_size(0),
     ft_handler(0), inited(NONE), implicit_emptied(0),
@@ -826,10 +965,26 @@
     table= table_arg;
     table_share= share;
   }
+  /* Estimates calculation */
   virtual double scan_time()
     { return ulonglong2double(data_file_length) / IO_SIZE + 2; }
   virtual double read_time(uint index, uint ranges, ha_rows rows)
  { return rows2double(ranges+rows); }
+
+  virtual double index_only_read_time(uint keynr, double records);
+  
+  virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                              void *seq_init_param, 
+                                              uint n_ranges, uint *bufsz,
+                                              uint *flags, COST_VECT *cost);
+  virtual int multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                                    uint *bufsz, uint *flags, COST_VECT *cost);
+  virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+                                    uint n_ranges, uint mode,
+                                    HANDLER_BUFFER *buf);
+  virtual int multi_range_read_next(char **range_info);
+
+
   virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
   virtual bool has_transactions(){ return 0;}
   virtual uint extra_rec_buf_length() const { return 0; }
@@ -1119,10 +1274,6 @@
   virtual int index_next_same(byte *buf, const byte *key, uint keylen);
   virtual int index_read_last(byte * buf, const byte * key, uint key_len)
    { return (my_errno=HA_ERR_WRONG_COMMAND); }
-  virtual int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                                     KEY_MULTI_RANGE *ranges, uint range_count,
-                                     bool sorted, HANDLER_BUFFER *buffer);
-  virtual int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
   virtual int read_range_first(const key_range *start_key,
                                const key_range *end_key,
                                bool eq_range, bool sorted);
@@ -1454,7 +1605,46 @@
   }
 };
 
-	/* Some extern variables used with handlers */
+class handler;
+class DsMrr_impl
+{
+public:
+  DsMrr_impl(handler *handler_par) : h(handler_par) {};
+  byte *rowids_buf;       // rows buffer
+
+  byte *rowids_buf_cur;   // current position in rowids buffer
+  byte *rowids_buf_last;  // just-after-the-end position in rowids buffer 
+  byte *rowids_buf_end;
+  
+  KEY  *mrr_key;
+  byte *last_idx_tuple;
+  uint mrr_keyno;
+  bool dsmrr_eof;
+
+  bool is_mrr_assoc;  // TRUE <=> need mrr association (and the buffer holds 
+                      //  {rowid, range_id} pairs
+  /* Some extern variables used with handlers */
+  handler *h;
+  bool use_default_impl;
+
+  int dsmrr_init(handler *h, KEY *key, RANGE_SEQ_IF *seq_funcs, 
+                 void *seq_init_param, uint n_ranges, uint mode, 
+                 HANDLER_BUFFER *buf);
+  int dsmrr_fill_buffer(handler *h);
+  int dsmrr_next(handler *h, char **range_info);
+
+  int dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz,
+                 uint *flags, COST_VECT *cost);
+
+  ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, 
+                            void *seq_init_param, uint n_ranges, uint *bufsz,
+                            uint *flags, COST_VECT *cost);
+private:
+  bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, 
+                       COST_VECT *cost);
+  bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, 
+                               uint *buffer_size, COST_VECT *cost);
+};
 
 extern handlerton *sys_table_types[];
 extern const char *ha_row_type[];

--- 1.39/sql/key.cc	2006-09-12 13:49:53 +03:00
+++ 1.40/sql/key.cc	2006-09-12 13:49:53 +03:00
@@ -150,6 +150,30 @@
 
 
 /*
+  Zero the null components of key tuple
+  SYNOPSIS
+    key_zero_nulls()
+      tuple
+      key_info
+
+  DESCRIPTION
+  
+*/
+
+void key_zero_nulls(byte *tuple, KEY *key_info)
+{
+  KEY_PART_INFO *key_part= key_info->key_part;
+  KEY_PART_INFO *key_part_end= key_part + key_info->key_parts;
+  for (; key_part != key_part_end; key_part++)
+  {
+    if (key_part->null_bit && *tuple)
+      bzero(tuple+1, key_part->store_length-1);
+    tuple+= key_part->store_length;
+  }
+}
+
+
+/*
   Restore a key from some buffer to record.
 
   SYNOPSIS

--- 1.385/sql/mysql_priv.h	2006-09-12 13:49:53 +03:00
+++ 1.386/sql/mysql_priv.h	2006-09-12 13:49:53 +03:00
@@ -1185,6 +1185,7 @@
 /* key.cc */
 int find_ref_key(KEY *key, uint key_count, Field *field, uint *key_length);
 void key_copy(byte *to_key, byte *from_record, KEY *key_info, uint key_length);
+void key_zero_nulls(byte *tuple, KEY *key_info);
 void key_restore(byte *to_record, byte *from_key, KEY *key_info,
                  uint key_length);
 bool key_cmp_if_same(TABLE *form,const byte *key,uint index,uint key_length);

--- 1.542/sql/mysqld.cc	2006-09-12 13:49:53 +03:00
+++ 1.543/sql/mysqld.cc	2006-09-12 13:49:53 +03:00
@@ -6156,8 +6156,8 @@
    "When reading rows in sorted order after a sort, the rows are read through this buffer to avoid a disk seeks. If not set, then it's set to the value of record_buffer.",
    (gptr*) &global_system_variables.read_rnd_buff_size,
    (gptr*) &max_system_variables.read_rnd_buff_size, 0,
-   GET_ULONG, REQUIRED_ARG, 256*1024L, IO_SIZE*2+MALLOC_OVERHEAD,
-   ~0L, MALLOC_OVERHEAD, IO_SIZE, 0},
+   GET_ULONG, REQUIRED_ARG, 256*1024L, 64 /*IO_SIZE*2+MALLOC_OVERHEAD*/ ,
+   ~0L, MALLOC_OVERHEAD, 1 /* Small lower limit to be able to test MRR */, 0},
   {"record_buffer", OPT_RECORD_BUFFER,
    "Alias for read_buffer_size",
    (gptr*) &global_system_variables.read_buff_size,

--- 1.205/sql/opt_range.cc	2006-09-12 13:49:53 +03:00
+++ 1.206/sql/opt_range.cc	2006-09-12 13:49:53 +03:00
@@ -35,7 +35,7 @@
     
     The lists are returned in form of complicated structure of interlinked
     SEL_TREE/SEL_IMERGE/SEL_ARG objects.
-    See check_quick_keys, find_used_partitions for examples of how to walk 
+    See quick_range_seq_next, find_used_partitions for examples of how to walk 
     this structure.
     All direct "users" of this module are located within this file, too.
 
@@ -60,6 +60,49 @@
 
   Record retrieval code for range/index_merge/groupby-min-max.
     Implementations of QUICK_*_SELECT classes.
+
+  KeyTupleFormat
+  ~~~~~~~~~~~~~~
+  The code in this file (and elsewhere) makes operations on key value tuples.
+  Those tuples are stored in the following format:
+  
+  The tuple is a sequence of key part values. The length of key part value
+  depends only on its type (and not depends on the what value is stored)
+  //unless it is prefix! The prefix may be for ???
+  
+    KeyTuple: keypart1-data, keypart2-data, ...
+  
+  The value of each keypart is stored in the following format:
+  
+    keypart_data: [isnull_byte] keypart-value-bytes
+
+  If a keypart may have a NULL value (key_part->field->real_maybe_null() can
+  be used to check this), then the first byte is a NULL indicator with the 
+  following valid values:
+    1  - keypart has NULL value.
+    0  - keypart has non-NULL value.
+
+  <questionable-statement> If isnull_byte==1 (NULL value), then the following
+  keypart->length bytes must be 0.
+  </questionable-statement>
+
+  keypart-value-bytes holds the value. Its format depends on the field type.
+  The length of keypart-value-bytes may or may not depend on the value being
+  stored. The default is that length is static and equal to 
+  KEY_PART_INFO::length.
+  
+  Key parts with (key_part_flag & HA_BLOB_PART) have length depending of the 
+  value:
+  
+     keypart-value-bytes: value_length value_bytes
+
+  The value_length part itself occupies HA_KEY_BLOB_LENGTH=2 bytes.
+
+  See key_copy() and key_restore() for code to move data between index tuple 
+  in some buffer and table->record[0].
+
+  CAUTION: the above description is only psergey's understanding of the 
+           subject and may omit some details.
 */
 
 #ifdef USE_PRAGMA_IMPLEMENTATION
@@ -212,6 +255,7 @@
   }
   void store_min(uint length,char **min_key,uint min_key_flag)
   {
+    /* "(kp1 > c1) AND (kp2 OP c2) AND ..." -> (kp1 > c1) */
     if ((min_flag & GEOM_FLAG) ||
         (!(min_flag & NO_MIN_RANGE) &&
 	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
@@ -226,6 +270,16 @@
       (*min_key)+= length;
     }
   }
+
+  /*
+    SYNOPSIS
+      store()
+       length              Key pack length
+       min_key       INOUT store here and advance the pointer
+       min_key_flag        prev-key-part
+       max_key       INOUT store here and advance the pointer
+       max_key_flag        prev-key-part
+  */
   void store(uint length,char **min_key,uint min_key_flag,
 	     char **max_key, uint max_key_flag)
   {
@@ -244,12 +298,20 @@
     }
   }
 
+  /*
+     store_min_key()
+       key             
+       range_key       INOUT
+       range_key_flag  INOUT
+  */
   void store_min_key(KEY_PART *key,char **range_key, uint *range_key_flag)
   {
-    SEL_ARG *key_tree= first();
+    SEL_ARG *key_tree= first(); //find the left-most interval of this.
+    //   left < X < +inf ... and amend with next key part(s) ???
     key_tree->store(key[key_tree->part].store_length,
 		    range_key,*range_key_flag,range_key,NO_MAX_RANGE);
     *range_key_flag|= key_tree->min_flag;
+    
     if (key_tree->next_key_part &&
 	key_tree->next_key_part->part == key_tree->part+1 &&
 	!(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)) &&
@@ -470,10 +532,9 @@
 static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond);
 
 static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
-static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree);
-static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
-				char *min_key,uint min_key_flag,
-				char *max_key, uint max_key_flag);
+static ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
+                                  SEL_ARG *tree, bool *native_mrr,
+                                  COST_VECT *cost);
 
 QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
                                      SEL_ARG *key_tree,
@@ -497,8 +558,6 @@
 static int get_index_merge_params(PARAM *param, key_map& needed_reg,
                            SEL_IMERGE *imerge, double *read_time,
                            ha_rows* imerge_rows);
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
-                                       int keynr);
 
 #ifndef DBUG_OFF
 static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
@@ -818,7 +877,8 @@
 
 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                        bool no_alloc, MEM_ROOT *parent_alloc)
-  :dont_free(0),error(0),free_file(0),in_range(0),cur_range(NULL),range(0)
+  : dont_free(0),error(0),free_file(0),in_range(0),cur_range(NULL),range(0),
+    mrr_use_native(TRUE)
 {
   sorted= 0;
   index= key_nr;
@@ -1622,9 +1682,10 @@
 public:
   SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
   uint     key_idx; /* key number in PARAM::key */
+  bool     mrr_use_native; /* Whether native or default MRR is used */
 
-  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg)
-   : key(key_arg), key_idx(idx_arg)
+  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg, bool use_native_arg)
+   : key(key_arg), key_idx(idx_arg), mrr_use_native(use_native_arg)
   {}
   virtual ~TRP_RANGE() {}                     /* Remove gcc warning */
 
@@ -1637,6 +1698,7 @@
     {
       quick->records= records;
       quick->read_time= read_cost;
+      quick->mrr_use_native= mrr_use_native;
     }
     DBUG_RETURN(quick);
   }
@@ -1938,9 +2000,10 @@
     if (!head->used_keys.is_clear_all())
     {
       int key_for_use= find_shortest_key(head, &head->used_keys);
-      double key_read_time= (get_index_only_read_time(&param, records,
-                                                     key_for_use) +
-                             (double) records / TIME_FOR_COMPARE);
+      double key_read_time= 
+        param.table->file->index_only_read_time(key_for_use, 
+                                                rows2double(records)) +
+        (double) records / TIME_FOR_COMPARE;
       DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
                            "read time %g", key_for_use, key_read_time));
       if (key_read_time < read_time)
@@ -2715,7 +2778,7 @@
     {
       /* 
         Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and
-        we got "const1 CMP fieldX CMP const2" interval <-- psergey-todo: change
+        we got "const1 CMP fieldX CMP const2" interval
       */
       DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
                                                     ppar->range_param.
@@ -3518,42 +3581,6 @@
 }
 
 
-/*
-  Calculate cost of 'index only' scan for given index and number of records.
-
-  SYNOPSIS
-    get_index_only_read_time()
-      param    parameters structure
-      records  #of records to read
-      keynr    key to read
-
-  NOTES
-    It is assumed that we will read trough the whole key range and that all
-    key blocks are half full (normally things are much better). It is also
-    assumed that each time we read the next key from the index, the handler
-    performs a random seek, thus the cost is proportional to the number of
-    blocks read.
-
-  TODO:
-    Move this to handler->read_time() by adding a flag 'index-only-read' to
-    this call. The reason for doing this is that the current function doesn't
-    handle the case when the row is stored in the b-tree (like in innodb
-    clustered index)
-*/
-
-static double get_index_only_read_time(const PARAM* param, ha_rows records,
-                                       int keynr)
-{
-  double read_time;
-  uint keys_per_block= (param->table->file->block_size/2/
-			(param->table->key_info[keynr].key_length+
-			 param->table->file->ref_length) + 1);
-  read_time=((double) (records+keys_per_block-1)/
-             (double) keys_per_block);
-  return read_time;
-}
-
-
 typedef struct st_ror_scan_info
 {
   uint      idx;      /* # of used key in param->keys */
@@ -3629,9 +3656,9 @@
     if (bitmap_is_set(&param->needed_fields, key_part->fieldnr))
       bitmap_set_bit(&ror_scan->covered_fields, key_part->fieldnr);
   }
+  double rows= rows2double(param->table->quick_rows[ror_scan->keynr]);
   ror_scan->index_read_cost=
-    get_index_only_read_time(param, param->table->quick_rows[ror_scan->keynr],
-                             ror_scan->keynr);
+    param->table->file->index_only_read_time(ror_scan->keynr, rows);
   DBUG_RETURN(ror_scan);
 }
 
@@ -4406,6 +4433,7 @@
   int idx;
   SEL_ARG **key,**end, **key_to_read= NULL;
   ha_rows best_records;
+  bool    best_uses_native_mrr;
   TRP_RANGE* read_plan= NULL;
   bool pk_is_clustered= param->table->file->primary_key_is_clustered();
   DBUG_ENTER("get_key_scans_params");
@@ -4419,9 +4447,7 @@
                                       "tree scans"););
   tree->ror_scans_map.clear_all();
   tree->n_ror_scans= 0;
-  for (idx= 0,key=tree->keys, end=key+param->keys;
-       key != end ;
-       key++,idx++)
+  for (idx= 0,key=tree->keys, end=key+param->keys; key != end; key++,idx++)
   {
     ha_rows found_records;
     double found_read_time;
@@ -4435,49 +4461,24 @@
       bool read_index_only= index_read_must_be_used ? TRUE :
                             (bool) param->table->used_keys.is_set(keynr);
 
-      found_records= check_quick_select(param, idx, *key);
-      if (param->is_ror_scan)
+      COST_VECT cost;
+      bool native_mrr;
+      found_records= check_quick_select(param, idx, read_index_only, *key,
+                                        &native_mrr, &cost);
+      found_read_time= cost.total_cost();
+      if ((found_records != HA_POS_ERROR) && param->is_ror_scan)
       {
         tree->n_ror_scans++;
         tree->ror_scans_map.set_bit(idx);
       }
-      double cpu_cost= (double) found_records / TIME_FOR_COMPARE;
-      if (found_records != HA_POS_ERROR && found_records > 2 &&
-          read_index_only &&
-          (param->table->file->index_flags(keynr, param->max_key_part,1) &
-           HA_KEYREAD_ONLY) &&
-          !(pk_is_clustered && keynr == param->table->s->primary_key))
-      {
-        /*
-          We can resolve this by only reading through this key. 
-          0.01 is added to avoid races between range and 'index' scan.
-        */
-        found_read_time= get_index_only_read_time(param,found_records,keynr) +
-                         cpu_cost + 0.01;
-      }
-      else
-      {
-        /*
-          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
-          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
-        */
-	found_read_time= param->table->file->read_time(keynr,
-                                                       param->range_count,
-                                                       found_records) +
-			 cpu_cost + 0.01;
-      }
-      DBUG_PRINT("info",("key %s: found_read_time: %g (cur. read_time: %g)",
-                         param->table->key_info[keynr].name, found_read_time,
-                         read_time));
-
       if (read_time > found_read_time && found_records != HA_POS_ERROR
           /*|| read_time == DBL_MAX*/ )
       {
         read_time=    found_read_time;
         best_records= found_records;
         key_to_read=  key;
+        best_uses_native_mrr= native_mrr;
       }
-
     }
   }
 
@@ -4486,7 +4487,8 @@
   if (key_to_read)
   {
     idx= key_to_read - tree->keys;
-    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx)))
+    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx,
+                                                    best_uses_native_mrr)))
     {
       read_plan->records= best_records;
       read_plan->is_ror= tree->ror_scans_map.is_set(idx);
@@ -6437,212 +6439,235 @@
 
 #endif
 
+/****************************************************************************
+  Implementation of MRR range sequence interface that traverses the SEL_ARG* 
+  tree 
+ ****************************************************************************/
 
-/*
-  Calculate estimate of number records that will be retrieved by a range
-  scan on given index using given SEL_ARG intervals tree.
-  SYNOPSIS
-    check_quick_select
-      param  Parameter from test_quick_select
-      idx    Number of index to use in PARAM::key SEL_TREE::key
-      tree   Transformed selection condition, tree->key[idx] holds intervals
-             tree to be used for scanning.
-  NOTES
-    param->is_ror_scan is set to reflect if the key scan is a ROR (see
-    is_key_scan_ror function for more info)
-    param->table->quick_*, param->range_count (and maybe others) are
-    updated with data of given key scan, see check_quick_keys for details.
+/* MRR range sequence, SEL_ARG* implementation: stack entry */
+typedef struct st_range_seq_entry 
+{
+  /* 
+    pointers in min and max keys. They point to right-after-end of key
+    images. The 0-th entry has these pointing to key tuple start.
+  */
+  char *min_key, *max_key;
+  
+  /* Flags (accumulated with prev. keypart) */
+  uint min_key_flag, max_key_flag;
 
-  RETURN
-    Estimate # of records to be retrieved.
-    HA_POS_ERROR if estimate calculation failed due to table handler problems.
+  SEL_ARG *key_tree;
+} RANGE_SEQ_ENTRY;
 
-*/
 
-static ha_rows
-check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
+/*
+  MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context
+*/
+typedef struct st_sel_arg_range_seq
 {
-  ha_rows records;
-  bool    cpk_scan;
-  uint key;
-  DBUG_ENTER("check_quick_select");
+  uint keyno;      /* index of used tree in SEL_TREE structure */
+  uint real_keyno; /* Number of the index in tables */
+  PARAM *param;
 
-  param->is_ror_scan= FALSE;
+  SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */
+  
+  RANGE_SEQ_ENTRY stack[MAX_REF_PARTS];
+  int i; /* Index of last used element in the above array */
+  
+  bool at_start; /* TRUE <=> The traversal has just started */
+} SEL_ARG_RANGE_SEQ;
 
-  if (!tree)
-    DBUG_RETURN(HA_POS_ERROR);			// Can't use it
-  param->max_key_part=0;
-  param->range_count=0;
-  key= param->real_keynr[idx];
 
-  if (tree->type == SEL_ARG::IMPOSSIBLE)
-    DBUG_RETURN(0L);				// Impossible select. return
-  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
-    DBUG_RETURN(HA_POS_ERROR);				// Don't use tree
+/*
+  Range sequence interface, SEL_ARG* implementation: Initialize the traversal
+  SYNOPSIS
+    init()
+      init_params  SEL_ARG tree traversal context
+      n_ranges     [ignored] The number of ranges obtained 
+      flags        [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
 
-  enum ha_key_alg key_alg= param->table->key_info[key].algorithm;
-  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
-  {
-    /* Records are not ordered by rowid for other types of indexes. */
-    cpk_scan= FALSE;
-  }
-  else
-  {
-    /*
-      Clustered PK scan is a special case, check_quick_keys doesn't recognize
-      CPK scans as ROR scans (while actually any CPK scan is a ROR scan).
-    */
-    cpk_scan= ((param->table->s->primary_key == param->real_keynr[idx]) &&
-               param->table->file->primary_key_is_clustered());
-    param->is_ror_scan= !cpk_scan;
-  }
+  RETURN
+    Same as what has been passed
+*/
 
-  records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0);
-  if (records != HA_POS_ERROR)
-  {
-    param->table->quick_keys.set_bit(key);
-    param->table->quick_rows[key]=records;
-    param->table->quick_key_parts[key]=param->max_key_part+1;
+range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param;
+  seq->at_start= TRUE;
 
-    if (cpk_scan)
-      param->is_ror_scan= TRUE;
-  }
-  if (param->table->file->index_flags(key, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
-    param->is_ror_scan= FALSE;
-  DBUG_PRINT("exit", ("Records: %lu", (ulong) records));
-  DBUG_RETURN(records);
+  seq->stack[0].key_tree= NULL;
+  seq->stack[0].min_key= seq->param->min_key;
+  seq->stack[0].min_key_flag= 0;
+  seq->stack[0].max_key= seq->param->max_key;
+  seq->stack[0].max_key_flag= 0;
+  seq->i= 0;
+  return init_param;
 }
 
 
-/*
-  Recursively calculate estimate of # rows that will be retrieved by
-  key scan on key idx.
-  SYNOPSIS
-    check_quick_keys()
-      param         Parameter from test_quick select function.
-      idx           Number of key to use in PARAM::keys in list of used keys
-                    (param->real_keynr[idx] holds the key number in table)
-      key_tree      SEL_ARG tree being examined.
-      min_key       Buffer with partial min key value tuple
-      min_key_flag
-      max_key       Buffer with partial max key value tuple
-      max_key_flag
-
-  NOTES
-    The function does the recursive descent on the tree via SEL_ARG::left,
-    SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates
-    are calculated using records_in_range calls at the leaf nodes and then
-    summed.
+static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree)
+{
+  RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1];
+  RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i];
+  
+  cur->key_tree= key_tree;
+  cur->min_key= prev->min_key;
+  cur->max_key= prev->max_key;
+  
+  key_tree->store(arg->param->key[arg->keyno][key_tree->part].store_length,
+                  &cur->min_key, prev->min_key_flag,
+                  &cur->max_key, prev->max_key_flag);
+  cur->min_key_flag= prev->min_key_flag | key_tree->min_flag;
+  cur->max_key_flag= prev->max_key_flag | key_tree->max_flag;
 
-    param->min_key and param->max_key are used to hold prefixes of key value
-    tuples.
+  (arg->i)++;
+}
 
-    The side effects are:
 
-    param->max_key_part is updated to hold the maximum number of key parts used
-      in scan minus 1.
+/*
+  Range sequence interface, SEL_ARG* implementation: get the next interval
+  
+  SYNOPSIS
+    sel_arg_range_seq_next()
+      rseq        Value returned from sel_arg_range_seq_init
+      range  OUT  Store information about the range here
 
-    param->range_count is incremented if the function finds a range that
-      wasn't counted by the caller.
+  DESCRIPTION
+   This is "get_next" function for Range sequence interface implementation
+   for SEL_ARG* tree.
 
-    param->is_ror_scan is cleared if the function detects that the key scan is
-      not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
-      function for description of which key scans are ROR scans)
+  RETURN
+    0  Ok
+    1  No more ranges in the sequence
 */
 
-static ha_rows
-check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
-		 char *min_key,uint min_key_flag, char *max_key,
-		 uint max_key_flag)
+uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
 {
-  ha_rows records=0, tmp;
-  uint tmp_min_flag, tmp_max_flag, keynr, min_key_length, max_key_length;
-  char *tmp_min_key, *tmp_max_key;
-
-  param->max_key_part=max(param->max_key_part,key_tree->part);
-  if (key_tree->left != &null_element)
+  SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq;
+  SEL_ARG *key_tree, *parent;
+  uint tmp_min_flag, tmp_max_flag;
+  if (seq->at_start)
   {
-    /*
-      There are at least two intervals for current key part, i.e. condition
-      was converted to something like
-        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
-      This is not a ROR scan if the key is not Clustered Primary Key.
-    */
-    param->is_ror_scan= FALSE;
-    records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag,
-			     max_key,max_key_flag);
-    if (records == HA_POS_ERROR)			// Impossible
-      return records;
+    key_tree= seq->start;
+    seq->at_start= FALSE;
+    goto walk_up_right;
   }
 
-  tmp_min_key= min_key;
-  tmp_max_key= max_key;
-  key_tree->store(param->key[idx][key_tree->part].store_length,
-		  &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
-  min_key_length= (uint) (tmp_min_key- param->min_key);
-  max_key_length= (uint) (tmp_max_key- param->max_key);
+  key_tree= seq->stack[seq->i].key_tree;
+  /* Ok, we're at some "full tuple" position in the tree */
+ 
+  /* Step down if we can */
+  if (key_tree->next && key_tree->next != &null_element)
+  {
+    //step down; (update the tuple, we'll step right and stay there)
+    seq->i--;
+    step_down_to(seq, key_tree->next);
+    key_tree= key_tree->next;
+    seq->param->is_ror_scan= FALSE;
+    goto walk_right_up;
+  }
 
-  if (param->is_ror_scan)
+  /* Ok, can't step down, walk left until we can step down */
+  while (1)
   {
-    /*
-      If the index doesn't cover entire key, mark the scan as non-ROR scan.
-      Actually we're cutting off some ROR scans here.
-    */
-    uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
-                    key_part[key_tree->part].fieldnr - 1;
-    if (param->table->field[fieldnr]->key_length() !=
-        param->key[idx][key_tree->part].length)
-      param->is_ror_scan= FALSE;
+    if (seq->i == 1) // can't step left
+      return 1;
+
+    /* Step left */
+    seq->i--;
+    key_tree= seq->stack[seq->i].key_tree;
+
+    /* Step down if we can */
+    if (key_tree->next && key_tree->next != &null_element)
+    {
+      // Step down; update the tuple
+      seq->i--;
+      step_down_to(seq, key_tree->next);
+      key_tree= key_tree->next;
+      break;
+    }
   }
 
-  if (key_tree->next_key_part &&
-      key_tree->next_key_part->part == key_tree->part+1 &&
-      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
-  {						// const key as prefix
-    if (min_key_length == max_key_length &&
-	!memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
-	!key_tree->min_flag && !key_tree->max_flag)
-    {
-      tmp=check_quick_keys(param,idx,key_tree->next_key_part,
-			   tmp_min_key, min_key_flag | key_tree->min_flag,
-			   tmp_max_key, max_key_flag | key_tree->max_flag);
-      goto end;					// Ugly, but efficient
+  // Ok, we've stepped down from the path to previous tuple.
+  // Walk right-up while we can.
+walk_right_up:
+  // stepping right...
+  while (key_tree->next_key_part && key_tree->next_key_part != &null_element && 
+         key_tree->next_key_part->part == key_tree->part + 1)
+  {
+    // Here: check if current is not "t.field = const"
+    // if it is not, store next keypart as "addon" and break;
+    {
+      uint min_key_length= seq->stack[seq->i].min_key - seq->param->min_key;
+      uint max_key_length= seq->stack[seq->i].max_key - seq->param->max_key;
+      uint len= seq->stack[seq->i].min_key - seq->stack[seq->i-1].min_key;
+      if (!(min_key_length == max_key_length &&
+            !memcmp(seq->stack[seq->i-1].min_key, seq->stack[seq->i-1].max_key, len) &&
+            !key_tree->min_flag && !key_tree->max_flag))
+      {
+        seq->param->is_ror_scan= FALSE;
+        if (!key_tree->min_flag)
+          key_tree->next_key_part->store_min_key(seq->param->key[seq->keyno], 
+                                                 &seq->stack[seq->i].min_key,
+                                                 &seq->stack[seq->i].min_key_flag);
+        if (!key_tree->max_flag)
+          key_tree->next_key_part->store_max_key(seq->param->key[seq->keyno], 
+                                                 &seq->stack[seq->i].max_key,
+                                                 &seq->stack[seq->i].max_key_flag);
+        break;
+      }
     }
-    else
+  
+    // Ok, current is "t.field=const" and there is next_key_part.
+    // step right, and walk up there.
+    key_tree= key_tree->next_key_part;
+
+walk_up_right:
+    while (key_tree->prev && key_tree->prev != &null_element)
     {
-      /* The interval for current key part is not c1 <= keyXpartY <= c1 */
-      param->is_ror_scan= FALSE;
+      /* Step "up" */
+      key_tree= key_tree->prev;
     }
-
-    tmp_min_flag=key_tree->min_flag;
-    tmp_max_flag=key_tree->max_flag;
-    if (!tmp_min_flag)
-      key_tree->next_key_part->store_min_key(param->key[idx], &tmp_min_key,
-					     &tmp_min_flag);
-    if (!tmp_max_flag)
-      key_tree->next_key_part->store_max_key(param->key[idx], &tmp_max_key,
-					     &tmp_max_flag);
-    min_key_length= (uint) (tmp_min_key- param->min_key);
-    max_key_length= (uint) (tmp_max_key- param->max_key);
+    step_down_to(seq, key_tree);
   }
-  else
+
+  /* Ok, we are at full tuple */
+  RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i];
+  uint min_key_length= cur->min_key - seq->param->min_key;
+  uint max_key_length= cur->max_key - seq->param->max_key;
+  
+  range->ptr= (char*)(int)(key_tree->part);
+  if (cur->min_key_flag & GEOM_FLAG)
   {
-    tmp_min_flag=min_key_flag | key_tree->min_flag;
-    tmp_max_flag=max_key_flag | key_tree->max_flag;
-  }
+    range->range_flag= cur->min_key_flag;
 
-  keynr=param->real_keynr[idx];
-  param->range_count++;
-  if (!tmp_min_flag && ! tmp_max_flag &&
-      (uint) key_tree->part+1 == param->table->key_info[keynr].key_parts &&
-      (param->table->key_info[keynr].flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
-      HA_NOSAME &&
-      min_key_length == max_key_length &&
-      !memcmp(param->min_key,param->max_key,min_key_length))
-    tmp=1;					// Max one record
+    /* Here minimum contains also function code bits, and maximum is +inf */
+    range->start_key.key=    (byte*)seq->param->min_key;
+    range->start_key.length= min_key_length;
+    range->start_key.flag=  (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
+  }
   else
   {
-    if (param->is_ror_scan)
+    range->range_flag= cur->min_key_flag | cur->max_key_flag;
+    
+    range->start_key.key=    (byte*)seq->param->min_key;
+    range->start_key.length= cur->min_key - seq->param->min_key;
+    range->start_key.flag=   (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY : 
+                                                             HA_READ_KEY_EXACT);
+
+    range->end_key.key=    (byte*)seq->param->max_key;
+    range->end_key.length= cur->max_key - seq->param->max_key;
+    range->end_key.flag=   (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY : 
+                                                           HA_READ_AFTER_KEY);
+    if (!cur->min_key_flag && !cur->max_key_flag &&
+        (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts &&
+        (seq->param->table->key_info[seq->real_keyno].flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
+        HA_NOSAME &&
+        range->start_key.length == range->end_key.length &&
+        !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length))
+      range->range_flag= UNIQUE_RANGE;
+      
+    if (seq->param->is_ror_scan)
     {
       /*
         If we get here, the condition on the key was converted to form
@@ -6653,63 +6678,114 @@
           uncovered "tail" of KeyX parts is either empty or is identical to
           first members of clustered primary key.
       */
-      if (!(min_key_length == max_key_length &&
-            !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
-            !key_tree->min_flag && !key_tree->max_flag &&
-            is_key_scan_ror(param, keynr, key_tree->part + 1)))
-        param->is_ror_scan= FALSE;
-    }
-
-    if (tmp_min_flag & GEOM_FLAG)
-    {
-      key_range min_range;
-      min_range.key=    (byte*) param->min_key;
-      min_range.length= min_key_length;
-      /* In this case tmp_min_flag contains the handler-read-function */
-      min_range.flag=   (ha_rkey_function) (tmp_min_flag ^ GEOM_FLAG);
-
-      tmp= param->table->file->records_in_range(keynr, &min_range,
-                                                (key_range*) 0);
-    }
-    else
-    {
-      key_range min_range, max_range;
-
-      min_range.key=    (byte*) param->min_key;
-      min_range.length= min_key_length;
-      min_range.flag=   (tmp_min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
-                         HA_READ_KEY_EXACT);
-      max_range.key=    (byte*) param->max_key;
-      max_range.length= max_key_length;
-      max_range.flag=   (tmp_max_flag & NEAR_MAX ?
-                         HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
-      tmp=param->table->file->records_in_range(keynr,
-                                               (min_key_length ? &min_range :
-                                                (key_range*) 0),
-                                               (max_key_length ? &max_range :
-                                                (key_range*) 0));
+      if (!(!cur->min_key_flag && !cur->max_key_flag &&
+            (range->start_key.length == range->end_key.length) &&
+            !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) &&
+            is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1)))
+        seq->param->is_ror_scan= FALSE;
     }
   }
- end:
-  if (tmp == HA_POS_ERROR)			// Impossible range
-    return tmp;
-  records+=tmp;
-  if (key_tree->right != &null_element)
+  seq->param->range_count++;
+  seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part);
+  return 0;
+}
+
+
+/*
+  Calculate estimate of number records that will be retrieved by a range
+  scan on given index using given SEL_ARG intervals tree.
+  SYNOPSIS
+    check_quick_select
+      param  Parameter from test_quick_select
+      idx    Number of index to use in PARAM::key SEL_TREE::key
+      tree   Transformed selection condition, tree->key[idx] holds intervals
+             tree to be used for scanning.
+  NOTES
+    param->is_ror_scan is set to reflect if the key scan is a ROR (see
+    is_key_scan_ror function for more info)
+    param->table->quick_*, param->range_count (and maybe others) are
+    updated with data of given key scan, see quick_range_seq_next for details.
+
+  RETURN
+    Estimate # of records to be retrieved.
+    HA_POS_ERROR if estimate calculation failed due to table handler problems.
+
+*/
+
+static
+ha_rows check_quick_select(PARAM *param, uint idx, bool index_only, SEL_ARG *tree, 
+                           bool *native_mrr, COST_VECT *cost)
+{
+  DBUG_ENTER("check_quick_select");
+  SEL_ARG_RANGE_SEQ seq;
+  bool cpk_scan;
+  
+  if (tree->type == SEL_ARG::IMPOSSIBLE)
+    DBUG_RETURN(0L);				// Impossible select. return
+  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
+    DBUG_RETURN(HA_POS_ERROR);				// Don't use tree
+
+  seq.keyno= idx;
+  seq.real_keyno= param->real_keynr[idx];
+  seq.param= param;
+  seq.start= tree;
+  
+  RANGE_SEQ_IF seq_if = {sel_arg_range_seq_init, sel_arg_range_seq_next};
+  uint bufsize= 0;
+  uint flags= 0;
+  ha_rows rows;
+
+  handler *file= param->table->file;
+  param->range_count=0;
+  param->max_key_part=0;
+  param->is_ror_scan= FALSE;
+  
+  enum ha_key_alg key_alg= param->table->key_info[seq.real_keyno].algorithm;
+  if ((key_alg != HA_KEY_ALG_BTREE) && (key_alg!= HA_KEY_ALG_UNDEF))
+  {
+    /* Records are not ordered by rowid for other types of indexes. */
+    cpk_scan= FALSE;
+  }
+  else
   {
     /*
-      There are at least two intervals for current key part, i.e. condition
-      was converted to something like
-        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
-      This is not a ROR scan if the key is not Clustered Primary Key.
+      Clustered PK scan is a special case, quick_range_seq_next doesn't 
+      recognize CPK scans as ROR scans (while actually any CPK scan is a ROR 
+      scan).
     */
-    param->is_ror_scan= FALSE;
-    tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag,
-			 max_key,max_key_flag);
-    if (tmp == HA_POS_ERROR)
-      return tmp;
-    records+=tmp;
+    cpk_scan= ((param->table->s->primary_key == param->real_keynr[idx]) &&
+               param->table->file->primary_key_is_clustered());
+    param->is_ror_scan= !cpk_scan;
   }
-  return records;
+
+  bool pk_is_clustered= file->primary_key_is_clustered();
+  uint keynr= param->real_keynr[idx];
+  if (index_only && 
+      (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
+      !(pk_is_clustered && keynr == param->table->s->primary_key))
+     flags |= HA_MRR_INDEX_ONLY;
+  
+  if (current_thd->lex->sql_command != SQLCOM_SELECT)
+    flags |= HA_MRR_USE_DEFAULT_IMPL;
+  bufsize= param->thd->variables.read_rnd_buff_size;
+  rows= file->multi_range_read_info_const(param->real_keynr[idx], &seq_if, 
+                                          (void*)&seq,
+                                          0, // todo: n_ranges
+                                          &bufsize, &flags, cost);
+  if (rows != HA_POS_ERROR)
+  {
+    param->table->quick_keys.set_bit(seq.real_keyno);
+    param->table->quick_rows[seq.real_keyno]=rows;
+    param->table->quick_key_parts[seq.real_keyno]=param->max_key_part+1;
+    *native_mrr= test(flags & HA_MRR_USE_DEFAULT_IMPL);
+
+    if (cpk_scan)
+      param->is_ror_scan= TRUE;
+  }
+  if (param->table->file->index_flags(seq.real_keyno, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
+    param->is_ror_scan= FALSE;
+  DBUG_PRINT("exit", ("Records: %lu", (ulong) rows));
+  DBUG_RETURN(rows);
 }
 
 
@@ -6743,7 +6819,7 @@
     Scans on HASH indexes are not ROR scans,
     any range scan on clustered primary key is ROR scan  (3)
 
-    Check (1) is made in check_quick_keys()
+    Check (1) is made in quick_range_seq_next()
     Check (3) is made check_quick_select()
     Check (2) is made by this function.
 
@@ -6759,9 +6835,19 @@
   KEY_PART_INFO *key_part_end= (table_key->key_part +
                                 table_key->key_parts);
   uint pk_number;
+  
+  for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
+  {
+    uint16 fieldnr= param->table->key_info[keynr].
+                    key_part[kp - table_key->key_part].fieldnr - 1;
+    if (param->table->field[fieldnr]->key_length() != kp->length)
+      return FALSE;
+  }
 
   if (key_part == key_part_end)
     return TRUE;
+
+  key_part= table_key->key_part + nparts;
   pk_number= param->table->s->primary_key;
   if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
     return FALSE;
@@ -6971,7 +7057,20 @@
 }
 
 
-/* Returns TRUE if any part of the key is NULL */
+
+/*
+  Return TRUE if any part of the key is NULL
+
+  SYNOPSIS
+    null_part_in_key()    
+      key_part  Array of key parts (index description)
+      key       Key values tuple
+      length    Length of key values tuple in bytes.
+
+  RETURN
+    TRUE   The tuple has at least one "keypartX is NULL"
+    FALSE  Otherwise
+*/
 
 static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length)
 {
@@ -7426,6 +7525,7 @@
   DBUG_RETURN(error);
 }
 
+
 int QUICK_RANGE_SELECT::reset()
 {
   uint  mrange_bufsiz;
@@ -7443,7 +7543,7 @@
   if (multi_range_length)
   {
     DBUG_ASSERT(multi_range_length == min(multi_range_count, ranges.elements));
-    DBUG_RETURN(0);
+    goto end;
   }
 
   /* Allocate the ranges array. */
@@ -7500,11 +7600,140 @@
     bzero((char*) mrange_buff, mrange_bufsiz);
 #endif
   }
+end:
+  RANGE_SEQ_IF seq_funcs= {quick_range_seq_init, quick_range_seq_next};
+  uint flags= HA_MRR_NO_ASSOCIATION | (sorted ? HA_MRR_SORTED : 0);
+  
+  if (mrr_use_native)
+    flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+  file->multi_range_read_init(&seq_funcs, (void*)this, ranges.elements,
+                              flags, multi_range_buff);
   DBUG_RETURN(0);
 }
 
 
 /*
+  Range sequence interface implementation for array<QUICK_RANGE>: initialize
+  
+  SYNOPSIS
+    quick_range_seq_init()
+      init_param  Caller-opaque paramenter: QUICK_RANGE_SELECT* pointer
+      n_ranges    Number of ranges in the sequence (ignored)
+      flags       MRR flags (currently not used) 
+
+  RETURN
+    Opaque value to be passed to quick_range_seq_next
+*/
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags)
+{
+  QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param;
+  quick->qr_traversal_ctx.first=  (QUICK_RANGE**)quick->ranges.buffer;
+  quick->qr_traversal_ctx.cur=    (QUICK_RANGE**)quick->ranges.buffer;
+  quick->qr_traversal_ctx.last=   quick->qr_traversal_ctx.cur + 
+                                  quick->ranges.elements;
+  return &quick->qr_traversal_ctx;
+}
+
+
+/*
+  Range sequence interface implementation for array<QUICK_RANGE>: get next
+  
+  SYNOPSIS
+    quick_range_seq_next()
+      rseq        Value returned from quick_range_seq_init
+      range  OUT  Store information about the range here
+
+  RETURN
+    0  Ok
+    1  No more ranges in the sequence
+*/
+
+uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
+{
+  QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq;
+
+  if (ctx->cur == ctx->last)
+    return 1; /* no more ranges */
+
+  QUICK_RANGE *cur= *(ctx->cur);
+  key_range *start_key= &range->start_key;
+  key_range *end_key=   &range->end_key;
+
+  start_key->key=    (const byte*)cur->min_key;
+  start_key->length= cur->min_length;
+  start_key->flag=   ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
+                      (cur->flag & EQ_RANGE) ?
+                      HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
+  end_key->key=      (const byte*)cur->max_key;
+  end_key->length=   cur->max_length;
+  /*
+    We use HA_READ_AFTER_KEY here because if we are reading on a key
+    prefix. We want to find all keys with this prefix.
+  */
+  end_key->flag=     (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
+                      HA_READ_AFTER_KEY);
+  range->range_flag= cur->flag;
+  ctx->cur++;
+  return 0;
+}
+
+
+/*
+  MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
+
+  SYNOPSIS
+    mrr_persistent_flag_storage()
+      seq  Range sequence being traversed
+      idx  Number of range
+
+  DESCRIPTION
+    MRR/NDB implementation needs to store some bits for each range. This
+    function returns a reference to the "range_flag" associated with the
+    range number idx.
+
+    This function should be removed when we get a proper MRR/NDB 
+    implementation.
+
+  RETURN
+    Reference to range_flag associated with range number #idx
+*/
+
+uint16 &mrr_persistent_flag_storage(range_seq_t seq, uint idx)
+{
+  QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)seq;
+  return ctx->first[idx]->flag;
+}
+
+
+/*
+  MRR range sequence interface: array<QUICK_RANGE> impl: utility func for NDB
+
+  SYNOPSIS
+    mrr_get_ptr_by_idx()
+      seq  Range sequence bening traversed
+      idx  Number of range
+
+  DESCRIPTION
+    Current MRR/NDB code traverses the sequence of ranges several times and
+    it accesses range-associated data on the second pass. Multiple range
+    sequence interface traversals are not allowed in the new MRR interface,
+    so this function retu
+
+  RETURN
+    Reference to range-associated data.  
+*/
+
+char* &mrr_get_ptr_by_idx(range_seq_t seq, uint idx)
+{
+  static char *dummy;
+  QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)seq;
+  return dummy;
+}
+
+
+/*
   Get next possible record using quick-struct.
 
   SYNOPSIS
@@ -7521,71 +7750,8 @@
 
 int QUICK_RANGE_SELECT::get_next()
 {
-  int             result;
-  KEY_MULTI_RANGE *mrange;
-  key_range       *start_key;
-  key_range       *end_key;
-  DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
-  DBUG_ASSERT(multi_range_length && multi_range &&
-              (cur_range >= (QUICK_RANGE**) ranges.buffer) &&
-              (cur_range <= (QUICK_RANGE**) ranges.buffer + ranges.elements));
-
-  for (;;)
-  {
-    if (in_range)
-    {
-      /* We did already start to read this key. */
-      result= file->read_multi_range_next(&mrange);
-      if (result != HA_ERR_END_OF_FILE)
-      {
-        in_range= ! result;
-	DBUG_RETURN(result);
-      }
-    }
-
-    uint count= min(multi_range_length, ranges.elements -
-                    (cur_range - (QUICK_RANGE**) ranges.buffer));
-    if (count == 0)
-    {
-      /* Ranges have already been used up before. None is left for read. */
-      in_range= FALSE;
-      DBUG_RETURN(HA_ERR_END_OF_FILE);
-    }
-    KEY_MULTI_RANGE *mrange_slot, *mrange_end;
-    for (mrange_slot= multi_range, mrange_end= mrange_slot+count;
-         mrange_slot < mrange_end;
-         mrange_slot++)
-    {
-      start_key= &mrange_slot->start_key;
-      end_key= &mrange_slot->end_key;
-      range= *(cur_range++);
-
-      start_key->key=    (const byte*) range->min_key;
-      start_key->length= range->min_length;
-      start_key->flag=   ((range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
-                          (range->flag & EQ_RANGE) ?
-                          HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
-      end_key->key=      (const byte*) range->max_key;
-      end_key->length=   range->max_length;
-      /*
-        We use HA_READ_AFTER_KEY here because if we are reading on a key
-        prefix. We want to find all keys with this prefix.
-      */
-      end_key->flag=     (range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
-                          HA_READ_AFTER_KEY);
-
-      mrange_slot->range_flag= range->flag;
-    }
-
-    result= file->read_multi_range_first(&mrange, multi_range, count,
-                                         sorted, multi_range_buff);
-    if (result != HA_ERR_END_OF_FILE)
-    {
-      in_range= ! result;
-      DBUG_RETURN(result);
-    }
-    in_range= FALSE; /* No matching rows; go to next set of ranges. */
-  }
+  char *dummy;
+  return file->multi_range_read_next(&dummy);
 }
 
 
@@ -8615,8 +8781,12 @@
       cur_index_tree= get_index_range_tree(cur_index, tree, param,
                                            &cur_param_idx);
       /* Check if this range tree can be used for prefix retrieval. */
-      cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
-                                                    cur_index_tree);
+      bool dummy1;
+      COST_VECT dummy2;
+      cur_quick_prefix_records= check_quick_select(param, cur_param_idx, 
+                                                   FALSE /*don't care*/, 
+                                                   cur_index_tree,
+                                                   &dummy1, &dummy2);
     }
     cost_group_min_max(table, cur_index_info, used_key_parts,
                        cur_group_key_parts, tree, cur_index_tree,

--- 1.60/sql/opt_range.h	2006-09-12 13:49:53 +03:00
+++ 1.61/sql/opt_range.h	2006-09-12 13:49:53 +03:00
@@ -252,6 +252,22 @@
 class PARAM;
 class SEL_ARG;
 
+
+/*
+  MRR range sequence, array<QUICK_RANGE> implementation: sequence traversal
+  context.
+*/
+typedef struct st_quick_range_seq_ctx
+{
+  QUICK_RANGE **first;
+  QUICK_RANGE **cur;
+  QUICK_RANGE **last;
+} QUICK_RANGE_SEQ_CTX;
+
+range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags);
+uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+
+
 /*
   Quick select that does a range scan on a single key. The records are
   returned in key order.
@@ -277,7 +293,7 @@
                                        freed by QUICK_RANGE_SELECT) */
   HANDLER_BUFFER *multi_range_buff; /* the handler buffer (allocated and
                                        freed by QUICK_RANGE_SELECT) */
-
+  QUICK_RANGE_SEQ_CTX qr_traversal_ctx;
 protected:
   friend class TRP_ROR_INTERSECT;
   friend
@@ -295,6 +311,9 @@
   friend class QUICK_SELECT_DESC;
   friend class QUICK_INDEX_MERGE_SELECT;
   friend class QUICK_ROR_INTERSECT_SELECT;
+  friend uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
+  friend range_seq_t quick_range_seq_init(void *init_param,
+                                                  uint n_ranges, uint flags);
 
   DYNAMIC_ARRAY ranges;     /* ordered array of range ptrs */
   QUICK_RANGE **cur_range;  /* current element in ranges  */
@@ -302,11 +321,13 @@
   QUICK_RANGE *range;
   KEY_PART *key_parts;
   KEY_PART_INFO *key_part_info;
+  
   int cmp_next(QUICK_RANGE *range);
   int cmp_prev(QUICK_RANGE *range);
   bool row_in_ranges();
 public:
   MEM_ROOT alloc;
+  bool mrr_use_native;
 
   QUICK_RANGE_SELECT(THD *thd, TABLE *table,uint index_arg,bool no_alloc=0,
                      MEM_ROOT *parent_alloc=NULL);

--- 1.287/sql/sql_class.h	2006-09-12 13:49:53 +03:00
+++ 1.288/sql/sql_class.h	2006-09-12 13:49:53 +03:00
@@ -207,6 +207,13 @@
   ulong net_write_timeout;
   ulong optimizer_prune_level;
   ulong optimizer_search_depth;
+  /*
+    Controls use of Engine-MRR:
+      0 - auto, based on cost
+      1 - force MRR when the storage engine is capable of doing it
+      2 - disable MRR.
+  */
+  ulong optimizer_use_mrr; 
   ulong preload_buff_size;
   ulong query_cache_type;
   ulong read_buff_size;

--- 1.391/sql/sql_select.cc	2006-09-12 13:49:53 +03:00
+++ 1.392/sql/sql_select.cc	2006-09-12 13:49:53 +03:00
@@ -3456,12 +3456,10 @@
             if (table->used_keys.is_set(key))
             {
               /* we can use only index tree */
-              uint keys_per_block= table->file->block_size/2/
-                (keyinfo->key_length+table->file->ref_length)+1;
-              tmp = record_count*(tmp+keys_per_block-1)/keys_per_block;
+              tmp= record_count * table->file->index_only_read_time(key, tmp);
             }
             else
-              tmp = record_count*min(tmp,s->worst_seeks);
+              tmp= record_count * min(tmp,s->worst_seeks);
           }
         }
         else
@@ -3546,12 +3544,10 @@
             if (table->used_keys.is_set(key))
             {
               /* we can use only index tree */
-              uint keys_per_block= table->file->block_size/2/
-                (keyinfo->key_length+table->file->ref_length)+1;
-              tmp = record_count*(tmp+keys_per_block-1)/keys_per_block;
+              tmp= record_count * table->file->index_only_read_time(key, tmp);
             }
             else
-              tmp = record_count*min(tmp,s->worst_seeks);
+              tmp = record_count * min(tmp,s->worst_seeks);
           }
           else
             tmp = best_time;                    // Do nothing
@@ -4468,12 +4464,10 @@
 	      if (table->used_keys.is_set(key))
 	      {
 		/* we can use only index tree */
-		uint keys_per_block= table->file->block_size/2/
-		  (keyinfo->key_length+table->file->ref_length)+1;
-		tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
+                tmp= record_count*table->file->index_only_read_time(key, tmp);
 	      }
 	      else
-		tmp=record_count*min(tmp,s->worst_seeks);
+		tmp= record_count*min(tmp,s->worst_seeks);
 	    }
 	  }
 	  else
@@ -4554,12 +4548,11 @@
 	      if (table->used_keys.is_set(key))
 	      {
 		/* we can use only index tree */
-		uint keys_per_block= table->file->block_size/2/
-		  (keyinfo->key_length+table->file->ref_length)+1;
-		tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
+                tmp= record_count * table->file->index_only_read_time(key, 
+                                                                      tmp);
 	      }
 	      else
-		tmp=record_count*min(tmp,s->worst_seeks);
+		tmp=record_count * min(tmp,s->worst_seeks);
 	    }
 	    else
 	      tmp=best_time;			// Do nothing
@@ -11094,6 +11087,33 @@
 }
 
 
+/*
+  Extract a condition that can be checked after reading given table
+  
+  SYNOPSIS
+    make_cond_for_table()
+      cond         Condition to analyze
+      tables       Tables for which "current field values" are available
+      used_table   Table that we're extracting the condition for
+
+  DESCRIPTION
+    Extract the condition that can be checked after reading the table
+    specified in 'used_table', given that current-field values for tables
+    specified in 'tables' bitmap are available.
+
+    The function assumes that
+      - Constant parts of the condition has already been checked.
+      - Condition that could be checked for tables in 'tables' has already 
+        been checked.
+        
+    The function takes into account that some parts of the condition are
+    guaranteed to be true by employed 'ref' access methods (the code that
+    does this is located at the end, search down for "EQ_FUNC").
+
+  RETURN
+    Extracted condition
+*/
+
 static COND *
 make_cond_for_table(COND *cond, table_map tables, table_map used_table)
 {
@@ -11189,6 +11209,7 @@
   return cond;
 }
 
+
 static Item *
 part_of_refkey(TABLE *table,Field *field)
 {
@@ -14134,6 +14155,7 @@
       item_list.push_back(new Item_int((longlong) (ulonglong)
 				       join->best_positions[i]. records_read,
 				       21));
+
       /* Build "Extra" field and add it to item_list. */
       my_bool key_read=table->key_read;
       if ((tab->type == JT_NEXT || tab->type == JT_CONST) &&
@@ -14191,6 +14213,13 @@
         }
 	if (table->reginfo.not_exists_optimize)
 	  extra.append(STRING_WITH_LEN("; Not exists"));
+          
+        if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE &&
+            !((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_use_native)
+        {
+	  extra.append(STRING_WITH_LEN("; Using MRR"));
+        }
+
 	if (need_tmp_table)
 	{
 	  need_tmp_table=0;

--- 1.58/sql/structs.h	2006-09-12 13:49:53 +03:00
+++ 1.59/sql/structs.h	2006-09-12 13:49:53 +03:00
@@ -70,7 +70,9 @@
   Field *field;
   uint	offset;				/* offset in record (from 0) */
   uint	null_offset;			/* Offset to null_bit in record */
-  uint16 length;			/* Length of key_part */
+   /* Length of key part in bytes, excluding NULL flag and length bytes */
+  uint16 length;
+  /* Max. length of the key part, including the NULL flag and length bytes. */
   uint16 store_length;
   uint16 key_type;
   uint16 fieldnr;			/* Fieldnum in UNIREG */

--- 1.134/sql/table.h	2006-09-12 13:49:54 +03:00
+++ 1.135/sql/table.h	2006-09-12 13:49:54 +03:00
@@ -233,7 +233,9 @@
   byte *write_row_record;		/* Used as optimisation in
 					   THD::write_row */
   byte *insert_values;                  /* used by INSERT ... UPDATE */
-  key_map quick_keys, used_keys, keys_in_use_for_query;
+  key_map quick_keys;
+  key_map used_keys;     /* Indexes that cover all fields used by the query */
+  key_map keys_in_use_for_query;
   KEY  *key_info;			/* data of keys in database */
 
   Field *next_number_field;		/* Set if next_number is activated */

--- 1.283/sql/ha_ndbcluster.cc	2006-09-12 13:49:54 +03:00
+++ 1.284/sql/ha_ndbcluster.cc	2006-09-12 13:49:54 +03:00
@@ -7017,45 +7017,107 @@
   DBUG_RETURN(error);
 }
 
-int
-ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                                      KEY_MULTI_RANGE *ranges, 
-                                      uint range_count,
-                                      bool sorted, 
-                                      HANDLER_BUFFER *buffer)
+/****************************************************************************
+ * MRR interface implementation
+ ***************************************************************************/
+
+ha_rows 
+ha_ndbcluster::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                           void *seq_init_param, 
+                                           uint n_ranges, uint *bufsz,
+                                           uint *flags, COST_VECT *cost)
 {
-  DBUG_ENTER("ha_ndbcluster::read_multi_range_first");
-  m_write_op= FALSE;
+  ha_rows res;
+  res= handler::multi_range_read_info_const(keyno, seq, seq_init_param, 
+                                            n_ranges, bufsz, flags, cost);
   
-  int res;
-  KEY* key_info= table->key_info + active_index;
-  NDB_INDEX_TYPE index_type= get_index_type(active_index);
-  ulong reclength= table_share->reclength;
-  NdbOperation* op;
+  if (uses_blob_value())
+  {
+    /* NOTE: the above criteria doesn't match... */
+    *flags |= HA_MRR_USE_DEFAULT_IMPL;
+  }
+  else
+    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;
+  return res;
+}
 
+int 
+ha_ndbcluster::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                                     uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  int res;
+  res= handler::multi_range_read_info(keyno, n_ranges, keys, bufsz, flags,
+                                      cost);
   if (uses_blob_value())
   {
+    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;
+  }
+  else
+    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;
+  return res;
+}
+
+#if 0
+#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("multi_range_read_next: case %d\n", x));
+#else
+#define DBUG_MULTI_RANGE(x)
+#endif
+
+//new
+int ha_ndbcluster::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, 
+                                         void *seq_init_param,
+                                         uint n_ranges, uint mode,
+                                      HANDLER_BUFFER *buffer)
+{
+  int res;
+  DBUG_ENTER("ha_ndbcluster::multi_range_read_init");
+  m_write_op= FALSE;
+
+  if (uses_blob_value() || (seq_funcs->init != quick_range_seq_init))
+  {
     /**
      * blobs can't be batched currently
      */
     m_disable_multi_read= TRUE;
-    DBUG_RETURN(handler::read_multi_range_first(found_range_p, 
-                                                ranges, 
-                                                range_count,
-                                                sorted, 
-                                                buffer));
+    DBUG_RETURN(handler::multi_range_read_init(seq_funcs, seq_init_param,
+                                               n_ranges, mode, buffer));
   }
-
   m_disable_multi_read= FALSE;
 
-  /**
-   * Copy arguments into member variables
-   */
-  m_multi_ranges= ranges;
-  multi_range_curr= ranges;
-  multi_range_end= ranges+range_count;
-  multi_range_sorted= sorted;
+  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
   multi_range_buffer= buffer;
+  mrr_funcs= *seq_funcs;
+  mrr_iter= mrr_funcs.init(seq_init_param, n_ranges, mode);
+  ranges_in_seq= n_ranges;
+
+  res= multi_range_start_retrievals(-1);
+  if (first_unstarted_range == n_ranges)
+  {
+    /**
+     * Mark that we're using entire buffer (even if might not) as
+     *   we haven't read all ranges for some reason
+     * This as we don't want mysqld to reuse the buffer when we read
+     *   the remaining ranges
+     */
+    buffer->end_of_used_area= (byte*)multi_range_buffer->buffer_end;
+  }
+  else
+  {
+    /* Using all buffer */
+  }
+  
+  DBUG_RETURN(res);
+}
+
+
+int ha_ndbcluster::multi_range_start_retrievals(int starting_range)
+{
+  int res, range_res;
+  KEY* key_info= table->key_info + active_index;
+  NDB_INDEX_TYPE index_type= get_index_type(active_index);
+  ulong reclength= table_share->reclength;
+  NdbOperation* op;
+  DBUG_ENTER("multi_range_start_retrievals");
 
   /**
    * read multi range will read ranges as follows (if not ordered)
@@ -7073,8 +7135,8 @@
   /**
    * Variables for loop
    */
-  byte *curr= (byte*)buffer->buffer;
-  byte *end_of_buffer= (byte*)buffer->buffer_end;
+  byte *curr= (byte*)multi_range_buffer->buffer;
+  byte *end_of_buffer= (byte*)multi_range_buffer->buffer_end;
   NdbOperation::LockMode lm= 
     (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
   const NDBTAB *tab= (const NDBTAB *) m_table;
@@ -7082,21 +7144,20 @@
   const NDBINDEX *idx= (NDBINDEX *) m_index[active_index].index; 
   const NdbOperation* lastOp= m_active_trans->getLastDefinedOperation();
   NdbIndexScanOperation* scanOp= 0;
-  for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; 
-       multi_range_curr++)
+
+  int range_no= -1;
+  int mrr_range_no= starting_range;
+
+  while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)) &&
+         curr+reclength <= end_of_buffer)
   {
+    range_no++;
+    mrr_range_no++;
     part_id_range part_spec;
     if (m_use_partition_function)
     {
-      get_partition_set(table, curr, active_index,
-                        &multi_range_curr->start_key,
+      get_partition_set(table, curr, active_index, &mrr_cur_range.start_key,
                         &part_spec);
-      DBUG_PRINT("info", ("part_spec.start_part = %u, part_spec.end_part = %u",
-                          part_spec.start_part, part_spec.end_part));
-      /*
-        If partition pruning has found no partition in set
-        we can skip this scan
-      */
       if (part_spec.start_part > part_spec.end_part)
       {
         /*
@@ -7104,22 +7165,23 @@
           partition
         */
         curr += reclength;
-        multi_range_curr->range_flag |= SKIP_RANGE;
+        mrr_persistent_flag_storage(mrr_iter, mrr_range_no) |= SKIP_RANGE;
         continue;
       }
     }
+    
     switch(index_type){
     case PRIMARY_KEY_ORDERED_INDEX:
-      if (!(multi_range_curr->start_key.length == key_info->key_length &&
-          multi_range_curr->start_key.flag == HA_READ_KEY_EXACT))
+      if (!(mrr_cur_range.start_key.length == key_info->key_length &&
+            mrr_cur_range.start_key.flag == HA_READ_KEY_EXACT))
         goto range;
       // else fall through
     case PRIMARY_KEY_INDEX:
     {
-      multi_range_curr->range_flag |= UNIQUE_RANGE;
+      mrr_persistent_flag_storage(mrr_iter, mrr_range_no) |= UNIQUE_RANGE;
       if ((op= m_active_trans->getNdbOperation(tab)) && 
           !op->readTuple(lm) && 
-          !set_primary_key(op, multi_range_curr->start_key.key) &&
+          !set_primary_key(op, mrr_cur_range.start_key.key) &&
           !define_read_attrs(curr, op) &&
           (op->setAbortOption(AO_IgnoreError), TRUE) &&
           (!m_use_partition_function ||
@@ -7129,20 +7191,23 @@
         ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
       break;
     }
-    break;
     case UNIQUE_ORDERED_INDEX:
-      if (!(multi_range_curr->start_key.length == key_info->key_length &&
-          multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
-          !check_null_in_key(key_info, multi_range_curr->start_key.key,
-                             multi_range_curr->start_key.length)))
+    {
+      if (!(mrr_cur_range.start_key.length == key_info->key_length &&
+            mrr_cur_range.start_key.flag == HA_READ_KEY_EXACT &&
+            !check_null_in_key(key_info, mrr_cur_range.start_key.key,
+                               mrr_cur_range.start_key.length)))
+      {
         goto range;
+      }
       // else fall through
+    }
     case UNIQUE_INDEX:
     {
-      multi_range_curr->range_flag |= UNIQUE_RANGE;
+      mrr_persistent_flag_storage(mrr_iter, mrr_range_no) |= UNIQUE_RANGE;
       if ((op= m_active_trans->getNdbIndexOperation(unique_idx, tab)) && 
           !op->readTuple(lm) && 
-          !set_index_key(op, key_info, multi_range_curr->start_key.key) &&
+          !set_index_key(op, key_info, mrr_cur_range.start_key.key) &&
           !define_read_attrs(curr, op) &&
           (op->setAbortOption(AO_IgnoreError), TRUE))
         curr += reclength;
@@ -7150,15 +7215,16 @@
         ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
       break;
     }
-    case ORDERED_INDEX: {
+    case ORDERED_INDEX:
+    {
   range:
-      multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE;
+      mrr_persistent_flag_storage(mrr_iter, mrr_range_no) &= ~(uint16)UNIQUE_RANGE;
       if (scanOp == 0)
       {
         if (m_multi_cursor)
         {
           scanOp= m_multi_cursor;
-          DBUG_ASSERT(scanOp->getSorted() == sorted);
+          DBUG_ASSERT(scanOp->getSorted() == mrr_is_output_sorted);
           DBUG_ASSERT(scanOp->getLockMode() == 
                       (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
           if (scanOp->reset_bounds(m_force_send))
@@ -7167,7 +7233,8 @@
           end_of_buffer -= reclength;
         }
         else if ((scanOp= m_active_trans->getNdbIndexScanOperation(idx, tab)) 
-                 &&!scanOp->readTuples(lm, 0, parallelism, sorted, FALSE, TRUE)
+                 &&!scanOp->readTuples(lm, 0, parallelism,  
+                                       mrr_is_output_sorted, FALSE, TRUE)
                  &&!generate_scan_filter(m_cond_stack, scanOp)
                  &&!define_read_attrs(end_of_buffer-reclength, scanOp))
         {
@@ -7181,10 +7248,9 @@
         }
       }
 
-      const key_range *keys[2]= { &multi_range_curr->start_key, 
-                                  &multi_range_curr->end_key };
-      if ((res= set_bounds(scanOp, active_index, false, keys,
-                           multi_range_curr-ranges)))
+      const key_range *keys[2]= { &mrr_cur_range.start_key, 
+                                  &mrr_cur_range.end_key };
+      if ((res= set_bounds(scanOp, active_index, false, keys, range_no)))
         DBUG_RETURN(res);
       break;
     }
@@ -7195,21 +7261,6 @@
     }
   }
   
-  if (multi_range_curr != multi_range_end)
-  {
-    /**
-     * Mark that we're using entire buffer (even if might not) as
-     *   we haven't read all ranges for some reason
-     * This as we don't want mysqld to reuse the buffer when we read
-     *   the remaining ranges
-     */
-    buffer->end_of_used_area= (byte*)buffer->buffer_end;
-  }
-  else
-  {
-    buffer->end_of_used_area= curr;
-  }
-  
   /**
    * Set first operation in multi range
    */
@@ -7217,40 +7268,36 @@
     lastOp ? lastOp->next() : m_active_trans->getFirstDefinedOperation();
   if (!(res= execute_no_commit_ie(this, m_active_trans)))
   {
-    m_multi_range_defined= multi_range_curr;
-    multi_range_curr= ranges;
-    m_multi_range_result_ptr= (byte*)buffer->buffer;
-    DBUG_RETURN(read_multi_range_next(found_range_p));
+    m_multi_range_result_ptr= (byte*)multi_range_buffer->buffer;
+    first_running_range= first_range_in_batch= starting_range + 1;
+    first_unstarted_range= mrr_range_no + 1;
+    DBUG_RETURN(0);
   }
   ERR_RETURN(m_active_trans->getNdbError());
 }
 
-#if 0
-#define DBUG_MULTI_RANGE(x) DBUG_PRINT("info", ("read_multi_range_next: case %d\n", x));
-#else
-#define DBUG_MULTI_RANGE(x)
-#endif
-
-int
-ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
+//new 
+int ha_ndbcluster::multi_range_read_next(char **range_info)
 {
-  DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
+  DBUG_ENTER("ha_ndbcluster::multi_range_read_next");
   if (m_disable_multi_read)
   {
     DBUG_MULTI_RANGE(11);
-    DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
+    DBUG_RETURN(handler::multi_range_read_next(range_info));
   }
   
   int res;
   int range_no;
   ulong reclength= table_share->reclength;
   const NdbOperation* op= m_current_multi_operation;
-  for (;multi_range_curr < m_multi_range_defined; multi_range_curr++)
+  
+  //for each range (we should have remembered the number)
+  for (;first_running_range < first_unstarted_range; first_running_range++)
   {
     DBUG_MULTI_RANGE(12);
-    if (multi_range_curr->range_flag & SKIP_RANGE)
+    if (mrr_persistent_flag_storage(mrr_iter, first_running_range) & SKIP_RANGE)
       continue;
-    if (multi_range_curr->range_flag & UNIQUE_RANGE)
+    if (mrr_persistent_flag_storage(mrr_iter, first_running_range) & UNIQUE_RANGE)
     {
       if (op->getNdbError().code == 0)
       {
@@ -7262,7 +7309,7 @@
       m_multi_range_result_ptr += reclength;
       continue;
     } 
-    else if (m_multi_cursor && !multi_range_sorted)
+    else if (m_multi_cursor && !mrr_is_output_sorted)
     {
       DBUG_MULTI_RANGE(1);
       if ((res= fetch_next(m_multi_cursor)) == 0)
@@ -7277,7 +7324,7 @@
         goto close_scan;
       }
     }
-    else if (m_multi_cursor && multi_range_sorted)
+    else if (m_multi_cursor && mrr_is_output_sorted)
     {
       if (m_active_cursor && (res= fetch_next(m_multi_cursor)))
       {
@@ -7286,7 +7333,7 @@
       }
       
       range_no= m_multi_cursor->get_range_no();
-      uint current_range_no= multi_range_curr - m_multi_ranges;
+      uint current_range_no= first_running_range - first_range_in_batch;
       if ((uint) range_no == current_range_no)
       {
         DBUG_MULTI_RANGE(4);
@@ -7310,7 +7357,7 @@
           DBUG_MULTI_RANGE(15);
           goto close_scan;
         }
-        multi_range_curr--; // Will be increased in for-loop
+        first_running_range--; // Will be increased in for-loop
         continue;
       }
     }
@@ -7318,7 +7365,7 @@
     {
       DBUG_MULTI_RANGE(7);
       /**
-       * Corresponds to range 5 in example in read_multi_range_first
+       * Corresponds to range 5 in example in multi_range_start_retrievals
        */
       (void)1;
       continue;
@@ -7340,27 +7387,23 @@
     }
   }
   
-  if (multi_range_curr == multi_range_end)
+  if (first_running_range == ranges_in_seq)
   {
     DBUG_MULTI_RANGE(16);
     DBUG_RETURN(HA_ERR_END_OF_FILE);
   }
   
-  /**
-   * Read remaining ranges
-   */
-  DBUG_RETURN(read_multi_range_first(multi_range_found_p, 
-                                     multi_range_curr,
-                                     multi_range_end - multi_range_curr, 
-                                     multi_range_sorted,
-                                     multi_range_buffer));
+  /* Read remaining ranges */
+  if ((res= multi_range_start_retrievals(first_running_range-1)))
+    DBUG_RETURN(res);
+  DBUG_RETURN(multi_range_read_next(range_info));
   
 found:
   /**
    * Found a record belonging to a scan
    */
   m_active_cursor= m_multi_cursor;
-  * multi_range_found_p= m_multi_ranges + range_no;
+  *range_info= mrr_get_ptr_by_idx(mrr_iter, first_running_range);
   memcpy(table->record[0], m_multi_range_cursor_result_ptr, reclength);
   setup_recattr(m_active_cursor->getFirstRecAttr());
   unpack_record(table->record[0]);
@@ -7372,17 +7415,21 @@
    * Found a record belonging to a pk/index op,
    *   copy result and move to next to prepare for next call
    */
-  * multi_range_found_p= multi_range_curr;
+  *range_info= mrr_get_ptr_by_idx(mrr_iter, first_running_range++);
   memcpy(table->record[0], m_multi_range_result_ptr, reclength);
   setup_recattr(op->getFirstRecAttr());
   unpack_record(table->record[0]);
   table->status= 0;
   
-  multi_range_curr++;
   m_current_multi_operation= m_active_trans->getNextCompletedOperation(op);
   m_multi_range_result_ptr += reclength;
   DBUG_RETURN(0);
 }
+
+
+/*****************************************************************************
+ * MRR implementation ends
+ ****************************************************************************/
 
 int
 ha_ndbcluster::setup_recattr(const NdbRecAttr* curr)

--- 1.123/sql/ha_ndbcluster.h	2006-09-12 13:49:54 +03:00
+++ 1.124/sql/ha_ndbcluster.h	2006-09-12 13:49:54 +03:00
@@ -579,12 +579,23 @@
   int alter_tablespace(st_alter_tablespace *info);
 
   /**
-   * Multi range stuff
+   * Multi Range Read interface
    */
-  int read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
-                             KEY_MULTI_RANGE*ranges, uint range_count,
-                             bool sorted, HANDLER_BUFFER *buffer);
-  int read_multi_range_next(KEY_MULTI_RANGE **found_range_p);
+  int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+                            uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+  int multi_range_read_next(char **range_info);
+  ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                      void *seq_init_param, 
+                                      uint n_ranges, uint *bufsz,
+                                      uint *flags, COST_VECT *cost);
+  int multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                            uint *bufsz, uint *flags, COST_VECT *cost);
+private:
+  uint first_running_range;
+  uint first_range_in_batch;
+  uint first_unstarted_range;
+  int multi_range_start_retrievals(int first_range);
+public:
 
   bool get_error_message(int error, String *buf);
   void info(uint);
@@ -852,8 +863,6 @@
   Ndb_cond_stack *m_cond_stack;
   bool m_disable_multi_read;
   byte *m_multi_range_result_ptr;
-  KEY_MULTI_RANGE *m_multi_ranges;
-  KEY_MULTI_RANGE *m_multi_range_defined;
   const NdbOperation *m_current_multi_operation;
   NdbIndexScanOperation *m_multi_cursor;
   byte *m_multi_range_cursor_result_ptr;

--- 1.261/sql/ha_innodb.cc	2006-09-12 13:49:54 +03:00
+++ 1.262/sql/ha_innodb.cc	2006-09-12 13:49:54 +03:00
@@ -840,10 +840,13 @@
                   HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS |
                   HA_PRIMARY_KEY_IN_READ_INDEX |
                   HA_CAN_GEOMETRY |
-                  HA_TABLE_SCAN_ON_INDEX),
+                  HA_TABLE_SCAN_ON_INDEX |
+                  HA_NEED_READ_RANGE_BUFFER
+                  ),
   last_dup_key((uint) -1),
   start_of_scan(0),
-  num_write_row(0)
+  num_write_row(0),
+  ds_mrr(this)
 {}
 
 /*************************************************************************
@@ -7569,5 +7572,42 @@
     return COMPATIBLE_DATA_NO;
 
   return COMPATIBLE_DATA_YES;
+}
+
+
+/****************************************************************************
+ * DS-MRR implementation 
+ ***************************************************************************/
+
+/**
+ * Multi Range Read interface, DS-MRR calls
+ */
+
+int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+                          uint n_ranges, uint mode, HANDLER_BUFFER *buf)
+{
+  return ds_mrr.dsmrr_init(this, &table->key_info[active_index], 
+                           seq, seq_init_param, n_ranges, mode, buf);
+}
+
+int ha_innobase::multi_range_read_next(char **range_info)
+{
+  return ds_mrr.dsmrr_next(this, range_info);
+}
+
+ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                                 void *seq_init_param,  
+                                                 uint n_ranges, uint *bufsz,
+                                                 uint *flags, 
+                                                 COST_VECT *cost)
+{
+  return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
+                                 flags, cost);
+}
+
+int ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                          uint *bufsz, uint *flags, COST_VECT *cost)
+{
+  return ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost);
 }
 

--- 1.117/sql/ha_innodb.h	2006-09-12 13:49:54 +03:00
+++ 1.118/sql/ha_innodb.h	2006-09-12 13:49:54 +03:00
@@ -210,6 +210,20 @@
         int cmp_ref(const byte *ref1, const byte *ref2);
 	bool check_if_incompatible_data(HA_CREATE_INFO *info,
 					uint table_changes);
+public:
+  /**
+   * Multi Range Read interface
+   */
+  int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+                            uint n_ranges, uint mode, HANDLER_BUFFER *buf);
+  int multi_range_read_next(char **range_info);
+  ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+                                      void *seq_init_param, 
+                                      uint n_ranges, uint *bufsz,
+                                      uint *flags, COST_VECT *cost);
+  int multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+                            uint *bufsz, uint *flags, COST_VECT *cost);
+  DsMrr_impl ds_mrr;
 };
 
 extern SHOW_VAR innodb_status_variables[];

--- 1.178/sql/set_var.cc	2006-09-12 13:49:54 +03:00
+++ 1.179/sql/set_var.cc	2006-09-12 13:49:54 +03:00
@@ -337,7 +337,6 @@
                                                 &SV::myisam_stats_method,
                                                 &myisam_stats_method_typelib,
                                                 NULL);
-
 sys_var_thd_ulong	sys_net_buffer_length("net_buffer_length",
 					      &SV::net_buffer_length);
 sys_var_thd_ulong	sys_net_read_timeout("net_read_timeout",
@@ -353,10 +352,23 @@
 sys_var_thd_bool	sys_old_alter_table("old_alter_table",
 					    &SV::old_alter_table);
 sys_var_thd_bool	sys_old_passwords("old_passwords", &SV::old_passwords);
+
 sys_var_thd_ulong       sys_optimizer_prune_level("optimizer_prune_level",
                                                   &SV::optimizer_prune_level);
 sys_var_thd_ulong       sys_optimizer_search_depth("optimizer_search_depth",
                                                    &SV::optimizer_search_depth);
+
+const char *optimizer_use_mrr_names[] = {"auto", "force", "disable", NullS};
+TYPELIB optimizer_use_mrr_typelib= {
+  array_elements(optimizer_use_mrr_names) - 1, "",
+  optimizer_use_mrr_names, NULL
+};
+
+sys_var_thd_enum        sys_optimizer_use_mrr("optimizer_use_mrr",
+                                              &SV::optimizer_use_mrr,
+                                              &optimizer_use_mrr_typelib,
+                                              NULL);
+
 sys_var_thd_ulong       sys_preload_buff_size("preload_buffer_size",
                                               &SV::preload_buff_size);
 sys_var_thd_ulong	sys_read_buff_size("read_buffer_size",
@@ -896,6 +908,7 @@
    SHOW_SYS},
   {sys_optimizer_search_depth.name,(char*) &sys_optimizer_search_depth,
    SHOW_SYS},
+  {sys_optimizer_use_mrr.name, (char*) &sys_optimizer_use_mrr,       SHOW_SYS},
   {"pid_file",                (char*) pidfile_name,                 SHOW_CHAR},
   {"plugin_dir",              (char*) opt_plugin_dir,               SHOW_CHAR},
   {"port",                    (char*) &mysqld_port,                  SHOW_INT},

--- 1.17/mysql-test/r/ndb_condition_pushdown.result	2006-09-12 13:49:54 +03:00
+++ 1.18/mysql-test/r/ndb_condition_pushdown.result	2006-09-12 13:49:54 +03:00
@@ -925,7 +925,7 @@
 date_time != '1901-01-01 01:01:01' 
 order by auto;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	3	Using where with pushed condition; Using filesort
+1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	3	Using where with pushed condition; Using MRR; Using filesort
 select auto from t1 where 
 string != "aaaa" and 
 vstring != "aaaa" and 
@@ -984,7 +984,7 @@
 date_time > '1901-01-01 01:01:01'
 order by auto;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	3	Using where with pushed condition; Using filesort
+1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	3	Using where with pushed condition; Using MRR; Using filesort
 select auto from t1 where 
 string > "aaaa" and 
 vstring > "aaaa" and 
@@ -1043,7 +1043,7 @@
 date_time >= '1901-01-01 01:01:01' 
 order by auto;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	4	Using where with pushed condition; Using filesort
+1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	4	Using where with pushed condition; Using MRR; Using filesort
 select auto from t1 where 
 string >= "aaaa" and 
 vstring >= "aaaa" and 
@@ -1103,7 +1103,7 @@
 date_time < '1904-04-04 04:04:04' 
 order by auto;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	3	Using where with pushed condition; Using filesort
+1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	3	Using where with pushed condition; Using MRR; Using filesort
 select auto from t1 where 
 string < "dddd" and 
 vstring < "dddd" and 
@@ -1162,7 +1162,7 @@
 date_time <= '1904-04-04 04:04:04' 
 order by auto;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	4	Using where with pushed condition; Using filesort
+1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	4	Using where with pushed condition; Using MRR; Using filesort
 select auto from t1 where 
 string <= "dddd" and 
 vstring <= "dddd" and 
@@ -1255,7 +1255,7 @@
 (date_time between '1901-01-01 01:01:01' and '1903-03-03 03:03:03') 
 order by auto;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	3	Using where with pushed condition; Using filesort
+1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	3	Using where with pushed condition; Using MRR; Using filesort
 select auto from t1 where
 (string between "aaaa" and "cccc") and 
 (vstring between "aaaa" and "cccc") and 
@@ -1358,7 +1358,7 @@
 (date_time not between '1901-01-01 01:01:01' and '1903-03-03 03:03:03') 
 order by auto;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	1	Using where with pushed condition; Using filesort
+1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	1	Using where with pushed condition; Using MRR; Using filesort
 select auto from t1 where
 (string not between "aaaa" and "cccc") and 
 (vstring not between "aaaa" and "cccc") and 
@@ -1462,7 +1462,7 @@
 date_time in('1901-01-01 01:01:01','1903-03-03 03:03:03') 
 order by auto;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	2	Using where with pushed condition; Using filesort
+1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	2	Using where with pushed condition; Using MRR; Using filesort
 select auto from t1 where
 string in("aaaa","cccc") and 
 vstring in("aaaa","cccc") and 
@@ -1565,7 +1565,7 @@
 date_time not in('1901-01-01 01:01:01','1903-03-03 03:03:03') 
 order by auto;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	2	Using where with pushed condition; Using filesort
+1	SIMPLE	t1	range	medium_index	medium_index	3	NULL	2	Using where with pushed condition; Using MRR; Using filesort
 select auto from t1 where
 string not in("aaaa","cccc") and 
 vstring not in("aaaa","cccc") and 
@@ -1738,7 +1738,7 @@
 explain
 select * from t4 where attr1 < 5 and attr2 > 9223372036854775803 and attr3 != 3 order by t4.pk1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t4	range	attr1	attr1	4	NULL	5	Using where with pushed condition; Using filesort
+1	SIMPLE	t4	range	attr1	attr1	4	NULL	5	Using where with pushed condition; Using MRR; Using filesort
 select * from t4 where attr1 < 5 and attr2 > 9223372036854775803 and attr3 != 3 order by t4.pk1;
 pk1	attr1	attr2	attr3	attr4
 2	2	9223372036854775804	2	c
@@ -1746,7 +1746,7 @@
 explain
 select * from t3,t4 where t4.attr1 > 1 and t4.attr2 = t3.attr2 and t4.attr3 < 5 order by t4.pk1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t4	range	attr1	attr1	4	NULL	4	Using where with pushed condition; Using temporary; Using filesort
+1	SIMPLE	t4	range	attr1	attr1	4	NULL	4	Using where with pushed condition; Using MRR; Using temporary; Using filesort
 1	SIMPLE	t3	ALL	NULL	NULL	NULL	NULL	6	Using where
 select * from t3,t4 where t4.attr1 > 1 and t4.attr2 = t3.attr2 and t4.attr3 < 5 order by t4.pk1;
 pk1	attr1	attr2	attr3	attr4	pk1	attr1	attr2	attr3	attr4

--- 1.17/mysql-test/r/ndb_blob.result	2006-09-12 13:49:54 +03:00
+++ 1.18/mysql-test/r/ndb_blob.result	2006-09-12 13:49:54 +03:00
@@ -242,7 +242,7 @@
 commit;
 explain select * from t1 where c >= 100 order by a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	c	c	4	NULL	9	Using where; Using filesort
+1	SIMPLE	t1	range	c	c	4	NULL	9	Using where; Using MRR; Using filesort
 select * from t1 where c >= 100 order by a;
 a	b	c	d
 1	b1	111	dd1
@@ -278,7 +278,7 @@
 commit;
 explain select * from t1 where c >= 100 order by a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	c	c	4	NULL	2	Using where; Using filesort
+1	SIMPLE	t1	range	c	c	4	NULL	2	Using where; Using MRR; Using filesort
 select a,length(b),substr(b,1+2*900,2),length(d),substr(d,1+3*900,3)
 from t1 where c >= 100 order by a;
 a	length(b)	substr(b,1+2*900,2)	length(d)	substr(d,1+3*900,3)
--- New file ---
+++ mysql-test/include/mrr_tests.inc	06/09/12 13:49:40

create table t1(a int);
show create table t1;
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2(a int);
insert into t2 select A.a + 10*(B.a + 10*C.a) from t1 A, t1 B, t1 C;

set @read_rnd_buffer_size_save= @@read_rnd_buffer_size;
set read_rnd_buffer_size=64;
select @@read_rnd_buffer_size;

create table t3 (
  a char(8) not null, b char(8) not null, filler char(200),
  key(a)
);
insert into t3 select @a:=concat('c-', 1000+ A.a, '=w'), @a, 'filler' from t2 A;
insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 2000+A.a, '=w'), 
                      'filler-1' from t2 A;
insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 3000+A.a, '=w'), 
                      'filler-2' from t2 A;

# Test empty result set
select a,filler from t3 where a >= 'c-9011=w';

# Ok, t3.ref_length=6, limit is 64 => 10 elements fit into the buffer
# Test the cases when buffer gets exhausted at different points in source
# intervals:

# 1. Split is in the middle of the range
select a,filler from t3 where a >= 'c-1011=w' and a <= 'c-1015=w'; 

# 2. Split is at range edge 
select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or
                              (a>='c-1014=w' and a <= 'c-1015=w');

# 3. Split is at range edge, with some rows between ranges.
insert into t3 values ('c-1013=z', 'c-1013=z', 'err');
insert into t3 values ('a-1014=w', 'a-1014=w', 'err');

select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or
                              (a>='c-1014=w' and a <= 'c-1015=w');
delete from t3 where b in ('c-1013=z', 'a-1014=w');

# 4. Split is within the equality range.
select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or
                              a='c-1014=w' or a='c-1015=w';

# 5. Split is at the edge of equality range.
insert into t3 values ('c-1013=w', 'del-me', 'inserted');
select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or
                              a='c-1014=w' or a='c-1015=w';
delete from t3 where b='del-me';

# PK tests are not included here.

alter table t3 add primary key(b);

##  PK scan tests
# 6. Split is between 'unique' PK ranges
select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or 
                              b IN ('c-1019=w', 'c-1020=w', 'c-1021=w', 
                                    'c-1022=w', 'c-1023=w', 'c-1024=w');

# 7. Between non-uniq and uniq range
select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1020=w') or 
                              b IN ('c-1021=w', 'c-1022=w', 'c-1023=w');

# 8. Between uniq and non-uniq range
select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or 
                              b IN ('c-1019=w', 'c-1020=w') or 
                              (b>='c-1021=w' and b<= 'c-1023=w');
## End of PK scan tests

#
# Now try different keypart types and special values
#
create table t4 (a varchar(10), b int, c char(10), filler char(200),
                 key idx1 (a, b, c));

# insert buffer_size * 1.5 all-NULL tuples
insert into t4 (filler) select concat('NULL-', 15-a) from t2 order by a limit 15;

insert into t4 (a,b,c,filler) 
  select 'b-1',NULL,'c-1', concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
  select 'b-1',NULL,'c-222', concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
  select 'bb-1',NULL,'cc-2', concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
  select 'zz-1',NULL,'cc-2', 'filler-data' from t2 order by a limit 500;

explain 
  select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1'
                                                      or c='no-such-row2');
select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1'
                                                    or c='no-such-row2');

explain 
  select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2');
select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2');

select * from t4 ignore index(idx1) where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2');

set @@read_rnd_buffer_size= @read_rnd_buffer_size_save;



--- New file ---
+++ mysql-test/r/innodb_mrr.result	06/09/12 13:49:40
drop table if exists t1,t2,t3,t4;
set @@optimizer_use_mrr='force';
set @save_storage_engine= @@storage_engine;
set storage_engine=InnoDB;
create table t1(a int);
show create table t1;
Table	Create Table
t1	CREATE TABLE `t1` (
  `a` int(11) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2(a int);
insert into t2 select A.a + 10*(B.a + 10*C.a) from t1 A, t1 B, t1 C;
set @read_rnd_buffer_size_save= @@read_rnd_buffer_size;
set read_rnd_buffer_size=64;
select @@read_rnd_buffer_size;
@@read_rnd_buffer_size
64
create table t3 (
a char(8) not null, b char(8) not null, filler char(200),
key(a)
);
insert into t3 select @a:=concat('c-', 1000+ A.a, '=w'), @a, 'filler' from t2 A;
insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 2000+A.a, '=w'), 
'filler-1' from t2 A;
insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 3000+A.a, '=w'), 
'filler-2' from t2 A;
select a,filler from t3 where a >= 'c-9011=w';
a	filler
select a,filler from t3 where a >= 'c-1011=w' and a <= 'c-1015=w';
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or
(a>='c-1014=w' and a <= 'c-1015=w');
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
insert into t3 values ('c-1013=z', 'c-1013=z', 'err');
insert into t3 values ('a-1014=w', 'a-1014=w', 'err');
select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or
(a>='c-1014=w' and a <= 'c-1015=w');
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
delete from t3 where b in ('c-1013=z', 'a-1014=w');
select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or
a='c-1014=w' or a='c-1015=w';
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
insert into t3 values ('c-1013=w', 'del-me', 'inserted');
select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or
a='c-1014=w' or a='c-1015=w';
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1013=w	inserted
c-1014=w	filler
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
delete from t3 where b='del-me';
alter table t3 add primary key(b);
select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or 
b IN ('c-1019=w', 'c-1020=w', 'c-1021=w', 
'c-1022=w', 'c-1023=w', 'c-1024=w');
b	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1015=w	filler
c-1016=w	filler
c-1017=w	filler
c-1018=w	filler
c-1019=w	filler
c-1020=w	filler
c-1021=w	filler
c-1022=w	filler
c-1023=w	filler
c-1024=w	filler
select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1020=w') or 
b IN ('c-1021=w', 'c-1022=w', 'c-1023=w');
b	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1015=w	filler
c-1016=w	filler
c-1017=w	filler
c-1018=w	filler
c-1019=w	filler
c-1020=w	filler
c-1021=w	filler
c-1022=w	filler
c-1023=w	filler
select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or 
b IN ('c-1019=w', 'c-1020=w') or 
(b>='c-1021=w' and b<= 'c-1023=w');
b	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1015=w	filler
c-1016=w	filler
c-1017=w	filler
c-1018=w	filler
c-1019=w	filler
c-1020=w	filler
c-1021=w	filler
c-1022=w	filler
c-1023=w	filler
create table t4 (a varchar(10), b int, c char(10), filler char(200),
key idx1 (a, b, c));
insert into t4 (filler) select concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
select 'b-1',NULL,'c-1', concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
select 'b-1',NULL,'c-222', concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
select 'bb-1',NULL,'cc-2', concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
select 'zz-1',NULL,'cc-2', 'filler-data' from t2 order by a limit 500;
explain 
select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1'
                                                      or c='no-such-row2');
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t4	range	idx1	idx1	29	NULL	16	Using where; Using MRR
select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1'
                                                    or c='no-such-row2');
a	b	c	filler
NULL	NULL	NULL	NULL-15
NULL	NULL	NULL	NULL-14
NULL	NULL	NULL	NULL-13
NULL	NULL	NULL	NULL-12
NULL	NULL	NULL	NULL-11
NULL	NULL	NULL	NULL-10
NULL	NULL	NULL	NULL-9
NULL	NULL	NULL	NULL-8
NULL	NULL	NULL	NULL-7
NULL	NULL	NULL	NULL-6
NULL	NULL	NULL	NULL-5
NULL	NULL	NULL	NULL-4
NULL	NULL	NULL	NULL-3
NULL	NULL	NULL	NULL-2
NULL	NULL	NULL	NULL-1
explain 
select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2');
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t4	range	idx1	idx1	29	NULL	32	Using where; Using MRR
select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2');
a	b	c	filler
b-1	NULL	c-1	NULL-15
b-1	NULL	c-1	NULL-14
b-1	NULL	c-1	NULL-13
b-1	NULL	c-1	NULL-12
b-1	NULL	c-1	NULL-11
b-1	NULL	c-1	NULL-10
b-1	NULL	c-1	NULL-9
b-1	NULL	c-1	NULL-8
b-1	NULL	c-1	NULL-7
b-1	NULL	c-1	NULL-6
b-1	NULL	c-1	NULL-5
b-1	NULL	c-1	NULL-4
b-1	NULL	c-1	NULL-3
b-1	NULL	c-1	NULL-2
b-1	NULL	c-1	NULL-1
bb-1	NULL	cc-2	NULL-15
bb-1	NULL	cc-2	NULL-14
bb-1	NULL	cc-2	NULL-13
bb-1	NULL	cc-2	NULL-12
bb-1	NULL	cc-2	NULL-11
bb-1	NULL	cc-2	NULL-10
bb-1	NULL	cc-2	NULL-9
bb-1	NULL	cc-2	NULL-8
bb-1	NULL	cc-2	NULL-7
bb-1	NULL	cc-2	NULL-6
bb-1	NULL	cc-2	NULL-5
bb-1	NULL	cc-2	NULL-4
bb-1	NULL	cc-2	NULL-3
bb-1	NULL	cc-2	NULL-2
bb-1	NULL	cc-2	NULL-1
select * from t4 ignore index(idx1) where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2');
a	b	c	filler
b-1	NULL	c-1	NULL-15
b-1	NULL	c-1	NULL-14
b-1	NULL	c-1	NULL-13
b-1	NULL	c-1	NULL-12
b-1	NULL	c-1	NULL-11
b-1	NULL	c-1	NULL-10
b-1	NULL	c-1	NULL-9
b-1	NULL	c-1	NULL-8
b-1	NULL	c-1	NULL-7
b-1	NULL	c-1	NULL-6
b-1	NULL	c-1	NULL-5
b-1	NULL	c-1	NULL-4
b-1	NULL	c-1	NULL-3
b-1	NULL	c-1	NULL-2
b-1	NULL	c-1	NULL-1
bb-1	NULL	cc-2	NULL-15
bb-1	NULL	cc-2	NULL-14
bb-1	NULL	cc-2	NULL-13
bb-1	NULL	cc-2	NULL-12
bb-1	NULL	cc-2	NULL-11
bb-1	NULL	cc-2	NULL-10
bb-1	NULL	cc-2	NULL-9
bb-1	NULL	cc-2	NULL-8
bb-1	NULL	cc-2	NULL-7
bb-1	NULL	cc-2	NULL-6
bb-1	NULL	cc-2	NULL-5
bb-1	NULL	cc-2	NULL-4
bb-1	NULL	cc-2	NULL-3
bb-1	NULL	cc-2	NULL-2
bb-1	NULL	cc-2	NULL-1
set @@read_rnd_buffer_size= @read_rnd_buffer_size_save;
set storage_engine= @save_storage_engine;
drop table t1, t2, t3, t4;
set @read_rnd_buffer_size_save= @@read_rnd_buffer_size;
set read_rnd_buffer_size=64;
create table t1(a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2(a char(8), b char(8), c char(8), filler char(100), key(a,b,c) ) engine=InnoDB;
insert into t2 select 
concat('a-', 1000 + A.a, '-a'),
concat('b-', 1000 + B.a, '-b'),
concat('c-', 1000 + C.a, '-c'),
'filler'
from t1 A, t1 B, t1 C;
explain
select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a';
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t2	range	a	a	9	NULL	99	Using where; Using MRR
select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a';
count(length(a) + length(filler))
100
drop table t2;
create table t2 (a char(100), b char(100), c char(100), d int, 
filler char(10), key(d), primary key (a,b,c)) engine= innodb;
insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B;
explain select * from t2 force index (d) where d < 10;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t2	range	d	d	5	NULL	47	Using where
drop table t2;
set @@optimizer_use_mrr='auto';
set @@read_rnd_buffer_size= @read_rnd_buffer_size_save;

--- New file ---
+++ mysql-test/r/myisam_mrr.result	06/09/12 13:49:40
drop table if exists t1, t2, t3;
set @@optimizer_use_mrr='force';
create table t1(a int);
show create table t1;
Table	Create Table
t1	CREATE TABLE `t1` (
  `a` int(11) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2(a int);
insert into t2 select A.a + 10*(B.a + 10*C.a) from t1 A, t1 B, t1 C;
set @read_rnd_buffer_size_save= @@read_rnd_buffer_size;
set read_rnd_buffer_size=64;
select @@read_rnd_buffer_size;
@@read_rnd_buffer_size
64
create table t3 (
a char(8) not null, b char(8) not null, filler char(200),
key(a)
);
insert into t3 select @a:=concat('c-', 1000+ A.a, '=w'), @a, 'filler' from t2 A;
insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 2000+A.a, '=w'), 
'filler-1' from t2 A;
insert into t3 select concat('c-', 1000+A.a, '=w'), concat('c-', 3000+A.a, '=w'), 
'filler-2' from t2 A;
select a,filler from t3 where a >= 'c-9011=w';
a	filler
select a,filler from t3 where a >= 'c-1011=w' and a <= 'c-1015=w';
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or
(a>='c-1014=w' and a <= 'c-1015=w');
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
insert into t3 values ('c-1013=z', 'c-1013=z', 'err');
insert into t3 values ('a-1014=w', 'a-1014=w', 'err');
select a,filler from t3 where (a>='c-1011=w' and a <= 'c-1013=w') or
(a>='c-1014=w' and a <= 'c-1015=w');
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
delete from t3 where b in ('c-1013=z', 'a-1014=w');
select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or
a='c-1014=w' or a='c-1015=w';
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
insert into t3 values ('c-1013=w', 'del-me', 'inserted');
select a,filler from t3 where a='c-1011=w' or a='c-1012=w' or a='c-1013=w' or
a='c-1014=w' or a='c-1015=w';
a	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1011=w	filler-1
c-1012=w	filler-1
c-1013=w	filler-1
c-1011=w	filler-2
c-1012=w	filler-2
c-1013=w	filler-2
c-1013=w	inserted
c-1014=w	filler
c-1015=w	filler
c-1014=w	filler-1
c-1015=w	filler-1
c-1014=w	filler-2
c-1015=w	filler-2
delete from t3 where b='del-me';
alter table t3 add primary key(b);
select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or 
b IN ('c-1019=w', 'c-1020=w', 'c-1021=w', 
'c-1022=w', 'c-1023=w', 'c-1024=w');
b	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1015=w	filler
c-1016=w	filler
c-1017=w	filler
c-1018=w	filler
c-1019=w	filler
c-1020=w	filler
c-1021=w	filler
c-1022=w	filler
c-1023=w	filler
c-1024=w	filler
select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1020=w') or 
b IN ('c-1021=w', 'c-1022=w', 'c-1023=w');
b	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1015=w	filler
c-1016=w	filler
c-1017=w	filler
c-1018=w	filler
c-1019=w	filler
c-1020=w	filler
c-1021=w	filler
c-1022=w	filler
c-1023=w	filler
select b,filler from t3 where (b>='c-1011=w' and b<= 'c-1018=w') or 
b IN ('c-1019=w', 'c-1020=w') or 
(b>='c-1021=w' and b<= 'c-1023=w');
b	filler
c-1011=w	filler
c-1012=w	filler
c-1013=w	filler
c-1014=w	filler
c-1015=w	filler
c-1016=w	filler
c-1017=w	filler
c-1018=w	filler
c-1019=w	filler
c-1020=w	filler
c-1021=w	filler
c-1022=w	filler
c-1023=w	filler
create table t4 (a varchar(10), b int, c char(10), filler char(200),
key idx1 (a, b, c));
insert into t4 (filler) select concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
select 'b-1',NULL,'c-1', concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
select 'b-1',NULL,'c-222', concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
select 'bb-1',NULL,'cc-2', concat('NULL-', 15-a) from t2 order by a limit 15;
insert into t4 (a,b,c,filler) 
select 'zz-1',NULL,'cc-2', 'filler-data' from t2 order by a limit 500;
explain 
select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1'
                                                      or c='no-such-row2');
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t4	range	idx1	idx1	29	NULL	10	Using where; Using MRR
select * from t4 where a IS NULL and b IS NULL and (c IS NULL or c='no-such-row1'
                                                    or c='no-such-row2');
a	b	c	filler
NULL	NULL	NULL	NULL-15
NULL	NULL	NULL	NULL-14
NULL	NULL	NULL	NULL-13
NULL	NULL	NULL	NULL-12
NULL	NULL	NULL	NULL-11
NULL	NULL	NULL	NULL-10
NULL	NULL	NULL	NULL-9
NULL	NULL	NULL	NULL-8
NULL	NULL	NULL	NULL-7
NULL	NULL	NULL	NULL-6
NULL	NULL	NULL	NULL-5
NULL	NULL	NULL	NULL-4
NULL	NULL	NULL	NULL-3
NULL	NULL	NULL	NULL-2
NULL	NULL	NULL	NULL-1
explain 
select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2');
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	SIMPLE	t4	range	idx1	idx1	29	NULL	21	Using where; Using MRR
select * from t4 where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2');
a	b	c	filler
b-1	NULL	c-1	NULL-15
b-1	NULL	c-1	NULL-14
b-1	NULL	c-1	NULL-13
b-1	NULL	c-1	NULL-12
b-1	NULL	c-1	NULL-11
b-1	NULL	c-1	NULL-10
b-1	NULL	c-1	NULL-9
b-1	NULL	c-1	NULL-8
b-1	NULL	c-1	NULL-7
b-1	NULL	c-1	NULL-6
b-1	NULL	c-1	NULL-5
b-1	NULL	c-1	NULL-4
b-1	NULL	c-1	NULL-3
b-1	NULL	c-1	NULL-2
b-1	NULL	c-1	NULL-1
bb-1	NULL	cc-2	NULL-15
bb-1	NULL	cc-2	NULL-14
bb-1	NULL	cc-2	NULL-13
bb-1	NULL	cc-2	NULL-12
bb-1	NULL	cc-2	NULL-11
bb-1	NULL	cc-2	NULL-10
bb-1	NULL	cc-2	NULL-9
bb-1	NULL	cc-2	NULL-8
bb-1	NULL	cc-2	NULL-7
bb-1	NULL	cc-2	NULL-6
bb-1	NULL	cc-2	NULL-5
bb-1	NULL	cc-2	NULL-4
bb-1	NULL	cc-2	NULL-3
bb-1	NULL	cc-2	NULL-2
bb-1	NULL	cc-2	NULL-1
select * from t4 ignore index(idx1) where (a ='b-1' or a='bb-1') and b IS NULL and (c='c-1' or c='cc-2');
a	b	c	filler
b-1	NULL	c-1	NULL-15
b-1	NULL	c-1	NULL-14
b-1	NULL	c-1	NULL-13
b-1	NULL	c-1	NULL-12
b-1	NULL	c-1	NULL-11
b-1	NULL	c-1	NULL-10
b-1	NULL	c-1	NULL-9
b-1	NULL	c-1	NULL-8
b-1	NULL	c-1	NULL-7
b-1	NULL	c-1	NULL-6
b-1	NULL	c-1	NULL-5
b-1	NULL	c-1	NULL-4
b-1	NULL	c-1	NULL-3
b-1	NULL	c-1	NULL-2
b-1	NULL	c-1	NULL-1
bb-1	NULL	cc-2	NULL-15
bb-1	NULL	cc-2	NULL-14
bb-1	NULL	cc-2	NULL-13
bb-1	NULL	cc-2	NULL-12
bb-1	NULL	cc-2	NULL-11
bb-1	NULL	cc-2	NULL-10
bb-1	NULL	cc-2	NULL-9
bb-1	NULL	cc-2	NULL-8
bb-1	NULL	cc-2	NULL-7
bb-1	NULL	cc-2	NULL-6
bb-1	NULL	cc-2	NULL-5
bb-1	NULL	cc-2	NULL-4
bb-1	NULL	cc-2	NULL-3
bb-1	NULL	cc-2	NULL-2
bb-1	NULL	cc-2	NULL-1
set @@read_rnd_buffer_size= @read_rnd_buffer_size_save;
set @@optimizer_use_mrr='auto';
drop table t1, t2, t3, t4;

--- New file ---
+++ mysql-test/t/innodb_mrr.test	06/09/12 13:49:40
-- source include/have_innodb.inc

--disable_warnings
drop table if exists t1,t2,t3,t4;
--enable_warnings

set @@optimizer_use_mrr='force';
set @save_storage_engine= @@storage_engine;
set storage_engine=InnoDB;

--source include/mrr_tests.inc 

set storage_engine= @save_storage_engine;
drop table t1, t2, t3, t4;

# Try big rowid sizes
set @read_rnd_buffer_size_save= @@read_rnd_buffer_size;
set read_rnd_buffer_size=64;


# By default InnoDB will fill values only for key parts used by the query,
# which will cause DS-MRR to supply an invalid tuple on scan restoration. 
# Verify that DS-MRR's code extra(HA_EXTRA_RETRIEVE_ALL_COLS) call has effect:
create table t1(a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t2(a char(8), b char(8), c char(8), filler char(100), key(a,b,c) ) engine=InnoDB;

insert into t2 select 
  concat('a-', 1000 + A.a, '-a'),
  concat('b-', 1000 + B.a, '-b'),
  concat('c-', 1000 + C.a, '-c'),
  'filler'
from t1 A, t1 B, t1 C;

explain
select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a';
select count(length(a) + length(filler)) from t2 where a>='a-1000-a' and a <'a-1001-a';
drop table t2;

# Try a very big rowid
create table t2 (a char(100), b char(100), c char(100), d int, 
                 filler char(10), key(d), primary key (a,b,c)) engine= innodb;
insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B;
explain select * from t2 force index (d) where d < 10;
drop table t2;

set @@optimizer_use_mrr='auto';
set @@read_rnd_buffer_size= @read_rnd_buffer_size_save;


--- New file ---
+++ mysql-test/t/myisam_mrr.test	06/09/12 13:49:40
#
# MRR/MyISAM tests.
#

--disable_warnings
drop table if exists t1, t2, t3;
--enable_warnings

set @@optimizer_use_mrr='force';
-- source include/mrr_tests.inc
set @@optimizer_use_mrr='auto';

drop table t1, t2, t3, t4;


Thread
bk commit into 5.2 tree (sergefp:1.2154)Sergey Petrunia12 Sep