List:Commits« Previous MessageNext Message »
From:Ole John Aske Date:March 21 2011 1:06pm
Subject:bzr commit into mysql-trunk branch (ole.john.aske:3303) Bug#11764737
View as plain text  
#At file:///net/fimafeng09/export/home/tmp/oleja/mysql/mysql-trunk/ based on revid:georgi.kodinov@stripped

 3303 Ole John Aske	2011-03-21
      Updated fix (W/ Jorgens review comments) for bug#11764737: Optimizer is overly eager to request ordered access
      
      The essential part of this fix is:
           
      1) Ensure that JOIN_TAB::sorted is set, and QUICK_RANGE_SELECT::mrr_flags contains 'HA_MRR_SORTED'
         only iff strict ordering is required from the handler - Either as a result of the query itself
         specifying ORDER/GROUP BY, or usage of a QUICK access method which require the sources to 
         be ordered (Typical if its internals use ::ha_index_first, _last, _next, _prev)
            
      2) Change calls to handler::ha_index_init() to take its 'sorted' argument either directly
         from JOIN_TAB::sorted or from mrr_flags containing HA_MRR_SORTED.
      
      In order to implement this QUICK_SELECT_I::need_sorted_output() had to be extended to
      take a 'bool sort' argument: Where 'sort==false' will enable us to turn off
      requirement that a QUICK_SELECT_I access method should deliver rows in sorted
      order. (Similar logic already exists for 'non-quick' access methods.)
      NOTE: QUICK_SELECT_I::need_sorted_output(sort==false) is only regarded as a hint
      and the different QUICK_SELECT_I methods may still request sorted order from the
      handler if required by their internals.
      
      Furthermore the function 'disable_sorted_access(JOIN_TAB* join_tab)' has been 
      introduced which collect all logic for turning of sort requirement.
     @ mysql-test/r/innodb_mysql_lock2.result
        Accept changed result order for this (unordered) result set.
     @ mysql-test/r/partition.result
        Accept changed result order for this (unordered) result set.
     @ mysql-test/r/partition_explicit_prune.result
        Accept changed result order for (unordered) result sets.
     @ sql/opt_range.cc
        Correcly set, and use, HA_MRR_SORTED in order to only request sorted access 
        iff it is required either due to optimizer using ::need_sorted_output(),
        or the internals of the quick access needing ordered access itself.
                
        Also added mrr_flags and its mrr relatives to C'tor initializer list.
        Fixed an issue with missing 'delete quick' + return in case
        get_quick_select() failed.

    modified:
      mysql-test/r/innodb_mysql_lock2.result
      mysql-test/r/partition.result
      mysql-test/r/partition_explicit_prune.result
      sql/opt_range.cc
      sql/opt_range.h
      sql/sql_select.cc
=== modified file 'mysql-test/r/innodb_mysql_lock2.result'
--- a/mysql-test/r/innodb_mysql_lock2.result	2010-07-19 16:09:51 +0000
+++ b/mysql-test/r/innodb_mysql_lock2.result	2011-03-21 13:06:21 +0000
@@ -605,11 +605,11 @@ begin;
 # Acquire SR metadata lock on t1.
 select * from t1;
 i
+4
 1
+5
 2
 3
-4
-5
 # Switching to connection 'con1'.
 # Sending:
 alter table t1 rebuild partition p0;

=== modified file 'mysql-test/r/partition.result'
--- a/mysql-test/r/partition.result	2011-03-11 09:58:45 +0000
+++ b/mysql-test/r/partition.result	2011-03-21 13:06:21 +0000
@@ -1776,9 +1776,9 @@ insert into t1 values (18446744073709551
 (18446744073709551613), (18446744073709551612);
 select * from t1;
 a
+18446744073709551614
 18446744073709551612
 18446744073709551613
-18446744073709551614
 18446744073709551615
 select * from t1 where a = 18446744073709551615;
 a
@@ -1786,9 +1786,9 @@ a
 delete from t1 where a = 18446744073709551615;
 select * from t1;
 a
+18446744073709551614
 18446744073709551612
 18446744073709551613
-18446744073709551614
 drop table t1;
 CREATE TABLE t1 (
 num int(11) NOT NULL, cs int(11) NOT NULL)

=== modified file 'mysql-test/r/partition_explicit_prune.result'
--- a/mysql-test/r/partition_explicit_prune.result	2011-02-01 11:38:39 +0000
+++ b/mysql-test/r/partition_explicit_prune.result	2011-03-21 13:06:21 +0000
@@ -332,12 +332,12 @@ ERROR 42000: You have an error in your S
 SELECT * FROM t1 `PARTITION`;
 a	b
 -2	(pNeg-)subp0
-5	p0-9:subp3
-10	p10-99
 -3	pNeg(-subp1)
 -1	pNeg(-subp1)
+5	p0-9:subp3
 1	subp3
 3	subp3
+10	p10-99
 100	`p100-99999`(-subp6)
 1000	`p100-99999`(-subp6)
 101	`p100-99999`(-subp7)
@@ -346,12 +346,12 @@ ERROR 42000: You have an error in your S
 SELECT * FROM t1 AS `PARTITION`;
 a	b
 -2	(pNeg-)subp0
-5	p0-9:subp3
-10	p10-99
 -3	pNeg(-subp1)
 -1	pNeg(-subp1)
+5	p0-9:subp3
 1	subp3
 3	subp3
+10	p10-99
 100	`p100-99999`(-subp6)
 1000	`p100-99999`(-subp6)
 101	`p100-99999`(-subp7)
@@ -536,10 +536,10 @@ HANDLER_WRITE	127
 SELECT * FROM t1 PARTITION (pNeg, `p10-99`);
 a	b
 -2	(pNeg-)subp0
-10	p10-99
 -3	pNeg(-subp1)
 -1	pNeg(-subp1)
 -21	REPLACEd by REPLACE
+10	p10-99
 FLUSH STATUS;
 SELECT * FROM t1 PARTITION (pNeg, `p10-99`) INTO OUTFILE 'loadtest.txt';
 SELECT * FROM INFORMATION_SCHEMA.SESSION_STATUS
@@ -576,7 +576,7 @@ WHERE VARIABLE_NAME LIKE 'HANDLER_%' AND
 VARIABLE_NAME	VARIABLE_VALUE
 HANDLER_EXTERNAL_LOCK	6
 HANDLER_ROLLBACK	1
-HANDLER_WRITE	18
+HANDLER_WRITE	21
 # 6 locks (1 ha_partiiton + 2 ha_innobase) x 2 (lock+unlock)
 # 1 rollback
 SELECT * FROM t1 PARTITION (pNeg, `p10-99`);
@@ -950,12 +950,12 @@ SUBPARTITION BY HASH (a)
 SELECT * FROM t1;
 a	b
 -2	(pNeg-)subp0, Updated
-10	p10-99
+-222	`p100-99999`(-subp6), Updated from a = 100
 -21	REPLACEd by REPLACE
 1	subp3
 3	subp3
+10	p10-99
 1000	`p100-99999`(-subp6)
--222	`p100-99999`(-subp6), Updated from a = 100
 110	`p100-99999`(-subp7), Updated to 103, Updated to 110
 SHOW CREATE TABLE t2;
 Table	Create Table
@@ -981,9 +981,9 @@ SUBPARTITION BY HASH (a)
   SUBPARTITION subp7 ENGINE = InnoDB)) */
 SELECT * FROM t2;
 a	b
-10	p10-99
 1	subp3
 3	subp3
+10	p10-99
 1000	`p100-99999`(-subp6)
 110	`p100-99999`(-subp7), Updated to 103, Updated to 110
 SHOW CREATE TABLE t3;
@@ -995,11 +995,11 @@ t3	CREATE TABLE `t3` (
 SELECT * FROM t3;
 a	b
 -2	(pNeg-)subp0, Updated
+-222	`p100-99999`(-subp6), Updated from a = 100
 -21	REPLACEd by REPLACE
 1	subp3
 3	subp3
 1000	`p100-99999`(-subp6)
--222	`p100-99999`(-subp6), Updated from a = 100
 110	`p100-99999`(-subp7), Updated to 103, Updated to 110
 FLUSH STATUS;
 DELETE t1 PARTITION (pNeg), t3 FROM t1, t3

=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc	2011-03-17 11:48:04 +0000
+++ b/sql/opt_range.cc	2011-03-21 13:06:21 +0000
@@ -1179,7 +1179,9 @@ QUICK_SELECT_I::QUICK_SELECT_I()
 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                        bool no_alloc, MEM_ROOT *parent_alloc,
                                        bool *create_error)
-  :free_file(0),cur_range(NULL),last_range(0),dont_free(0)
+  :free_file(0),cur_range(NULL),last_range(0),
+   mrr_flags(0),mrr_buf_size(0),mrr_buf_desc(NULL),
+   dont_free(0)
 {
   my_bitmap_map *bitmap;
   DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");
@@ -1192,7 +1194,6 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T
 
   /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
   mrr_buf_size= thd->variables.read_rnd_buff_size;
-  mrr_buf_desc= NULL;
 
   if (!no_alloc && !parent_alloc)
   {
@@ -1220,17 +1221,12 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T
 }
 
 
-void QUICK_RANGE_SELECT::need_sorted_output()
+void QUICK_RANGE_SELECT::need_sorted_output(bool sort)
 {
-  if (!(mrr_flags & HA_MRR_SORTED))
-  {
-    /*
-      Native implementation can't produce sorted output. We'll have to
-      switch to default
-    */
-    mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; 
-  }
-  mrr_flags |= HA_MRR_SORTED;
+  if (sort)
+    mrr_flags |= HA_MRR_SORTED;
+  else
+    mrr_flags &= ~HA_MRR_SORTED;
 }
 
 
@@ -8714,7 +8710,8 @@ int QUICK_RANGE_SELECT::reset()
   {
     if (in_ror_merged_scan)
       head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
-    if ((error= file->ha_index_init(index,1)))
+    bool sorted= (mrr_flags & HA_MRR_SORTED);
+    if ((error= file->ha_index_init(index,sorted)))
         DBUG_RETURN(error);
   }
 
@@ -8989,10 +8986,11 @@ int QUICK_RANGE_SELECT::get_next_prefix(
     last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
     last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);
 
+    bool sorted= (mrr_flags & HA_MRR_SORTED);
     result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
 				   last_range->max_keypart_map ? &end_key : 0,
                                    test(last_range->flag & EQ_RANGE),
-				   TRUE);
+				   sorted);
     if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
       last_range= 0;			// Stop searching
 
@@ -9104,6 +9102,7 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUI
   */
   mrr_buf_desc= NULL;
   mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
+  mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev
   mrr_buf_size= 0;
 
 
@@ -10584,12 +10583,19 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *par
     if (quick_prefix_records == HA_POS_ERROR)
       quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
     else
+    {
       /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
       quick->quick_prefix_select= get_quick_select(param, param_idx,
                                                    index_tree,
-                                                   HA_MRR_USE_DEFAULT_IMPL, 0,
+                                                   HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED,
+                                                   0,
                                                    &quick->alloc);
-
+      if (!quick->quick_prefix_select)
+      {
+        delete quick;
+        DBUG_RETURN(NULL);
+      }
+    }
     /*
       Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
       attribute, and create an array of QUICK_RANGES to be used by the
@@ -10985,7 +10991,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(vo
   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
 
   head->set_keyread(TRUE); /* We need only the key attributes */
-  if ((result= file->ha_index_init(index,1)))
+  /*
+    Request ordered index access as usage of ::index_last(), 
+    ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it.
+  */
+  if ((result= file->ha_index_init(index,true)))
     DBUG_RETURN(result);
   if (quick_prefix_select && quick_prefix_select->reset())
     DBUG_RETURN(1);

=== modified file 'sql/opt_range.h'
--- a/sql/opt_range.h	2010-12-17 12:58:04 +0000
+++ b/sql/opt_range.h	2011-03-21 13:06:21 +0000
@@ -285,11 +285,15 @@ public:
   virtual bool clustered_pk_range() { return false; }
   
   /*
-    Request that this quick select produces sorted output. Not all quick
-    selects can do it, the caller is responsible for calling this function
-    only for those quick selects that can.
+    Request that this quick select produces sorted output if 'sort==true'.
+    Not all quick selects can provide sorted output, the caller is responsible 
+    for calling this function only for those quick selects that can.
+    Caller is allowed to later cancel its sorted request by setting 
+    'sort==false'. However, the implementation is allowed to provide sorted 
+    output even in this case if benificial, or required by implementation 
+    internals.
   */
-  virtual void need_sorted_output() = 0;
+  virtual void need_sorted_output(bool sort) = 0;
   enum {
     QS_TYPE_RANGE = 0,
     QS_TYPE_INDEX_MERGE = 1,
@@ -399,7 +403,7 @@ uint quick_range_seq_next(range_seq_t rs
 
 /*
   Quick select that does a range scan on a single key. The records are
-  returned in key order.
+  returned in key order if ::need_sorted_output(true) has been called.
 */
 class QUICK_RANGE_SELECT : public QUICK_SELECT_I
 {
@@ -465,7 +469,7 @@ public:
                      MEM_ROOT *parent_alloc, bool *create_error);
   ~QUICK_RANGE_SELECT();
   
-  void need_sorted_output();
+  void need_sorted_output(bool sort);
   int init();
   int reset(void);
   int get_next();
@@ -569,7 +573,7 @@ public:
   ~QUICK_INDEX_MERGE_SELECT();
 
   int  init();
-  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
+  void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -647,7 +651,7 @@ public:
   ~QUICK_ROR_INTERSECT_SELECT();
 
   int  init();
-  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
+  void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -718,7 +722,7 @@ public:
   ~QUICK_ROR_UNION_SELECT();
 
   int  init();
-  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
+  void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -860,7 +864,7 @@ public:
   void adjust_prefix_ranges();
   bool alloc_buffers();
   int init();
-  void need_sorted_output() { /* always do it */ }
+  void need_sorted_output(bool sort) { /* always do it */ }
   int reset();
   int get_next();
   bool reverse_sorted() { return false; }

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2011-03-17 12:45:19 +0000
+++ b/sql/sql_select.cc	2011-03-21 13:06:21 +0000
@@ -1467,7 +1467,7 @@ bool setup_semijoin_dups_elimination(JOI
            output is not supported.
         */
         if (tab->select && tab->select->quick)
-          tab->select->quick->need_sorted_output();
+          tab->select->quick->need_sorted_output(true);
 
         /* Calculate key length */
         keylen= 0;
@@ -2743,6 +2743,23 @@ JOIN::save_join_tab()
 
 
 /**
+  There may be a pending 'sorted' request on the specified 
+  'join_tab' which we now has decided we can ignore.
+*/
+static void
+disable_sorted_access(JOIN_TAB* join_tab)
+{
+  DBUG_ENTER("disable_sorted_access");
+  join_tab->sorted= 0;
+  if (join_tab->select && join_tab->select->quick)
+  {
+    join_tab->select->quick->need_sorted_output(false);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/**
   Exec select.
 
   @todo
@@ -2921,7 +2938,7 @@ JOIN::exec()
         curr_join->const_tables != curr_join->tables && 
         curr_join->best_positions[curr_join->const_tables].sj_strategy 
           != SJ_OPT_LOOSE_SCAN)
-      curr_join->join_tab[curr_join->const_tables].sorted= 0;
+      disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]);
     if ((tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0)))
     {
       error= tmp_error;
@@ -3087,7 +3104,7 @@ JOIN::exec()
       curr_join->group_list= 0;
       if (!curr_join->sort_and_group &&
           curr_join->const_tables != curr_join->tables)
-        curr_join->join_tab[curr_join->const_tables].sorted= 0;
+        disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]);
       if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
 	  (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
 				0)))
@@ -11176,7 +11193,7 @@ make_join_readinfo(JOIN *join, ulonglong
 {
   uint i, jcl;
   bool statistics= test(!(join->select_options & SELECT_DESCRIBE));
-  bool sorted= 1;
+  bool sorted= (join->order || join->group_list);
   uint first_sjm_table= MAX_TABLES;
   uint last_sjm_table= MAX_TABLES;
   DBUG_ENTER("make_join_readinfo");
@@ -18179,6 +18196,7 @@ join_read_key2(JOIN_TAB *tab, TABLE *tab
   int error;
   if (!table->file->inited)
   {
+    DBUG_ASSERT(!tab->sorted);  // Don't expect sort req. for single row.
     table->file->ha_index_init(table_ref->key, tab->sorted);
   }
 
@@ -18488,7 +18506,7 @@ join_read_last(JOIN_TAB *tab)
   tab->read_record.index=tab->index;
   tab->read_record.record=table->record[0];
   if (!table->file->inited)
-    table->file->ha_index_init(tab->index, 1);
+    table->file->ha_index_init(tab->index, tab->sorted);
   if ((error= tab->table->file->ha_index_last(tab->table->record[0])))
     return report_error(table, error);
   return 0;
@@ -18512,7 +18530,7 @@ join_ft_read_first(JOIN_TAB *tab)
   TABLE *table= tab->table;
 
   if (!table->file->inited)
-    table->file->ha_index_init(tab->ref.key, 1);
+    table->file->ha_index_init(tab->ref.key, tab->sorted);
   table->file->ft_init();
 
   if ((error= table->file->ft_read(table->record[0])))
@@ -20290,7 +20308,7 @@ check_reverse_order:                  
       }
     }
     else if (select && select->quick)
-      select->quick->need_sorted_output();
+      select->quick->need_sorted_output(true);
 
   } // QEP has been modified
 
@@ -23197,9 +23215,22 @@ void select_describe(JOIN *join, bool ne
 
         if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE &&
             !(((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags &
-             HA_MRR_USE_DEFAULT_IMPL))
+             (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))) 
         {
-	  extra.append(STRING_WITH_LEN("; Using MRR"));
+          /*
+            During normal execution of a query, multi_range_read_init() is
+            called to initialize MRR. If HA_MRR_SORTED is set at this point,
+            multi_range_read_init() for any native MRR implementation will
+            revert to default MRR because they cannot produce sorted output
+            currently.
+            Calling multi_range_read_init() can potentially be costly, so it
+            is not done when executing an EXPLAIN. We therefore make the
+            assumption that HA_MRR_SORTED means no MRR. If some MRR native
+            implementation will support sorted output in the future, a
+            function "bool mrr_supports_sorted()" should be added in the
+            handler.
+          */
+          extra.append(STRING_WITH_LEN("; Using MRR"));
         }
         if (need_tmp_table)
         {


Attachment: [text/bzr-bundle] bzr/ole.john.aske@oracle.com-20110321130621-z8qov0yxvge0ae1r.bundle
Thread
bzr commit into mysql-trunk branch (ole.john.aske:3303) Bug#11764737Ole John Aske21 Mar
  • Re: bzr commit into mysql-trunk branch (ole.john.aske:3303) Bug#11764737Jorgen Loland21 Mar
  • Re: bzr commit into mysql-trunk branch (ole.john.aske:3303) Bug#11764737Roy Lyseng22 Mar