List:Commits« Previous MessageNext Message »
From:Ole John Aske Date:March 23 2011 9:52am
Subject:bzr commit into mysql-trunk branch (ole.john.aske:3311) Bug#11764737
View as plain text  
#At file:///net/fimafeng09/export/home/tmp/oleja/mysql/mysql-trunk/ based on revid:magne.mahre@stripped

 3311 Ole John Aske	2011-03-23
      Updated fix (W/ Jorgens review comments) for bug#11764737:
        'Optimizer is overly eager to request ordered access'
      
      The essential part of this fix is:
           
      1) Ensure that JOIN_TAB::sorted is set, and QUICK_RANGE_SELECT::mrr_flags contains 'HA_MRR_SORTED'
         only iff strict ordering is required from the handler - Either as a result of the query itself
         specifying ORDER/GROUP BY, or usage of a QUICK access method which require the sources to 
         be ordered (Typical if its internals use ::ha_index_first, _last, _next, _prev)
            
      2) Change calls to handler::ha_index_init() to take its 'sorted' argument either directly
         from JOIN_TAB::sorted or from mrr_flags containing HA_MRR_SORTED.
      
      In order to implement this QUICK_SELECT_I::need_sorted_output() had to be extended to
      take a 'bool sort' argument: Where 'sort==false' will enable us to turn off
      requirement that a QUICK_SELECT_I access method should deliver rows in sorted
      order. (Similar logic already exists for 'non-quick' access methods.)
      NOTE: QUICK_SELECT_I::need_sorted_output(sort==false) is only regarded as a hint
      and the different QUICK_SELECT_I methods may still request sorted order from the
      handler if required by their internals.
      
      Furthermore the function 'disable_sorted_access(JOIN_TAB* join_tab)' has been 
      introduced which collect all logic for turning off sort requirement.
     @ mysql-test/r/innodb_mysql_lock2.result
        Accept changed result order for this (unordered) result set.
     @ mysql-test/r/partition.result
        Accept changed result order for this (unordered) result set.
     @ mysql-test/r/partition_explicit_prune.result
        Accept changed result order for (unordered) result sets.
        
        There is also a change in 'HANDLER_WRITE' due to a changed order of 
        rows in 'loadtest.txt' when doing LOAD DATA INFILE 'loadtest.txt.
        That particular 'LOAD DATA' is intended to fail due to loading into
        incorrect partition.
        
        As the ofending row now has an other position in 'loadtest.txt', we
        write more rows before encountering this row - and the rollbacks the
        TXN.
     @ sql/opt_range.cc
        Correcly set, and use, HA_MRR_SORTED in order to only request sorted access 
        iff it is required either due to optimizer using ::need_sorted_output(),
        or the internals of the quick access needing ordered access itself.
                
        Also added mrr_flags and its mrr relatives to C'tor initializer list.
        Fixed an issue with missing 'delete quick' + return in case
        get_quick_select() failed.

    modified:
      mysql-test/r/innodb_mysql_lock2.result
      mysql-test/r/partition.result
      mysql-test/r/partition_explicit_prune.result
      sql/opt_range.cc
      sql/opt_range.h
      sql/sql_select.cc
=== modified file 'mysql-test/r/innodb_mysql_lock2.result'
--- a/mysql-test/r/innodb_mysql_lock2.result	2010-07-19 16:09:51 +0000
+++ b/mysql-test/r/innodb_mysql_lock2.result	2011-03-23 09:52:24 +0000
@@ -605,11 +605,11 @@ begin;
 # Acquire SR metadata lock on t1.
 select * from t1;
 i
+4
 1
+5
 2
 3
-4
-5
 # Switching to connection 'con1'.
 # Sending:
 alter table t1 rebuild partition p0;

=== modified file 'mysql-test/r/partition.result'
--- a/mysql-test/r/partition.result	2011-03-11 09:58:45 +0000
+++ b/mysql-test/r/partition.result	2011-03-23 09:52:24 +0000
@@ -1776,9 +1776,9 @@ insert into t1 values (18446744073709551
 (18446744073709551613), (18446744073709551612);
 select * from t1;
 a
+18446744073709551614
 18446744073709551612
 18446744073709551613
-18446744073709551614
 18446744073709551615
 select * from t1 where a = 18446744073709551615;
 a
@@ -1786,9 +1786,9 @@ a
 delete from t1 where a = 18446744073709551615;
 select * from t1;
 a
+18446744073709551614
 18446744073709551612
 18446744073709551613
-18446744073709551614
 drop table t1;
 CREATE TABLE t1 (
 num int(11) NOT NULL, cs int(11) NOT NULL)

=== modified file 'mysql-test/r/partition_explicit_prune.result'
--- a/mysql-test/r/partition_explicit_prune.result	2011-02-01 11:38:39 +0000
+++ b/mysql-test/r/partition_explicit_prune.result	2011-03-23 09:52:24 +0000
@@ -332,12 +332,12 @@ ERROR 42000: You have an error in your S
 SELECT * FROM t1 `PARTITION`;
 a	b
 -2	(pNeg-)subp0
-5	p0-9:subp3
-10	p10-99
 -3	pNeg(-subp1)
 -1	pNeg(-subp1)
+5	p0-9:subp3
 1	subp3
 3	subp3
+10	p10-99
 100	`p100-99999`(-subp6)
 1000	`p100-99999`(-subp6)
 101	`p100-99999`(-subp7)
@@ -346,12 +346,12 @@ ERROR 42000: You have an error in your S
 SELECT * FROM t1 AS `PARTITION`;
 a	b
 -2	(pNeg-)subp0
-5	p0-9:subp3
-10	p10-99
 -3	pNeg(-subp1)
 -1	pNeg(-subp1)
+5	p0-9:subp3
 1	subp3
 3	subp3
+10	p10-99
 100	`p100-99999`(-subp6)
 1000	`p100-99999`(-subp6)
 101	`p100-99999`(-subp7)
@@ -536,10 +536,10 @@ HANDLER_WRITE	127
 SELECT * FROM t1 PARTITION (pNeg, `p10-99`);
 a	b
 -2	(pNeg-)subp0
-10	p10-99
 -3	pNeg(-subp1)
 -1	pNeg(-subp1)
 -21	REPLACEd by REPLACE
+10	p10-99
 FLUSH STATUS;
 SELECT * FROM t1 PARTITION (pNeg, `p10-99`) INTO OUTFILE 'loadtest.txt';
 SELECT * FROM INFORMATION_SCHEMA.SESSION_STATUS
@@ -576,7 +576,7 @@ WHERE VARIABLE_NAME LIKE 'HANDLER_%' AND
 VARIABLE_NAME	VARIABLE_VALUE
 HANDLER_EXTERNAL_LOCK	6
 HANDLER_ROLLBACK	1
-HANDLER_WRITE	18
+HANDLER_WRITE	21
 # 6 locks (1 ha_partiiton + 2 ha_innobase) x 2 (lock+unlock)
 # 1 rollback
 SELECT * FROM t1 PARTITION (pNeg, `p10-99`);
@@ -950,12 +950,12 @@ SUBPARTITION BY HASH (a)
 SELECT * FROM t1;
 a	b
 -2	(pNeg-)subp0, Updated
-10	p10-99
+-222	`p100-99999`(-subp6), Updated from a = 100
 -21	REPLACEd by REPLACE
 1	subp3
 3	subp3
+10	p10-99
 1000	`p100-99999`(-subp6)
--222	`p100-99999`(-subp6), Updated from a = 100
 110	`p100-99999`(-subp7), Updated to 103, Updated to 110
 SHOW CREATE TABLE t2;
 Table	Create Table
@@ -981,9 +981,9 @@ SUBPARTITION BY HASH (a)
   SUBPARTITION subp7 ENGINE = InnoDB)) */
 SELECT * FROM t2;
 a	b
-10	p10-99
 1	subp3
 3	subp3
+10	p10-99
 1000	`p100-99999`(-subp6)
 110	`p100-99999`(-subp7), Updated to 103, Updated to 110
 SHOW CREATE TABLE t3;
@@ -995,11 +995,11 @@ t3	CREATE TABLE `t3` (
 SELECT * FROM t3;
 a	b
 -2	(pNeg-)subp0, Updated
+-222	`p100-99999`(-subp6), Updated from a = 100
 -21	REPLACEd by REPLACE
 1	subp3
 3	subp3
 1000	`p100-99999`(-subp6)
--222	`p100-99999`(-subp6), Updated from a = 100
 110	`p100-99999`(-subp7), Updated to 103, Updated to 110
 FLUSH STATUS;
 DELETE t1 PARTITION (pNeg), t3 FROM t1, t3

=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc	2011-03-17 11:48:04 +0000
+++ b/sql/opt_range.cc	2011-03-23 09:52:24 +0000
@@ -1179,7 +1179,9 @@ QUICK_SELECT_I::QUICK_SELECT_I()
 QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                        bool no_alloc, MEM_ROOT *parent_alloc,
                                        bool *create_error)
-  :free_file(0),cur_range(NULL),last_range(0),dont_free(0)
+  :free_file(0), cur_range(NULL), last_range(0),
+   mrr_flags(0), mrr_buf_size(0), mrr_buf_desc(NULL),
+   dont_free(0)
 {
   my_bitmap_map *bitmap;
   DBUG_ENTER("QUICK_RANGE_SELECT::QUICK_RANGE_SELECT");
@@ -1192,7 +1194,6 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T
 
   /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). */
   mrr_buf_size= thd->variables.read_rnd_buff_size;
-  mrr_buf_desc= NULL;
 
   if (!no_alloc && !parent_alloc)
   {
@@ -1220,17 +1221,12 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T
 }
 
 
-void QUICK_RANGE_SELECT::need_sorted_output()
+void QUICK_RANGE_SELECT::need_sorted_output(bool sort)
 {
-  if (!(mrr_flags & HA_MRR_SORTED))
-  {
-    /*
-      Native implementation can't produce sorted output. We'll have to
-      switch to default
-    */
-    mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; 
-  }
-  mrr_flags |= HA_MRR_SORTED;
+  if (sort)
+    mrr_flags |= HA_MRR_SORTED;
+  else
+    mrr_flags &= ~HA_MRR_SORTED;
 }
 
 
@@ -8714,7 +8710,8 @@ int QUICK_RANGE_SELECT::reset()
   {
     if (in_ror_merged_scan)
       head->column_bitmaps_set_no_signal(&column_bitmap, &column_bitmap);
-    if ((error= file->ha_index_init(index,1)))
+    const bool sorted= (mrr_flags & HA_MRR_SORTED);
+    if ((error= file->ha_index_init(index, sorted)))
         DBUG_RETURN(error);
   }
 
@@ -8989,10 +8986,11 @@ int QUICK_RANGE_SELECT::get_next_prefix(
     last_range->make_min_endpoint(&start_key, prefix_length, keypart_map);
     last_range->make_max_endpoint(&end_key, prefix_length, keypart_map);
 
+    const bool sorted= (mrr_flags & HA_MRR_SORTED);
     result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0,
 				   last_range->max_keypart_map ? &end_key : 0,
                                    test(last_range->flag & EQ_RANGE),
-				   TRUE);
+				   sorted);
     if (last_range->flag == (UNIQUE_RANGE | EQ_RANGE))
       last_range= 0;			// Stop searching
 
@@ -9104,6 +9102,7 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUI
   */
   mrr_buf_desc= NULL;
   mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
+  mrr_flags |= HA_MRR_SORTED; // 'sorted' as internals use index_last/_prev
   mrr_buf_size= 0;
 
 
@@ -10584,12 +10583,20 @@ TRP_GROUP_MIN_MAX::make_quick(PARAM *par
     if (quick_prefix_records == HA_POS_ERROR)
       quick->quick_prefix_select= NULL; /* Can't construct a quick select. */
     else
+    {
       /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
       quick->quick_prefix_select= get_quick_select(param, param_idx,
                                                    index_tree,
-                                                   HA_MRR_USE_DEFAULT_IMPL, 0,
+                                                   HA_MRR_USE_DEFAULT_IMPL |
+                                                   HA_MRR_SORTED,
+                                                   0,
                                                    &quick->alloc);
-
+      if (!quick->quick_prefix_select)
+      {
+        delete quick;
+        DBUG_RETURN(NULL);
+      }
+    }
     /*
       Extract the SEL_ARG subtree that contains only ranges for the MIN/MAX
       attribute, and create an array of QUICK_RANGES to be used by the
@@ -10985,7 +10992,11 @@ int QUICK_GROUP_MIN_MAX_SELECT::reset(vo
   DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");
 
   head->set_keyread(TRUE); /* We need only the key attributes */
-  if ((result= file->ha_index_init(index,1)))
+  /*
+    Request ordered index access as usage of ::index_last(), 
+    ::index_first() within QUICK_GROUP_MIN_MAX_SELECT depends on it.
+  */
+  if ((result= file->ha_index_init(index, true)))
     DBUG_RETURN(result);
   if (quick_prefix_select && quick_prefix_select->reset())
     DBUG_RETURN(1);

=== modified file 'sql/opt_range.h'
--- a/sql/opt_range.h	2010-12-17 12:58:04 +0000
+++ b/sql/opt_range.h	2011-03-23 09:52:24 +0000
@@ -285,11 +285,15 @@ public:
   virtual bool clustered_pk_range() { return false; }
   
   /*
-    Request that this quick select produces sorted output. Not all quick
-    selects can do it, the caller is responsible for calling this function
-    only for those quick selects that can.
+    Request that this quick select produces sorted output if 'sort==true'.
+    Not all quick selects can provide sorted output, the caller is responsible 
+    for calling this function only for those quick selects that can.
+    Caller is allowed to later cancel its sorted request by setting 
+    'sort==false'. However, the implementation is allowed to provide sorted 
+    output even in this case if benificial, or required by implementation 
+    internals.
   */
-  virtual void need_sorted_output() = 0;
+  virtual void need_sorted_output(bool sort) = 0;
   enum {
     QS_TYPE_RANGE = 0,
     QS_TYPE_INDEX_MERGE = 1,
@@ -399,7 +403,7 @@ uint quick_range_seq_next(range_seq_t rs
 
 /*
   Quick select that does a range scan on a single key. The records are
-  returned in key order.
+  returned in key order if ::need_sorted_output(true) has been called.
 */
 class QUICK_RANGE_SELECT : public QUICK_SELECT_I
 {
@@ -465,7 +469,7 @@ public:
                      MEM_ROOT *parent_alloc, bool *create_error);
   ~QUICK_RANGE_SELECT();
   
-  void need_sorted_output();
+  void need_sorted_output(bool sort);
   int init();
   int reset(void);
   int get_next();
@@ -569,7 +573,7 @@ public:
   ~QUICK_INDEX_MERGE_SELECT();
 
   int  init();
-  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
+  void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -647,7 +651,7 @@ public:
   ~QUICK_ROR_INTERSECT_SELECT();
 
   int  init();
-  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
+  void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -718,7 +722,7 @@ public:
   ~QUICK_ROR_UNION_SELECT();
 
   int  init();
-  void need_sorted_output() { DBUG_ASSERT(0); /* Can't do it */ }
+  void need_sorted_output(bool sort) { DBUG_ASSERT(!sort); /* Can't do it */ }
   int  reset(void);
   int  get_next();
   bool reverse_sorted() { return false; }
@@ -860,7 +864,7 @@ public:
   void adjust_prefix_ranges();
   bool alloc_buffers();
   int init();
-  void need_sorted_output() { /* always do it */ }
+  void need_sorted_output(bool sort) { /* always do it */ }
   int reset();
   int get_next();
   bool reverse_sorted() { return false; }

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2011-03-17 12:45:19 +0000
+++ b/sql/sql_select.cc	2011-03-23 09:52:24 +0000
@@ -1467,7 +1467,7 @@ bool setup_semijoin_dups_elimination(JOI
            output is not supported.
         */
         if (tab->select && tab->select->quick)
-          tab->select->quick->need_sorted_output();
+          tab->select->quick->need_sorted_output(true);
 
         /* Calculate key length */
         keylen= 0;
@@ -2743,6 +2743,23 @@ JOIN::save_join_tab()
 
 
 /**
+  There may be a pending 'sorted' request on the specified 
+  'join_tab' which we now has decided we can ignore.
+*/
+static void
+disable_sorted_access(JOIN_TAB* join_tab)
+{
+  DBUG_ENTER("disable_sorted_access");
+  join_tab->sorted= 0;
+  if (join_tab->select && join_tab->select->quick)
+  {
+    join_tab->select->quick->need_sorted_output(false);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/**
   Exec select.
 
   @todo
@@ -2921,7 +2938,7 @@ JOIN::exec()
         curr_join->const_tables != curr_join->tables && 
         curr_join->best_positions[curr_join->const_tables].sj_strategy 
           != SJ_OPT_LOOSE_SCAN)
-      curr_join->join_tab[curr_join->const_tables].sorted= 0;
+      disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]);
     if ((tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table, 0)))
     {
       error= tmp_error;
@@ -3087,7 +3104,7 @@ JOIN::exec()
       curr_join->group_list= 0;
       if (!curr_join->sort_and_group &&
           curr_join->const_tables != curr_join->tables)
-        curr_join->join_tab[curr_join->const_tables].sorted= 0;
+        disable_sorted_access(&curr_join->join_tab[curr_join->const_tables]);
       if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
 	  (tmp_error= do_select(curr_join, (List<Item> *) 0, curr_tmp_table,
 				0)))
@@ -11176,9 +11193,12 @@ make_join_readinfo(JOIN *join, ulonglong
 {
   uint i, jcl;
   bool statistics= test(!(join->select_options & SELECT_DESCRIBE));
-  bool sorted= 1;
   uint first_sjm_table= MAX_TABLES;
   uint last_sjm_table= MAX_TABLES;
+
+  /* First table sorted if ORDER or GROUP BY was specified */
+  bool sorted= (join->order || join->group_list);
+
   DBUG_ENTER("make_join_readinfo");
 
   if (setup_semijoin_dups_elimination(join, options, no_jbuf_after))
@@ -11195,12 +11215,8 @@ make_join_readinfo(JOIN *join, ulonglong
     tab->next_select=sub_select;		/* normal select */
     tab->use_join_cache= JOIN_CACHE::ALG_NONE;
     tab->cache_idx_cond= 0;
-    /* 
-      TODO: don't always instruct first table's ref/range access method to 
-      produce sorted output.
-    */
     tab->sorted= sorted;
-    sorted= 0;                                  // only first must be sorted
+    sorted= false;                              // only first must be sorted
     table->status=STATUS_NO_RECORD;
     pick_table_access_method (tab);
 
@@ -18179,6 +18195,7 @@ join_read_key2(JOIN_TAB *tab, TABLE *tab
   int error;
   if (!table->file->inited)
   {
+    DBUG_ASSERT(!tab->sorted);  // Don't expect sort req. for single row.
     table->file->ha_index_init(table_ref->key, tab->sorted);
   }
 
@@ -18488,7 +18505,7 @@ join_read_last(JOIN_TAB *tab)
   tab->read_record.index=tab->index;
   tab->read_record.record=table->record[0];
   if (!table->file->inited)
-    table->file->ha_index_init(tab->index, 1);
+    table->file->ha_index_init(tab->index, tab->sorted);
   if ((error= tab->table->file->ha_index_last(tab->table->record[0])))
     return report_error(table, error);
   return 0;
@@ -18512,7 +18529,7 @@ join_ft_read_first(JOIN_TAB *tab)
   TABLE *table= tab->table;
 
   if (!table->file->inited)
-    table->file->ha_index_init(tab->ref.key, 1);
+    table->file->ha_index_init(tab->ref.key, tab->sorted);
   table->file->ft_init();
 
   if ((error= table->file->ft_read(table->record[0])))
@@ -20290,7 +20307,7 @@ check_reverse_order:                  
       }
     }
     else if (select && select->quick)
-      select->quick->need_sorted_output();
+      select->quick->need_sorted_output(true);
 
   } // QEP has been modified
 
@@ -23197,9 +23214,22 @@ void select_describe(JOIN *join, bool ne
 
         if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE &&
             !(((QUICK_RANGE_SELECT*)(tab->select->quick))->mrr_flags &
-             HA_MRR_USE_DEFAULT_IMPL))
+             (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))) 
         {
-	  extra.append(STRING_WITH_LEN("; Using MRR"));
+          /*
+            During normal execution of a query, multi_range_read_init() is
+            called to initialize MRR. If HA_MRR_SORTED is set at this point,
+            multi_range_read_init() for any native MRR implementation will
+            revert to default MRR because they cannot produce sorted output
+            currently.
+            Calling multi_range_read_init() can potentially be costly, so it
+            is not done when executing an EXPLAIN. We therefore make the
+            assumption that HA_MRR_SORTED means no MRR. If some MRR native
+            implementation will support sorted output in the future, a
+            function "bool mrr_supports_sorted()" should be added in the
+            handler.
+          */
+          extra.append(STRING_WITH_LEN("; Using MRR"));
         }
         if (need_tmp_table)
         {


Attachment: [text/bzr-bundle] bzr/ole.john.aske@oracle.com-20110323095224-z84ht93cb8eb84f4.bundle
Thread
bzr commit into mysql-trunk branch (ole.john.aske:3311) Bug#11764737Ole John Aske23 Mar