List:Internals« Previous MessageNext Message »
From:Eric Jensen Date:June 6 2009 2:55am
Subject:Re: proposed design for UNION Order By optimization
View as plain text  
I now have a completed hack of a k-way merge implementation.  the  
patch is attached.  since i added a procedure and have a different  
version of autoconf locally, i had to run the following to build it:

patch -p0 < mysql-5.1.30.kway.patch; cd mysql-5.1.30; aclocal;  
automake; autoconf; ./configure; make

if you then do:

cd mysql-test; ./mysql-test-run --debug union

you'll see that  "records output by select_idx" which is logged in var/ 
log/master.trace shows we are not reading all of the rows from the  
component selects, only those necessary for the merge.

it's a hack in the sense that it:

1.  doesn't do the validation necessary to ensure component selects  
are sorted identically to the external one
2.  some of the validation that it does do isn't working (see the  
broken tests i added)
3.  doesn't show anything meaningful in explain
4.  doesn't make any attempt to automatically select this plan via the  
optimizer (relying on a query hint)

it is missing the following functionality for it to be efficient:

A.  take the value selected with select_idx and use it as a condition  
for the next join.  instead, it's just using limit/offset
B.  it doesn't seem to be using the btree index i create on the  
temporary table for either the select_idx lookups or the final sort

anyone's help figuring out / adding any of these things would be  
appreciated.  the patch is attached.

thanks,
eric

diff -rub mysql-5.1.30/client/mysql.cc mysql-5.1.30.kway/client/mysql.cc
--- mysql-5.1.30/client/mysql.cc        2008-11-14 08:34:16.000000000  
-0800
+++ mysql-5.1.30.kway/client/mysql.cc   2009-04-13 18:58:10.000000000  
-0700
@@ -692,6 +692,7 @@
    { "SQL_NO_CACHE", 0, 0, 0, ""},
    { "SQL_SMALL_RESULT", 0, 0, 0, ""},
    { "SQL_THREAD", 0, 0, 0, ""},
+  { "SQL_ASSUME_SORTED_UNION", 0, 0, 0, ""},
    { "SQL_TSI_FRAC_SECOND", 0, 0, 0, ""},
    { "SQL_TSI_SECOND", 0, 0, 0, ""},
    { "SQL_TSI_MINUTE", 0, 0, 0, ""},
diff -rub mysql-5.1.30/libmysqld/Makefile.am mysql-5.1.30.kway/ 
libmysqld/Makefile.am
--- mysql-5.1.30/libmysqld/Makefile.am  2008-11-14 08:34:37.000000000  
-0800
+++ mysql-5.1.30.kway/libmysqld/Makefile.am     2009-05-11  
19:44:40.000000000 -0700
@@ -61,7 +61,7 @@
         opt_sum.cc procedure.cc records.cc sql_acl.cc \
         sql_load.cc discover.cc sql_locale.cc \
         sql_profile.cc \
-       sql_analyse.cc sql_base.cc sql_cache.cc sql_class.cc \
+       sql_analyse.cc sql_base.cc sql_cache.cc sql_class.cc  
procedure_intholder.cc \
         sql_crypt.cc sql_db.cc sql_delete.cc sql_error.cc  
sql_insert.cc \
         sql_lex.cc sql_list.cc sql_manager.cc sql_map.cc \
         scheduler.cc sql_connect.cc sql_parse.cc \
diff -rub mysql-5.1.30/mysql-test/r/union.result mysql-5.1.30.kway/ 
mysql-test/r/union.result
--- mysql-5.1.30/mysql-test/r/union.result      2008-11-14  
09:31:01.000000000 -0800
+++ mysql-5.1.30.kway/mysql-test/r/union.result 2009-06-05  
18:53:48.000000000 -0700
@@ -474,6 +474,84 @@
  select a from t1 union select a from t2 order by t2.a;
  ERROR 42S22: Unknown column 't2.a' in 'order clause'
  drop table t1,t2;
+create table t1 (a int);
+insert into t1 values (1),(2),(3);
+create table t2 (a int);
+insert into t2 values (3),(4),(5);
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 1) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 1) ORDER BY a LIMIT 1;
+a
+1
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 3) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 3) ORDER BY a LIMIT 3;
+a
+1
+2
+3
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a DESC LIMIT 3)  
UNION (SELECT * FROM t2 ORDER BY a DESC LIMIT 3) ORDER BY a DESC LIMIT  
3;
+a
+5
+4
+3
+drop table t1,t2;
+create table t1 (a int);
+insert into t1 values (1),(3),(5),(6);
+create table t2 (a int);
+insert into t2 values (2),(3),(4),(7);
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a LIMIT 4;
+a
+1
+2
+3
+4
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) LIMIT 4;
+a
+1
+3
+5
+6
+Warnings:
+Warning        1617    Invalid use of SQL_ASSUME_SORTED_UNION ignored
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a;
+a
+1
+2
+3
+4
+5
+Warnings:
+Warning        1617    Invalid use of SQL_ASSUME_SORTED_UNION ignored
+(SELECT SQL_ASSUME_SORTED_UNION SQL_CALC_FOUND_ROWS * FROM t1 ORDER  
by a LIMIT 4) UNION (SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a  
LIMIT 4;
+a
+1
+2
+3
+4
+Warnings:
+Warning        1617    Invalid use of SQL_ASSUME_SORTED_UNION ignored
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
ALL (SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a LIMIT 4;
+a
+1
+2
+3
+3
+Warnings:
+Warning        1617    Invalid use of SQL_ASSUME_SORTED_UNION ignored
+drop table t1,t2;
+create table t1 (s char(200));
+insert into t1 values (repeat("1",200)),(repeat("3",200)),(repeat("5", 
200));
+create table t2 select * from t1;
+insert into t1 values (repeat("2",200)),(repeat("4",200)),(repeat("6", 
200)),(repeat("7",200));
+set local tmp_table_size=1024;
+select count(*) from ((select SQL_ASSUME_SORTED_UNION * from t1 order  
by 1) union (select * from t2 order by 1) order by 1 limit 7) b;
+count(*)
+7
+select count(*) from t1;
+count(*)
+7
+select count(*) from t2;
+count(*)
+3
+drop table t1,t2;
+set local tmp_table_size=default;
  select length(version()) > 1 as `*` UNION select 2;
  *
  1
diff -rub mysql-5.1.30/mysql-test/t/union.test mysql-5.1.30.kway/mysql- 
test/t/union.test
--- mysql-5.1.30/mysql-test/t/union.test        2008-11-14  
09:30:23.000000000 -0800
+++ mysql-5.1.30.kway/mysql-test/t/union.test   2009-06-05  
18:59:30.000000000 -0700
@@ -319,6 +319,47 @@
  drop table t1,t2;

  #
+# Test k-way merge
+#
+
+create table t1 (a int);
+insert into t1 values (1),(2),(3);
+create table t2 (a int);
+insert into t2 values (3),(4),(5);
+
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 1) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 1) ORDER BY a LIMIT 1;
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 3) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 3) ORDER BY a LIMIT 3;
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a DESC LIMIT 3)  
UNION (SELECT * FROM t2 ORDER BY a DESC LIMIT 3) ORDER BY a DESC LIMIT  
3;
+
+drop table t1,t2;
+create table t1 (a int);
+insert into t1 values (1),(3),(5),(6);
+create table t2 (a int);
+insert into t2 values (2),(3),(4),(7);
+
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a LIMIT 4;
+
+# All of these should ignore SQL_ASSUME_SORTED_UNION
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) LIMIT 4;
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a;
+(SELECT SQL_ASSUME_SORTED_UNION SQL_CALC_FOUND_ROWS * FROM t1 ORDER  
by a LIMIT 4) UNION (SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a  
LIMIT 4;
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
ALL (SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a LIMIT 4;
+
+drop table t1,t2;
+
+# conversion memory->disk table with merge
+create table t1 (s char(200));
+insert into t1 values (repeat("1",200)),(repeat("3",200)),(repeat("5", 
200));
+create table t2 select * from t1;
+insert into t1 values (repeat("2",200)),(repeat("4",200)),(repeat("6", 
200)),(repeat("7",200));
+set local tmp_table_size=1024;
+select count(*) from ((select SQL_ASSUME_SORTED_UNION * from t1 order  
by 1) union (select * from t2 order by 1) order by 1 limit 7) b;
+select count(*) from t1;
+select count(*) from t2;
+drop table t1,t2;
+set local tmp_table_size=default;
+
+#
  # Problem with alias '*' (BUG #1249)
  #

diff -rub mysql-5.1.30/sql/Makefile.am mysql-5.1.30.kway/sql/Makefile.am
--- mysql-5.1.30/sql/Makefile.am        2008-11-14 08:37:08.000000000  
-0800
+++ mysql-5.1.30.kway/sql/Makefile.am   2009-05-11 19:44:26.000000000  
-0700
@@ -104,7 +104,7 @@
                         ha_partition.cc \
                         sql_db.cc sql_table.cc sql_rename.cc  
sql_crypt.cc \
                         sql_load.cc mf_iocache.cc field_conv.cc  
sql_show.cc \
-                       sql_udf.cc sql_analyse.cc sql_analyse.h  
sql_cache.cc \
+                       sql_udf.cc sql_analyse.cc sql_analyse.h  
sql_cache.cc procedure_intholder.cc \
                         slave.cc sql_repl.cc rpl_filter.cc  
rpl_tblmap.cc \
                         rpl_utility.cc rpl_injector.cc rpl_rli.cc  
rpl_mi.cc \
                         rpl_reporting.cc \
diff -rub mysql-5.1.30/sql/item_subselect.cc mysql-5.1.30.kway/sql/ 
item_subselect.cc
--- mysql-5.1.30/sql/item_subselect.cc  2008-11-14 08:37:11.000000000  
-0800
+++ mysql-5.1.30.kway/sql/item_subselect.cc     2008-12-12  
10:37:52.000000000 -0800
@@ -1955,7 +1955,7 @@
        }
      }

-    join->exec();
+    join->exec(false);

      /* Enable the optimizations back */
      for (JOIN_TAB **ptab= changed_tabs; ptab != last_changed_tab;  
ptab++)
diff -rub mysql-5.1.30/sql/item_sum.cc mysql-5.1.30.kway/sql/item_sum.cc
--- mysql-5.1.30/sql/item_sum.cc        2008-11-14 08:37:11.000000000  
-0800
+++ mysql-5.1.30.kway/sql/item_sum.cc   2009-06-04 13:18:08.000000000  
-0700
@@ -2593,7 +2593,7 @@
    if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*)  
0, 1,
                                 0,
                                 (select_lex->options | thd->options),
-                               HA_POS_ERROR, (char*)"")))
+                               HA_POS_ERROR, (char*)"", false)))
      return TRUE;
    table->file->extra(HA_EXTRA_NO_ROWS);                // Don't  
update rows
    table->no_rows=1;
@@ -3461,7 +3461,7 @@
    if (!(table= create_tmp_table(thd, tmp_table_param, all_fields,
                                  (ORDER*) 0, 0, TRUE,
                                  (select_lex->options | thd->options),
-                                HA_POS_ERROR, (char*) "")))
+                                HA_POS_ERROR, (char*) "", false)))
      DBUG_RETURN(TRUE);
    table->file->extra(HA_EXTRA_NO_ROWS);
    table->no_rows= 1;
diff -rub mysql-5.1.30/sql/lex.h mysql-5.1.30.kway/sql/lex.h
--- mysql-5.1.30/sql/lex.h      2008-11-14 08:37:12.000000000 -0800
+++ mysql-5.1.30.kway/sql/lex.h 2009-04-13 19:44:30.000000000 -0700
@@ -498,6 +498,7 @@
    { "SQL_NO_CACHE",    SYM(SQL_NO_CACHE_SYM)},
    { "SQL_SMALL_RESULT", SYM(SQL_SMALL_RESULT)},
    { "SQL_THREAD",      SYM(SQL_THREAD)},
+  { "SQL_ASSUME_SORTED_UNION", SYM(SQL_ASSUME_SORTED_UNION)},
    { "SQL_TSI_FRAC_SECOND", SYM(FRAC_SECOND_SYM)},
    { "SQL_TSI_SECOND",   SYM(SECOND_SYM)},
    { "SQL_TSI_MINUTE",   SYM(MINUTE_SYM)},
diff -rub mysql-5.1.30/sql/mysql_priv.h mysql-5.1.30.kway/sql/ 
mysql_priv.h
--- mysql-5.1.30/sql/mysql_priv.h       2008-11-14 08:37:13.000000000  
-0800
+++ mysql-5.1.30.kway/sql/mysql_priv.h  2009-04-13 18:56:19.000000000  
-0700
@@ -476,7 +476,7 @@
  */
  #define TMP_TABLE_FORCE_MYISAM          (ULL(1) << 32)
  #define OPTION_PROFILING                (ULL(1) << 33)
-
+#define SELECT_ASSUME_SORTED_UNION      (ULL(1) << 34) // SELECT, user


  /**
diff -rub mysql-5.1.30/sql/procedure.cc mysql-5.1.30.kway/sql/ 
procedure.cc
--- mysql-5.1.30/sql/procedure.cc       2008-11-14 08:37:14.000000000  
-0800
+++ mysql-5.1.30.kway/sql/procedure.cc  2009-05-11 18:08:15.000000000  
-0700
@@ -23,6 +23,7 @@
  #include "mysql_priv.h"
  #include "procedure.h"
  #include "sql_analyse.h"                       // Includes procedure
+#include "procedure_intholder.h"
  #ifdef USE_PROC_RANGE
  #include "proc_range.h"
  #endif
@@ -37,6 +38,7 @@
    { "split_count",proc_count_range_init },     // Internal procedure  
at TCX
    { "matris_ranges",proc_matris_range_init },  // Internal procedure  
at TCX
  #endif
+  { "intholder",proc_intholder_init },         // Return an int that  
is internally set.  FIXME:  make private somehow
    { "analyse",proc_analyse_init }              // Analyse a result
  };

diff -rub mysql-5.1.30/sql/procedure_intholder.cc mysql-5.1.30/sql/ 
procedure_intholder.cc
--- mysql-5.1.30/sql/procedure_intholder.cc     1969-12-31  
16:00:00.000000000 -0800
+++ mysql-5.1.30/sql/procedure_intholder.cc     2009-06-05  
12:09:51.000000000 -0700
@@ -0,0 +1,75 @@
+/* procedure that returns an internally set int */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation                         // gcc: Class  
implementation
+#endif
+
+#include "mysql_priv.h"
+#include "procedure.h"
+
+#include "procedure_intholder.h"
+
+// Create and register the actual procedure object
+Procedure *proc_intholder_init(THD *thd, ORDER *param,
+ 
                                                                               
select_result 
  *result,
+ 
                                                                                List 
<Item> &field_list)
+{
+    DBUG_ENTER("proc_intholder_init");
+
+    proc_intholder *pc = new proc_intholder(result);
+
+    DBUG_RETURN(pc);
+}
+
+// modify the result set column types here
+bool proc_intholder::change_columns(List<Item> &field_list)
+{
+    DBUG_ENTER("proc_intholder::change_columns");
+
+    // and attach it to the column list
+    field_list.push_front(value_column);
+
+    DBUG_RETURN(0);
+}
+
+// not used here and not in ANALYSE() either,
+// probably called for each row before grouping
+void proc_intholder::add(void)
+{
+    DBUG_ENTER("proc_intholder::add");
+
+    DBUG_VOID_RETURN;
+}
+
+// not used here and not in ANALYSE() either,
+// probably called whenever a group rows were fully processed
+void proc_intholder::end_group(void)
+{
+    DBUG_ENTER("proc_intholder::end_group");
+
+    DBUG_VOID_RETURN;
+}
+
+// called before sending result rows, you may modify the results here
+int proc_intholder::send_row(List<Item> &field_list  
__attribute__((unused)))
+{
+    DBUG_ENTER("proc_intholder::send_row");
+
+    // set value in result row
+    value_column->set(value);
+
+    // now send the modified results
+    if (result->send_data(field_list))
+       DBUG_RETURN(-1);
+
+      DBUG_RETURN(0);
+    }
+
+// called after sending all result rows,
+// you may add summary rows here as ANALYSE() does
+int proc_intholder::end_of_records(void)
+{
+    DBUG_ENTER("proc_intholder::end_of_records");
+
+    DBUG_RETURN(0);
+}
diff -Nrub mysql-5.1.30/sql/procedure_intholder.h mysql-5.1.30/sql/ 
procedure_intholder.h
--- mysql-5.1.30/sql/procedure_intholder.h      1969-12-31  
16:00:00.000000000 -0800
+++ mysql-5.1.30/sql/procedure_intholder.h      2009-06-05  
12:09:51.000000000 -0700
@@ -0,0 +1,52 @@
+/* procedure that returns an internally set int */
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface                              /* gcc class  
implementation */
+#endif
+
+#ifndef PROCEDURE_INTHOLDER_H
+#define PROCEDURE_INTHOLDER_H
+
+Procedure *proc_intholder_init(THD *thd, ORDER *param,
+                                                    select_result  
*result,
+                                                    List<Item>  
&field_list);
+
+
+class proc_intholder: public Procedure
+{
+  protected:
+    longlong value;
+    Item_proc_int *value_column;
+    ORDER param_fields_value;
+
+  public:
+
+    proc_intholder(select_result *res) :Procedure(res, 0), value(-1)
+    {
+      // create a new column item
+      value_column = new Item_proc_int("IntHolder");
+      param_fields_value.item= (Item**) &value_column;
+      param_fields_value.next= NULL;
+      param_fields = &param_fields_value;
+    }
+
+    ~proc_intholder()
+    {
+      // Items are deleted by Query_arena::free_items
+      //delete value_column;
+    }
+
+    void set(longlong nr) { value=nr; }
+
+    virtual void add(void);
+    virtual bool change_columns(List<Item> &fields);
+    virtual int  send_row(List<Item> &fields);
+    virtual void end_group(void);
+    virtual int end_of_records(void);
+
+    friend Procedure *proc_intholder_init(THD *thd, ORDER *param,
+ 
                                                                             
select_result 
  *result,
+ 
                                                                              List 
<Item> &field_list);
+};
+
+#endif
diff -rub mysql-5.1.30/sql/share/errmsg.txt mysql-5.1.30.kway/sql/ 
share/errmsg.txt
--- mysql-5.1.30/sql/share/errmsg.txt   2008-11-14 08:37:17.000000000  
-0800
+++ mysql-5.1.30.kway/sql/share/errmsg.txt      2009-06-05  
11:35:49.000000000 -0700
@@ -6139,3 +6139,7 @@

  ER_DELAYED_NOT_SUPPORTED
    eng "DELAYED option not supported for table '%-.192s'"
+
+ER_WARN_INVALID_SQL_ASSUME_SORTED_UNION
+  eng "Invalid use of SQL_ASSUME_SORTED_UNION ignored"
+
diff -rub mysql-5.1.30/sql/sql_class.cc mysql-5.1.30.kway/sql/ 
sql_class.cc
--- mysql-5.1.30/sql/sql_class.cc       2008-11-14 08:37:19.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_class.cc  2009-05-26 19:04:52.000000000  
-0700
@@ -2052,6 +2052,49 @@
  }


+int select_singlerow_internal::prepare(List<Item> &list,  
SELECT_LEX_UNIT *u)
+{
+  Item *sel_item;
+  List_iterator_fast<Item> li(list);
+
+  unit= u;
+
+  if (!(row= (Item_cache**)  
sql_alloc(sizeof(Item_cache*)*list.elements)))
+    return 1;
+
+  for (uint i= 0; (sel_item= li++); i++)
+  {
+    if (!(row[i]= Item_cache::get_cache(sel_item)))
+      return 1;
+    row[i]->setup(sel_item);
+  }
+
+  return 0;
+}
+
+
+bool select_singlerow_internal::send_data(List<Item> &items)
+{
+  DBUG_ENTER("select_singlerow_internal::send_data");
+  if (assigned())
+  {
+    my_message(ER_SUBQUERY_NO_1_ROW, ER(ER_SUBQUERY_NO_1_ROW), MYF(0));
+    DBUG_RETURN(1);
+  }
+  if (unit->offset_limit_cnt)
+  {                                      // Using limit offset,count
+    unit->offset_limit_cnt--;
+    DBUG_RETURN(0);
+  }
+  List_iterator_fast<Item> li(items);
+  Item *val_item;
+  for (uint i= 0; (val_item= li++); i++)
+    row[i]->store(val_item);
+  assigned(1);
+  DBUG_RETURN(0);
+}
+
+
  select_subselect::select_subselect(Item_subselect *item_arg)
  {
    item= item_arg;
diff -rub mysql-5.1.30/sql/sql_class.h mysql-5.1.30.kway/sql/sql_class.h
--- mysql-5.1.30/sql/sql_class.h        2008-11-14 08:37:19.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_class.h   2009-05-26 18:58:37.000000000  
-0700
@@ -2599,7 +2599,25 @@

    bool create_result_table(THD *thd, List<Item> *column_types,
                             bool is_distinct, ulonglong options,
-                           const char *alias);
+                           const char *alias,
+                           bool kway_merge);
+};
+
+/* Stores single row of values from internal select, used by kway  
merge */
+class select_singlerow_internal :public select_result_interceptor
+{
+  my_bool value_assigned; /* value already assigned */
+protected:
+  Item_cache **row;
+public:
+  select_singlerow_internal() :value_assigned(0) {}
+  int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
+  bool send_data(List<Item> &items);
+  bool send_eof() { return 0; };
+  Item* element_index(uint i) { return my_reinterpret_cast(Item*) 
(row[i]); }
+  Item** addr(uint i) { return (Item**)row + i; }
+  bool assigned() { return value_assigned; }
+  void assigned(bool a) { value_assigned= a; }
  };

  /* Base subselect interface class */
diff -rub mysql-5.1.30/sql/sql_cursor.cc mysql-5.1.30.kway/sql/ 
sql_cursor.cc
--- mysql-5.1.30/sql/sql_cursor.cc      2008-11-14 08:37:19.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_cursor.cc 2009-05-08 13:18:54.000000000  
-0700
@@ -706,7 +706,7 @@
  {
    DBUG_ASSERT(table == 0);
    if (create_result_table(unit->thd, unit->get_unit_column_types(),
-                          FALSE, thd->options |  
TMP_TABLE_ALL_COLUMNS, ""))
+                          FALSE, thd->options |  
TMP_TABLE_ALL_COLUMNS, "", FALSE))
      return TRUE;

    materialized_cursor= new (&table->mem_root)
diff -rub mysql-5.1.30/sql/sql_derived.cc mysql-5.1.30.kway/sql/ 
sql_derived.cc
--- mysql-5.1.30/sql/sql_derived.cc     2008-11-14 08:37:19.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_derived.cc        2009-05-08  
13:00:09.000000000 -0700
@@ -168,7 +168,8 @@
      */
      if ((res= derived_result->create_result_table(thd, &unit->types,  
FALSE,
                                                   create_options,
-                                                 orig_table_list- 
 >alias)))
+                                                 orig_table_list- 
 >alias,
+                                                 FALSE)))
        goto exit;

      table= derived_result->table;
diff -rub mysql-5.1.30/sql/sql_lex.cc mysql-5.1.30.kway/sql/sql_lex.cc
--- mysql-5.1.30/sql/sql_lex.cc 2008-11-14 08:37:20.000000000 -0800
+++ mysql-5.1.30.kway/sql/sql_lex.cc    2009-06-05 16:06:22.000000000  
-0700
@@ -1516,6 +1516,7 @@
    fake_select_lex= 0;
    cleaned= 0;
    item_list.empty();
+  kway_select_idx_item_list.empty();
    describe= 0;
    found_rows_for_union= 0;
  }
@@ -1523,6 +1524,8 @@
  void st_select_lex::init_query()
  {
    st_select_lex_node::init_query();
+  suspended_select_limit_cnt= HA_POS_ERROR;
+  suspended_offset_limit_cnt= 0;
    table_list.empty();
    top_join_list.empty();
    join_list= &top_join_list;
diff -rub mysql-5.1.30/sql/sql_lex.h mysql-5.1.30.kway/sql/sql_lex.h
--- mysql-5.1.30/sql/sql_lex.h  2008-11-14 08:37:20.000000000 -0800
+++ mysql-5.1.30.kway/sql/sql_lex.h     2009-06-03 17:28:14.000000000  
-0700
@@ -490,6 +490,8 @@

    // list of fields which points to temporary table for union
    List<Item> item_list;
+  // list of fields which points to temporary table for union,  
including select_idx
+  List<Item> kway_select_idx_item_list;
    /*
      list of types of items inside union (used for union & derived  
tables)

@@ -552,6 +554,7 @@
    void set_limit(st_select_lex *values);
    void set_thd(THD *thd_arg) { thd= thd_arg; }
    inline bool is_union ();
+  inline bool kway_merge ();

    friend void lex_start(THD *thd);
    friend int subselect_union_engine::exec();
@@ -590,6 +593,9 @@
    List<Item_func_match> *ftfunc_list;
    List<Item_func_match> ftfunc_list_alloc;
    JOIN *join; /* after JOIN::prepare it is pointer to corresponding  
JOIN */
+  JOIN *suspended_join; /* after JOIN::exec(true), pointer to  
suspended JOIN */
+  /* after JOIN::exec(true), suspended LIMIT counters */
+  ha_rows suspended_select_limit_cnt, suspended_offset_limit_cnt;
    List<TABLE_LIST> top_join_list; /* join list of the top  
level          */
    List<TABLE_LIST> *join_list;    /* list for the currently parsed  
join  */
    TABLE_LIST *embedding;          /* table embedding to the above  
list   */
diff -rub mysql-5.1.30/sql/sql_select.cc mysql-5.1.30.kway/sql/ 
sql_select.cc
--- mysql-5.1.30/sql/sql_select.cc      2008-11-14 08:37:21.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_select.cc 2009-06-04 14:03:33.000000000  
-0700
@@ -123,7 +123,7 @@
  static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM  
*param,
                                     ulonglong options);
  static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
-                    Procedure *proc);
+                    Procedure *proc,bool suspendable);

  static enum_nested_loop_state
  evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
@@ -1432,7 +1432,7 @@
                            group_list && simple_group,
                            select_options,
                             tmp_rows_limit,
-                          (char *) "")))
+                          (char *) "", false)))
                 {
        DBUG_RETURN(1);
      }
@@ -1620,6 +1620,11 @@
  /**
    Exec select.

+  SYNOPSIS
+    exec()
+      suspendable  Pass to final do_select sweep to indicate it  
should not call
+                   sub_select.  Incompatible with cursored select.
+
    @todo
      Note, that create_sort_index calls test_if_skip_sort_order and may
      finally replace sorting with index scan if there is a LIMIT  
clause in
@@ -1627,24 +1632,29 @@

    @todo
      When can we have here thd->net.report_error not zero?
+
+  RETURN Sending data join that was suspended if suspendable
  */
-void
-JOIN::exec()
+JOIN*
+JOIN::exec(bool suspendable)
  {
+  JOIN *suspended_join= NULL;
    List<Item> *columns_list= &fields_list;
    int      tmp_error;
    DBUG_ENTER("JOIN::exec");

    thd_proc_info(thd, "executing");
    error= 0;
+
    if (procedure)
    {
      procedure_fields_list= fields_list;
+
      if (procedure->change_columns(procedure_fields_list) ||
         result->prepare(procedure_fields_list, unit))
      {
        thd->limit_found_rows= thd->examined_row_count= 0;
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
      }
      columns_list= &procedure_fields_list;
    }
@@ -1688,7 +1698,7 @@
      /* Single select (without union) always returns 0 or 1 row */
      thd->limit_found_rows= send_records;
      thd->examined_row_count= 0;
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);
    }
    /*
      Don't reset the found rows count if there're no tables as
@@ -1706,12 +1716,12 @@
                             select_options,
                             zero_result_cause,
                             having);
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);
    }

    if ((this->select_lex->options & OPTION_SCHEMA_TABLE) &&
        get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);

    if (select_options & SELECT_DESCRIBE)
    {
@@ -1744,7 +1754,7 @@
                     order != 0 && !skip_sort_order,
                     select_distinct,
                      !tables ? "No tables used" : NullS);
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);
    }

    JOIN *curr_join= this;
@@ -1779,10 +1789,10 @@
      if (!curr_join->sort_and_group &&
          curr_join->const_tables != curr_join->tables)
        curr_join->join_tab[curr_join->const_tables].sorted= 0;
-    if ((tmp_error= do_select(curr_join, (List<Item> *) 0,  
curr_tmp_table, 0)))
+    if ((tmp_error= do_select(curr_join, (List<Item> *) 0,  
curr_tmp_table, 0, false)))
      {
        error= tmp_error;
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
      }
      curr_tmp_table->file->info(HA_STATUS_VARIABLE);

@@ -1800,14 +1810,14 @@
         if (change_to_use_tmp_fields(thd, items1,
                                      tmp_fields_list1, tmp_all_fields1,
                                      fields_list.elements, all_fields))
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
        }
        else
        {
         if (change_refs_to_tmp_fields(thd, items1,
                                       tmp_fields_list1,  
tmp_all_fields1,
                                       fields_list.elements,  
all_fields))
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
        }
        curr_join->tmp_all_fields1= tmp_all_fields1;
        curr_join->tmp_fields_list1= tmp_fields_list1;
@@ -1862,7 +1872,7 @@
        /* Free first data from old join */
        curr_join->join_free();
        if (make_simple_join(curr_join, curr_tmp_table))
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        calc_group_buffer(curr_join, group_list);
        count_field_types(select_lex, &curr_join->tmp_table_param,
                         curr_join->tmp_all_fields1,
@@ -1895,8 +1905,8 @@
                                                 !curr_join->group_list,
                                                 1, curr_join- 
 >select_options,
                                                 HA_POS_ERROR,
-                                               (char *) "")))
-         DBUG_VOID_RETURN;
+                                               (char *) "", false)))
+         DBUG_RETURN(suspended_join);
         curr_join->exec_tmp_table2= exec_tmp_table2;
        }
        if (curr_join->group_list)
@@ -1904,13 +1914,13 @@
         thd_proc_info(thd, "Creating sort index");
         if (curr_join->join_tab == join_tab && save_join_tab())
         {
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
         }
         if (create_sort_index(thd, curr_join, curr_join->group_list,
                               HA_POS_ERROR, HA_POS_ERROR, FALSE) ||
             make_group_fields(this, curr_join))
         {
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
         }
          sortorder= curr_join->sortorder;
        }
@@ -1934,17 +1944,17 @@
        }
        if (curr_join->make_sum_func_list(*curr_all_fields,  
*curr_fields_list,
                                         1, TRUE))
-        DBUG_VOID_RETURN;
+        DBUG_RETURN(suspended_join);
        curr_join->group_list= 0;
        if (!curr_join->sort_and_group &&
            curr_join->const_tables != curr_join->tables)
          curr_join->join_tab[curr_join->const_tables].sorted= 0;
        if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
           (tmp_error= do_select(curr_join, (List<Item> *) 0,  
curr_tmp_table,
-                               0)))
+                               0, false)))
        {
         error= tmp_error;
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        }
        end_read_record(&curr_join->join_tab->read_record);
        curr_join->const_tables= curr_join->tables; // Mark free for  
cleanup()
@@ -1957,7 +1967,7 @@
         if (change_to_use_tmp_fields(thd, items2,
                                      tmp_fields_list2, tmp_all_fields2,
                                      fields_list.elements,  
tmp_all_fields1))
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
         curr_join->tmp_fields_list2= tmp_fields_list2;
         curr_join->tmp_all_fields2= tmp_all_fields2;
        }
@@ -1979,13 +1989,13 @@
         curr_join->tmp_having->update_used_tables();
        if (remove_duplicates(curr_join, curr_tmp_table,
                             *curr_fields_list, curr_join->tmp_having))
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        curr_join->tmp_having=0;
        curr_join->select_distinct=0;
      }
      curr_tmp_table->reginfo.lock_type= TL_UNLOCK;
      if (make_simple_join(curr_join, curr_tmp_table))
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
      calc_group_buffer(curr_join, curr_join->group_list);
      count_field_types(select_lex, &curr_join->tmp_table_param,
                        *curr_all_fields, 0);
@@ -2000,7 +2010,7 @@
    {
      if (make_group_fields(this, curr_join))
      {
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
      }
      if (!items3)
      {
@@ -2032,7 +2042,7 @@
                                       1, TRUE) ||
          setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
          thd->is_fatal_error)
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
    }
    if (curr_join->group_list || curr_join->order)
    {
@@ -2055,7 +2065,7 @@
        {
         if (!curr_table->select)
           if (!(curr_table->select= new SQL_SELECT))
-           DBUG_VOID_RETURN;
+           DBUG_RETURN(suspended_join);
         if (!curr_table->select->cond)
           curr_table->select->cond= sort_table_cond;
         else                                    // This should never  
happen
@@ -2063,7 +2073,7 @@
           if (!(curr_table->select->cond=
                 new Item_cond_and(curr_table->select->cond,
                                   sort_table_cond)))
-           DBUG_VOID_RETURN;
+           DBUG_RETURN(suspended_join);
           /*
             Item_cond_and do not need fix_fields for execution, its  
parameters
             are fixed or do not need fix_fields, too
@@ -2111,7 +2121,7 @@
        }
        if (curr_join->join_tab == join_tab && save_join_tab())
        {
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        }
        /*
         Here we sort rows for ORDER BY/GROUP BY clause, if the  
optimiser
@@ -2129,7 +2139,7 @@
                             (select_options & OPTION_FOUND_ROWS ?
                              HA_POS_ERROR : unit->select_limit_cnt),
                              curr_join->group_list ? TRUE : FALSE))
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        sortorder= curr_join->sortorder;
        if (curr_join->const_tables != curr_join->tables &&
            !curr_join->join_tab[curr_join->const_tables].table- 
 >sort.io_cache)
@@ -2147,7 +2157,7 @@
    if (thd->is_error())
    {
      error= thd->is_error();
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);
    }
    curr_join->having= curr_join->tmp_having;
    curr_join->fields= curr_fields_list;
@@ -2156,6 +2166,10 @@
    if (is_top_level_join() && thd->cursor && tables != const_tables)
    {
      /*
+      Shouldn't be trying to do a k-way merge for a cursored result
+    */
+    DBUG_ASSERT(!suspendable);
+    /*
        We are here if this is JOIN::exec for the last select of the  
main unit
        and the client requested to open a cursor.
        We check that not all tables are constant because this case is  
not
@@ -2179,7 +2193,9 @@
      result->send_fields((procedure ? curr_join- 
 >procedure_fields_list :
                           *curr_fields_list),
                          Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
-    error= do_select(curr_join, curr_fields_list, NULL, procedure);
+    error= do_select(curr_join, curr_fields_list, NULL, procedure,  
suspendable);
+    if (error == NESTED_LOOP_SUSPENDED)
+      suspended_join= curr_join;
      thd->limit_found_rows= curr_join->send_records;
    }

@@ -2199,7 +2215,7 @@
         exec_tmp_table1 || exec_tmp_table2))
      set_items_ref_array(items0);

-  DBUG_VOID_RETURN;
+  DBUG_RETURN(suspended_join);
  }


@@ -2358,7 +2374,7 @@
    if (thd->is_error())
      goto err;

-  join->exec();
+  join->exec(false);

    if (thd->cursor && thd->cursor->is_open())
    {
@@ -9602,7 +9618,7 @@
  create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
                  ORDER *group, bool distinct, bool save_sum_fields,
                  ulonglong select_options, ha_rows rows_limit,
-                char *table_alias)
+                char *table_alias, bool force_btree_key)
  {
    MEM_ROOT *mem_root_save, own_root;
    TABLE *table;
@@ -10120,7 +10136,7 @@
      keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts;
      keyinfo->key_length=0;
      keyinfo->rec_per_key=0;
-    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+    keyinfo->algorithm= force_btree_key ? HA_KEY_ALG_BTREE :  
HA_KEY_ALG_UNDEF;
      keyinfo->name= (char*) "group_key";
      ORDER *cur_group= group;
      for (; cur_group ; cur_group= cur_group->next, key_part_info++)
@@ -10199,7 +10215,7 @@
      keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL;
      keyinfo->key_length=(uint16) reclength;
      keyinfo->name= (char*) "distinct_key";
-    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+    keyinfo->algorithm= force_btree_key ? HA_KEY_ALG_BTREE :  
HA_KEY_ALG_UNDEF;
      keyinfo->rec_per_key=0;
      if (null_pack_length)
      {
@@ -10739,6 +10755,10 @@
  /**
    Make a join of all tables and write it on socket or to table.

+  SYNOPSIS
+    do_select()
+      suspendable  Do not call sub_select.  Incompatible with  
cursored select.
+
    @retval
      0  if ok
    @retval
@@ -10748,7 +10768,11 @@
  */

  static int
-do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure  
*procedure)
+do_select(JOIN *join,
+         List<Item> *fields,
+         TABLE *table,
+         Procedure *procedure,
+         bool suspendable)
  {
    int rc= 0;
    enum_nested_loop_state error= NESTED_LOOP_OK;
@@ -10808,6 +10832,10 @@
    else
    {
      DBUG_ASSERT(join->tables);
+    if (suspendable)
+    {
+      DBUG_RETURN(NESTED_LOOP_SUSPENDED);
+    }
      error= sub_select(join,join_tab,0);
      if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS)
        error= sub_select(join,join_tab,1);
@@ -11057,9 +11085,11 @@

    while (rc == NESTED_LOOP_OK)
    {
+    DBUG_PRINT("info", ("sub_select loop starting with rc=%u error= 
%u", rc, error));
      error= info->read_record(info);
      rc= evaluate_join_record(join, join_tab, error);
    }
+  DBUG_PRINT("info", ("sub_select loop ending with rc=%u error=%u",  
rc, error));

    if (rc == NESTED_LOOP_NO_MORE_ROWS &&
        join_tab->last_inner && !join_tab->found)
@@ -11168,6 +11198,7 @@
        enum enum_nested_loop_state rc;
        /* A match from join_tab is found for the current partial  
join. */
        rc= (*join_tab->next_select)(join, join_tab+1, 0);
+      DBUG_PRINT("info", ("next_select returned %u", rc));
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          return rc;
        if (join->return_tab < join_tab)
diff -rub mysql-5.1.30/sql/sql_select.h mysql-5.1.30.kway/sql/ 
sql_select.h
--- mysql-5.1.30/sql/sql_select.h       2008-11-14 08:37:21.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_select.h  2009-06-04 13:16:45.000000000  
-0700
@@ -122,7 +122,8 @@
  {
    NESTED_LOOP_KILLED= -2, NESTED_LOOP_ERROR= -1,
    NESTED_LOOP_OK= 0, NESTED_LOOP_NO_MORE_ROWS= 1,
-  NESTED_LOOP_QUERY_LIMIT= 3, NESTED_LOOP_CURSOR_LIMIT= 4
+  NESTED_LOOP_QUERY_LIMIT= 3, NESTED_LOOP_CURSOR_LIMIT= 4,
+  NESTED_LOOP_SUSPENDED= 5
  };


@@ -474,7 +475,7 @@
               SELECT_LEX_UNIT *unit);
    int optimize();
    int reinit();
-  void exec();
+  JOIN *exec(bool suspendable);
    int destroy();
    void restore_tmp();
    bool alloc_func_list();
@@ -537,7 +538,7 @@
  TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item>  
&fields,
                         ORDER *group, bool distinct, bool  
save_sum_fields,
                         ulonglong select_options, ha_rows rows_limit,
-                       char* alias);
+                       char* alias, bool force_btree_key);
  void free_tmp_table(THD *thd, TABLE *entry);
  void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
                         List<Item> &fields, bool reset_with_sum_func);
diff -rub mysql-5.1.30/sql/sql_show.cc mysql-5.1.30.kway/sql/sql_show.cc
--- mysql-5.1.30/sql/sql_show.cc        2008-11-14 08:37:22.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_show.cc   2009-06-04 13:21:22.000000000  
-0700
@@ -5558,7 +5558,7 @@
                                  field_list, (ORDER*) 0, 0, 0,
                                  (select_lex->options | thd->options |
                                   TMP_TABLE_ALL_COLUMNS),
-                                HA_POS_ERROR, table_list->alias)))
+                                HA_POS_ERROR, table_list->alias,  
false)))
      DBUG_RETURN(0);
    my_bitmap_map* bitmaps=
      (my_bitmap_map*) thd->alloc(bitmap_buffer_size(field_count));
diff -rub mysql-5.1.30/sql/sql_union.cc mysql-5.1.30.kway/sql/ 
sql_union.cc
--- mysql-5.1.30/sql/sql_union.cc       2008-11-14 08:37:23.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_union.cc  2009-06-05 15:49:33.000000000  
-0700
@@ -19,10 +19,12 @@
    UNION's  were introduced by Monty and Sinisa <sinisa@stripped>
  */

+#define KWAY_MERGE true

  #include "mysql_priv.h"
  #include "sql_select.h"
  #include "sql_cursor.h"
+#include "procedure_intholder.h"

  bool mysql_union(THD *thd, LEX *lex, select_result *result,
                   SELECT_LEX_UNIT *unit, ulong  
setup_tables_done_option)
@@ -113,15 +115,20 @@
  bool
  select_union::create_result_table(THD *thd_arg, List<Item>  
*column_types,
                                    bool is_union_distinct, ulonglong  
options,
-                                  const char *alias)
+                                  const char *alias,
+                                  bool kway_merge)
  {
    DBUG_ASSERT(table == 0);
+
    tmp_table_param.init();
    tmp_table_param.field_count= column_types->elements;

+  if (kway_merge)
+    tmp_table_param.hidden_field_count++;
+
    if (! (table= create_tmp_table(thd_arg, &tmp_table_param,  
*column_types,
                                   (ORDER*) 0, is_union_distinct, 1,
-                                 options, HA_POS_ERROR, (char*)  
alias)))
+                                 options, HA_POS_ERROR, (char*)  
alias, kway_merge)))
      return TRUE;
    table->file->extra(HA_EXTRA_WRITE_CACHE);
    table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
@@ -130,6 +137,19 @@


  /*
+   Merge only makes sense if there's a limited order by on the outer  
select.
+   Only supported for union distinct.
+*/
+inline bool st_select_lex_unit::kway_merge ()
+{
+  return KWAY_MERGE && (first_select()->options &  
SELECT_ASSUME_SORTED_UNION) &&
+    global_parameters->order_list.elements &&
+    first_select()->select_limit && !found_rows_for_union &&
+    union_distinct && !thd->cursor;
+}
+
+
+/*
    initialization procedures before fake_select_lex preparation()

    SYNOPSIS
@@ -175,8 +195,21 @@
    select_result *tmp_result;
    bool is_union_select;
    TABLE *empty_table= 0;
+  uint select_idx= 0;
+  Item  *kway_select_idx_item = NULL;
+  ORDER kway_select_idx_order;
    DBUG_ENTER("st_select_lex_unit::prepare");

+  if (kway_merge())
+  {
+    kway_select_idx_item = new Item_field(&sl->context, NULL, NULL,  
"intholder");
+    kway_select_idx_order.item= &kway_select_idx_item;
+  } else if (first_select()->options & SELECT_ASSUME_SORTED_UNION) {
+    // ignoring option
+    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,  
ER_WARN_INVALID_SQL_ASSUME_SORTED_UNION,
+                        ER(ER_WARN_INVALID_SQL_ASSUME_SORTED_UNION));
+  }
+
    describe= test(additional_options & SELECT_DESCRIBE);

    /*
@@ -259,7 +292,7 @@
                                 (ORDER*) 0 : (ORDER *)sl- 
 >order_list.first,
                                 (ORDER*) sl->group_list.first,
                                 sl->having,
-                               (is_union_select ? (ORDER*) 0 :
+                               (is_union_select ? (kway_merge() ?  
&kway_select_idx_order : (ORDER*) 0) :
                                  (ORDER*) thd_arg->lex- 
 >proc_list.first),
                                 sl, this);
      /* There are no * in the statement anymore (for PS) */
@@ -268,6 +301,10 @@

      if (saved_error || (saved_error= thd_arg->is_fatal_error))
        goto err;
+
+    if (kway_merge())
+      ((proc_intholder*) join->procedure)->set(select_idx);
+
      /*
        Use items list of underlaid select for derived tables to  
preserve
        information about fields lengths and exact types
@@ -313,6 +350,7 @@
           DBUG_RETURN(TRUE);
        }
      }
+    select_idx++;
    }

    if (is_union_select)
@@ -335,6 +373,11 @@
        }
      }

+    if (kway_merge() && last_procedure->change_columns(types))
+    {
+      goto err;
+    }
+
      create_options= (first_sl->options | thd_arg->options |
                       TMP_TABLE_ALL_COLUMNS);
      /*
@@ -347,7 +390,7 @@
        create_options= create_options | TMP_TABLE_FORCE_MYISAM;

      if (union_result->create_result_table(thd, &types,  
test(union_distinct),
-                                          create_options, ""))
+                                          create_options, "",  
kway_merge()))
        goto err;
      bzero((char*) &result_table_list, sizeof(result_table_list));
      result_table_list.db= (char*) "";
@@ -363,6 +406,15 @@

        saved_error= table->fill_item_list(&item_list);

+      // don't include select_idx (single hidden field at start of  
table) in final select,
+      // but do include it in select_idx lookup ones.
+      // FIXME:  only need to select the columns in the ORDER here
+      if (kway_merge())
+      {
+        kway_select_idx_item_list.concat(&item_list);
+        item_list.pop();
+      }
+
        if (arena)
          thd->restore_active_arena(arena, &backup_arena);

@@ -419,16 +471,40 @@

  bool st_select_lex_unit::exec()
  {
+  uint select_idx= 0;
    SELECT_LEX *lex_select_save= thd->lex->current_select;
-  SELECT_LEX *select_cursor=first_select();
+  SELECT_LEX *select_cursor=first_select(), *select_cursor_2;
    ulonglong add_rows=0;
    ha_rows examined_rows= 0;
+  uint num_selects = 0;
+  MY_BITMAP kway_open_selects;
    DBUG_ENTER("st_select_lex_unit::exec");

    if (executed && !uncacheable && !describe)
      DBUG_RETURN(FALSE);
    executed= 1;

+  if (kway_merge())
+  {
+    /* allocate bitmap for which joins are left to be merged */
+    my_bitmap_map *kway_open_selects_bitmap= 0;
+    for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+      num_selects++;
+
+    if (!(kway_open_selects_bitmap= (my_bitmap_map*)
+         thd->alloc(bitmap_buffer_size(num_selects))))
+    {
+      mem_alloc_error(bitmap_buffer_size(num_selects));
+      DBUG_RETURN(1);
+    }
+    if (bitmap_init(&kway_open_selects, kway_open_selects_bitmap,
+                   num_selects, FALSE))
+    {
+      mem_alloc_error(num_selects);
+      DBUG_RETURN(1);
+    }
+    bitmap_set_all(&kway_open_selects);
+  }
    if (uncacheable || !item || !item->assigned() || describe)
    {
      if (item)
@@ -478,42 +554,94 @@
            sl->options & ~OPTION_FOUND_ROWS : sl->options |  
found_rows_for_union;
         saved_error= sl->join->optimize();
        }
-      if (!saved_error)
+      if (saved_error)
        {
+       /* error readying join, cannot exec */
+       thd->lex->current_select= lex_select_save;
+       DBUG_RETURN(saved_error);
+      }
         records_at_start= table->file->stats.records;
-       sl->join->exec();
+      sl->suspended_join = sl->join->exec(kway_merge());
+      if (kway_merge())
+      {
+       DBUG_PRINT("info", ("join->exec set join->error to %u", sl- 
 >join->error));
+       if (sl->join->error == NESTED_LOOP_SUSPENDED)
+       {
+         /* Final join should be suspended at start of Sending data  
state */
+         DBUG_ASSERT(sl->suspended_join);
+         DBUG_ASSERT(sl->suspended_join->tmp_table == NULL);
+         sl->join->error= NESTED_LOOP_OK;
+
+          /* backup limit counters */
+         sl->suspended_select_limit_cnt= select_limit_cnt;
+         sl->suspended_offset_limit_cnt= offset_limit_cnt;
+
+         /* pasted setup from Sensitive_cursor::open */
+         /* set fetch_limit so that next sub_select gets one row past  
offset */
+         sl->suspended_join->fetch_limit= offset_limit_cnt + 1;
+
+         /* Disable JOIN CACHE as it is not working with cursors yet */
+         for (JOIN_TAB *tab= sl->suspended_join->join_tab + sl- 
 >suspended_join->const_tables;
+              tab != sl->suspended_join->join_tab + sl- 
 >suspended_join->tables - 1;
+              tab++)
+         {
+           if (tab->next_select == sub_select_cache)
+             tab->next_select= sub_select;
+         }
+
+         /* FIXME: are these applicable here like they are in  
Sensitive_cursor? */
+         JOIN_TAB *join_tab= sl->suspended_join->join_tab + sl- 
 >suspended_join->const_tables;
+         DBUG_ASSERT(join_tab->table->reginfo.not_exists_optimize ==  
0);
+         DBUG_ASSERT(join_tab->not_used_in_distinct == 0);
+         DBUG_ASSERT(join_tab->table->null_row == 0);
+       }
+       else if (sl->join->error == NESTED_LOOP_OK)
+       {
+         /* was a constant join, so we already did its full send_data  
*/
+         bitmap_clear_bit(&kway_open_selects, select_idx);
+       }
+      }
+
+      saved_error= sl->join->error;
+      /* clean up if we're done, whether normally or from error */
+      if (!kway_merge() || saved_error)
+      {
          if (sl == union_distinct)
         {
           if (table->file->ha_disable_indexes(HA_KEY_SWITCH_ALL))
             DBUG_RETURN(TRUE);
           table->no_keyread=1;
         }
-       saved_error= sl->join->error;
+       /* i believe this next line is dead code...overwritten by  
set_limit */
         offset_limit_cnt= (ha_rows)(sl->offset_limit ?
                                      sl->offset_limit->val_uint() :
                                      0);
-       if (!saved_error)
+      }
+      if (saved_error)
         {
+       /* error in JOIN::exec, abort */
+       // TODO:  k-way cleanup
+       thd->lex->current_select= lex_select_save;
+       DBUG_RETURN(saved_error);
+      }
           examined_rows+= thd->examined_row_count;
+      if (!kway_merge())
+      {
           if (union_result->flush())
           {
             thd->lex->current_select= lex_select_save;
             DBUG_RETURN(1);
           }
-       }
-      }
-      if (saved_error)
-      {
-       thd->lex->current_select= lex_select_save;
-       DBUG_RETURN(saved_error);
-      }
        /* Needed for the following test and for records_at_start in  
next loop */
        int error= table->file->info(HA_STATUS_VARIABLE);
        if(error)
        {
          table->file->print_error(error, MYF(0));
+         /* Is there a reason not to set
+            thd->lex->current_select= lex_select_save on this error? */
          DBUG_RETURN(1);
        }
+
        if (found_rows_for_union && !sl->braces &&
            select_limit_cnt != HA_POS_ERROR)
        {
@@ -527,6 +655,278 @@
                               ((table->file->stats.records -   
records_at_start)));
        }
      }
+      select_idx++;
+    }
+
+    if (kway_merge())
+    {
+      ha_rows save_select_limit_cnt, save_offset_limit_cnt;
+      JOIN *save_join= NULL, *kway_select_idx_join= NULL;
+      select_singlerow_internal kway_select_idx_result;
+
+      /* Remember: no need to send_fields or send_eof on select_union  
result. */
+      thd_proc_info(thd, "Sending data");
+      DBUG_PRINT("info", ("%s", thd->proc_info));
+      enum_nested_loop_state error= NESTED_LOOP_OK;
+
+        // fetch a single unique result from each component
+        select_idx = 0;
+       for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+       {
+      bool first= true; // first try at non-dup in inner loop
+      ha_rows records_at_start= 0;
+
+         DBUG_PRINT("info", ("pre-processing select_idx %u",  
select_idx));
+
+      // process until we find a non-duplicate or we run out
+         while (bitmap_is_set(&kway_open_selects, select_idx) &&
+             (first || records_at_start == table->file->stats.records))
+         {
+        first= false;
+        records_at_start= table->file->stats.records;
+           DBUG_PRINT("info", ("pre-fetching select_idx %u",  
select_idx));
+           DBUG_ASSERT(sl->suspended_join->tables);
+           JOIN_TAB *join_tab= sl->suspended_join->join_tab + sl- 
 >suspended_join->const_tables;
+
+           /* set limit counters */
+           select_limit_cnt= sl->suspended_select_limit_cnt;
+           offset_limit_cnt= sl->suspended_offset_limit_cnt;
+
+           error= sub_select(sl->suspended_join,join_tab,0);
+           if (error == NESTED_LOOP_OK || error ==  
NESTED_LOOP_NO_MORE_ROWS)
+             error= sub_select(sl->suspended_join,join_tab,1);
+           if (error == NESTED_LOOP_QUERY_LIMIT)
+             error= NESTED_LOOP_OK;                    /*  
select_limit used */
+
+           /* backup limit counters */
+           sl->suspended_select_limit_cnt= select_limit_cnt;
+           sl->suspended_offset_limit_cnt= offset_limit_cnt; /*  
unnecessary */
+
+           if (error == NESTED_LOOP_CURSOR_LIMIT)
+           {
+             sl->suspended_join->resume_nested_loop= TRUE;
+             sl->suspended_join->fetch_limit+=1;
+           }
+           else if (error == NESTED_LOOP_OK)
+           {
+             DBUG_PRINT("info",("read all results from select_idx  
%u", select_idx));
+             /* NOTE: not sure if this is safe when others are open */
+             sl->suspended_join->join_free();
+             bitmap_clear_bit(&kway_open_selects, select_idx);
+           }
+           else
+           {
+             DBUG_PRINT("error",("Error: sub_select() failed for  
select_idx %u", select_idx));
+             sl->join->error = -1;
+             bitmap_clear_bit(&kway_open_selects, select_idx);
+
+             /* join_free all that haven't been */
+             select_idx = 0;
+             for (SELECT_LEX *sl2= select_cursor; sl2; sl2= sl2- 
 >next_select())
+             {
+               if (bitmap_is_set(&kway_open_selects, select_idx))
+               {
+                 DBUG_PRINT("info",("aborting select_idx %u",  
select_idx));
+                 sl2->suspended_join->join_free();
+               }
+               select_idx++;
+             }
+
+             thd->lex->current_select= lex_select_save;
+             DBUG_RETURN(-1);
+           }
+
+        /* Needed for the following test and for records_at_start in  
next loop */
+        if (union_result->flush())
+        {
+          thd->lex->current_select= lex_select_save;
+          DBUG_RETURN(1);
+        }
+        int table_error= table->file->info(HA_STATUS_VARIABLE);
+        if(table_error)
+        {
+          table->file->print_error(table_error, MYF(0));
+          DBUG_RETURN(1);
+        }
+         }
+         select_idx++;
+       }
+
+      // main k-way merge loop
+      // we only need select_limit_cnt - 1 fetches since we already  
have the top result from the pre-fetch above
+      for (ha_rows result_idx= 0; result_idx < select_limit_cnt - 1  
&& !bitmap_is_clear_all(&kway_open_selects); result_idx++)
+        {
+            bool first= true; // first try at non-dup in inner loop
+            ha_rows records_at_start= 0;
+
+          // select next select_idx to "emit"
+
+        // is this necessary?
+        init_prepare_fake_select_lex(thd);
+        fake_select_lex->table_list.empty();
+
+        // allocate join ourselves only so that we can set  
no_const_tables on it
+        if (!(kway_select_idx_join= new JOIN(thd,  
kway_select_idx_item_list,
+                                             fake_select_lex- 
 >options, &kway_select_idx_result)))
+        {
+          DBUG_RETURN(TRUE);
+        }
+        kway_select_idx_join->no_const_tables= TRUE;
+
+        // select out select_idx
+        fake_select_lex->item_list= kway_select_idx_item_list;
+        kway_select_idx_result.assigned(0);
+
+        save_select_limit_cnt= select_limit_cnt;
+        save_offset_limit_cnt= offset_limit_cnt;
+        offset_limit_cnt= result_idx;
+        select_limit_cnt= 1 + offset_limit_cnt;
+        save_join= fake_select_lex->join;
+        fake_select_lex->join= kway_select_idx_join;
+        saved_error= mysql_select(thd, &fake_select_lex- 
 >ref_pointer_array,
+                                  &result_table_list,
+                                  0, kway_select_idx_item_list,  
NULL, // TODO: pass *COND where ORDER is > X
+                                  global_parameters- 
 >order_list.elements,
+                                  (ORDER*)global_parameters- 
 >order_list.first,
+                                  (ORDER*) NULL, NULL, (ORDER*) NULL,
+                                  fake_select_lex->options |  
SELECT_NO_UNLOCK,
+                                  &kway_select_idx_result, this,  
fake_select_lex);
+        fake_select_lex->join= save_join;
+        select_limit_cnt= save_select_limit_cnt;
+        offset_limit_cnt= save_offset_limit_cnt;
+
+        kway_select_idx_join->destroy();
+        delete kway_select_idx_join;
+        kway_select_idx_join= 0;
+
+        if (saved_error != NESTED_LOOP_OK || ! 
kway_select_idx_result.assigned())
+        {
+          DBUG_PRINT("error",("Error: mysql_select() for select_idx  
failed on result_idx %lu with code %d", (ulong) result_idx,  
saved_error));
+          thd->lex->current_select= lex_select_save;
+             DBUG_RETURN(-1);
+        }
+
+        select_idx = kway_select_idx_result.element_index(0)- 
 >val_uint();
+        DBUG_PRINT("info",("mysql_select() found select_idx %u for  
result_idx %lu of kway merge", select_idx, (ulong) result_idx));
+
+        // select the next result for the select_idx we just emitted  
a result from
+          select_cursor_2= select_cursor;
+          for (uint i = 0; i != select_idx; i++)
+          {
+            select_cursor_2= select_cursor_2->next_select();
+          }
+
+          // FIXME:  this is just duplicated from above, encapsulate me
+          // process until we find a non-duplicate or we run out
+            while (bitmap_is_set(&kway_open_selects, select_idx) &&
+                   (first || records_at_start == table->file- 
 >stats.records))
+            {
+              first= false;
+              records_at_start= table->file->stats.records;
+              DBUG_PRINT("info", ("fetching select_idx %u",  
select_idx));
+              DBUG_ASSERT(select_cursor_2->suspended_join->tables);
+              JOIN_TAB *join_tab= select_cursor_2->suspended_join- 
 >join_tab + select_cursor_2->suspended_join->const_tables;
+
+              /* set limit counters */
+              select_limit_cnt= select_cursor_2- 
 >suspended_select_limit_cnt;
+              offset_limit_cnt= select_cursor_2- 
 >suspended_offset_limit_cnt;
+
+              error= sub_select(select_cursor_2- 
 >suspended_join,join_tab,0);
+              if (error == NESTED_LOOP_OK || error ==  
NESTED_LOOP_NO_MORE_ROWS)
+                error= sub_select(select_cursor_2- 
 >suspended_join,join_tab,1);
+              if (error == NESTED_LOOP_QUERY_LIMIT)
+                error= NESTED_LOOP_OK;                    /*  
select_limit used */
+
+              /* backup limit counters */
+              select_cursor_2->suspended_select_limit_cnt=  
select_limit_cnt;
+              select_cursor_2->suspended_offset_limit_cnt=  
offset_limit_cnt; /* unnecessary */
+
+              if (error == NESTED_LOOP_CURSOR_LIMIT)
+              {
+                select_cursor_2->suspended_join->resume_nested_loop=  
TRUE;
+                select_cursor_2->suspended_join->fetch_limit+=1;
+              }
+              else if (error == NESTED_LOOP_OK)
+              {
+                DBUG_PRINT("info",("read all results from select_idx  
%u", select_idx));
+                /* NOTE: not sure if this is safe when others are  
open */
+                select_cursor_2->suspended_join->join_free();
+                bitmap_clear_bit(&kway_open_selects, select_idx);
+              }
+              else
+              {
+                DBUG_PRINT("error",("Error: sub_select() failed for  
select_idx %u", select_idx));
+                select_cursor_2->join->error = -1;
+                bitmap_clear_bit(&kway_open_selects, select_idx);
+
+                /* join_free all that haven't been */
+                select_idx = 0;
+                for (SELECT_LEX *sl2= select_cursor; sl2; sl2= sl2- 
 >next_select())
+                {
+                  if (bitmap_is_set(&kway_open_selects, select_idx))
+                  {
+                    DBUG_PRINT("info",("aborting select_idx %u",  
select_idx));
+                    sl2->suspended_join->join_free();
+                  }
+                  select_idx++;
+                }
+
+                thd->lex->current_select= lex_select_save;
+                DBUG_RETURN(-1);
+              }
+
+              /* Needed for the following test and for  
records_at_start in next loop */
+              if (union_result->flush())
+              {
+                thd->lex->current_select= lex_select_save;
+                DBUG_RETURN(1);
+              }
+              int table_error= table->file->info(HA_STATUS_VARIABLE);
+              if(table_error)
+              {
+                table->file->print_error(table_error, MYF(0));
+                DBUG_RETURN(1);
+              }
+            }
+      }
+
+      // FIXME:  encapsulate
+      /* join_free all that haven't been */
+      select_idx = 0;
+      for (SELECT_LEX *sl2= select_cursor; sl2; sl2= sl2- 
 >next_select())
+      {
+        if (bitmap_is_set(&kway_open_selects, select_idx))
+        {
+          DBUG_PRINT("info",("freeing select_idx %u", select_idx));
+          sl2->suspended_join->join_free();
+        }
+        select_idx++;
+      }
+
+      select_idx= 0;
+      for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+      {
+       DBUG_PRINT("info",("%ld records output by select_idx %u",  
(long) sl->suspended_join ? sl->suspended_join->send_records : sl- 
 >join->send_records, select_idx));
+       /* unwrapped cleanup from JOIN::exec after it would have  
called do_select */
+
+       /* Accumulate the counts from all join iterations of all join  
parts. */
+       if (sl->suspended_join)
+         examined_rows+= sl->suspended_join->examined_rows;
+       DBUG_PRINT("counts", ("thd->examined_row_count: %lu",
+                             (ulong) thd->examined_row_count));
+       /* FIXME:  this doesn't really make sense here, don't try to  
handle union all */
+//     if (sl == union_distinct)
+//     {
+//       if (table->file->ha_disable_indexes(HA_KEY_SWITCH_ALL))
+//       {
+//         DBUG_RETURN(TRUE);
+//       }
+//       table->no_keyread=1;
+//     }
+
+       select_idx++;
+      }
+    }
    }
    optimized= 1;

@@ -600,7 +1000,7 @@
          {
            join->examined_rows= 0;
            saved_error= join->reinit();
-          join->exec();
+          join->exec(false);
          }
        }

diff -rub mysql-5.1.30/sql/sql_update.cc mysql-5.1.30.kway/sql/ 
sql_update.cc
--- mysql-5.1.30/sql/sql_update.cc      2008-11-14 08:37:23.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_update.cc 2009-06-04 13:18:30.000000000  
-0700
@@ -1536,7 +1536,7 @@
                                            (ORDER*) &group, 0, 0,
                                            TMP_TABLE_ALL_COLUMNS,
                                            HA_POS_ERROR,
-                                          (char *) "")))
+                                          (char *) "", false)))
        DBUG_RETURN(1);
      tmp_tables[cnt]->file->extra(HA_EXTRA_WRITE_CACHE);
    }
diff -rub mysql-5.1.30/sql/sql_yacc.yy mysql-5.1.30.kway/sql/sql_yacc.yy
--- mysql-5.1.30/sql/sql_yacc.yy        2008-11-14 08:37:23.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_yacc.yy   2009-04-13 19:58:17.000000000  
-0700
@@ -998,6 +998,7 @@
  %token  SQL_SMALL_RESULT
  %token  SQL_SYM                       /* SQL-2003-R */
  %token  SQL_THREAD
+%token  SQL_ASSUME_SORTED_UNION
  %token  SSL_SYM
  %token  STARTING
  %token  STARTS_SYM
@@ -6494,6 +6495,18 @@
          | DISTINCT         { Select->options|= SELECT_DISTINCT; }
          | SQL_SMALL_RESULT { Select->options|= SELECT_SMALL_RESULT; }
          | SQL_BIG_RESULT   { Select->options|= SELECT_BIG_RESULT; }
+        | SQL_ASSUME_SORTED_UNION
+          {
+           /* assertions
+              1. this is a union:  code below segfaults, too early to  
check this?
+              2. all are ordered:  all have order_list.elements?
+              3. all order_lists are equivalent?
+
+            if (!Lex->current_select->master_unit()->is_union())
+              MYSQL_YYABORT;
+                   */
+            Select->options|= SELECT_ASSUME_SORTED_UNION;
+          }
          | SQL_BUFFER_RESULT
            {
              if (check_simple_select())

Thread
proposed design for UNION Order By optimizationEric Jensen13 Mar
  • RE: proposed design for UNION Order By optimizationRick James13 Mar
  • Re: proposed design for UNION Order By optimizationEric Jensen6 Jun
    • RE: proposed design for UNION Order By optimizationRick James6 Jun
    • Re: proposed design for UNION Order By optimizationEric Jensen7 Jun
    • Re: proposed design for UNION Order By optimizationEric Jensen5 Jul
      • Re: proposed design for UNION Order By optimizationEric Jensen5 Jul
        • Re: proposed design for UNION Order By optimizationEric Jensen6 Jul
Re: proposed design for UNION Order By optimizationEric Jensen6 Jun
Re: proposed design for UNION Order By optimizationEric Jensen12 Jul