List:Internals« Previous MessageNext Message »
From:Eric Jensen Date:July 11 2009 10:05pm
Subject:Re: proposed design for UNION Order By optimization
View as plain text  
I figured out how to construct a COND for just a single column order- 
by, so my prototype is now complete.  Patch is below.  Using the  
benchmark script which is also below, this is about a 25% improvement  
over the previous union behavior.

Testing unions with k-way merge descending
Time for union_with_kway_desc (100:10000): 34 wallclock secs ( 0.10  
usr  0.03 sys +  0.00 cusr  0.00 csys =  0.13 CPU)
Testing unions without k-way merge descending
Time for union_without_kway_desc (100:10000): 42 wallclock secs ( 0.10  
usr  0.03 sys +  0.00 cusr  0.00 csys =  0.13 CPU)

Testing unions without k-way merge ascending
Time for union_without_kway_asc (100:10000): 79 wallclock secs ( 0.11  
usr  0.04 sys +  0.00 cusr  0.00 csys =  0.15 CPU)
Testing unions with k-way merge ascending
Time for union_with_kway_asc (100:10000): 62 wallclock secs ( 0.12  
usr  0.04 sys +  0.00 cusr  0.00 csys =  0.16 CPU)

The new optimizations also add some elements to my list of hacks in  
this patch that would need to be fixed for this to be generally usable:

1.  doesn't do the validation necessary to ensure component selects  
are sorted identically to the external one
2.  some of the validation that it does do isn't working (see the  
broken tests i added)
3.  doesn't show anything meaningful in explain
4.  doesn't make any attempt to automatically select this plan via the  
optimizer (relying on a query hint)
5.  only works on single-column order by
6.  assumes the order by column uniquely identifies the union'ed row,  
even though the actual distinct behavior is handled by the temporary  
table index across all the columns union'ed in the select

eric

On Jul 11, 2009, at 10:39 AM, Eric Jensen wrote:

> i should have been clearer that this is actually done in my patch to  
> the mysql sql server code as follows, just need COND instead of  
> limit/offset:
>
>         // select out select_idx
>         fake_select_lex->item_list= kway_select_idx_item_list;
>         kway_select_idx_result.assigned(0);
>
>         save_select_limit_cnt= select_limit_cnt;
>         save_offset_limit_cnt= offset_limit_cnt;
>         offset_limit_cnt= result_idx;
>         select_limit_cnt= 1 + offset_limit_cnt;
>         save_join= fake_select_lex->join;
>         fake_select_lex->join= kway_select_idx_join;
>         saved_error= mysql_select(thd, &fake_select_lex- 
> >ref_pointer_array,
>                                   &result_table_list,
>                                   0, kway_select_idx_item_list,  
> NULL, // TODO: pass *COND where ORDER is > X
>                                   global_parameters- 
> >order_list.elements,
>                                   (ORDER*)global_parameters- 
> >order_list.first,
>                                   (ORDER*) NULL, NULL, (ORDER*) NULL,
>                                   fake_select_lex->options |  
> SELECT_NO_UNLOCK,
>                                   &kway_select_idx_result, this,  
> fake_select_lex);
>         select_idx = kway_select_idx_result.element_index(0)- 
> >val_uint();
>
> On Jul 10, 2009, at 10:06 PM, Igor Babaev wrote:
>
>> Eric Jensen wrote:
>>> The broader design discussion and actual code patch is on that  
>>> mailing
>>> list thread, but basically I'm doing a k-way merge where i treat the
>>> temporary table as my heap.  So each iteration of the merge I do a
>>> mysql_select call to determine the current maximum value and the  
>>> query
>>> that it came from.  I'm already reading the "query that it came  
>>> from"
>>> part which is just an integer, but I need to be able to also read  
>>> the
>>> maximum value from the order by and use it in the next mysql_select.
>>> For example:
>>>
>>> create table t1 (a int not null);
>>> create table t2 (a int not null);
>>> insert some stuff;
>>> (select SQL_ASSUME_SORTED_UNION a from t1 order by a limit 10) UNION
>>> (select a from t1 order by a limit 10) order by a limit 10;
>>>
>>> max = 0
>>> for result_idx in 1..10
>>>   mysql_select("select result_idx, a as value from temp where a >  
>>> max
>>> order by a limit 1")
>>>   max = value
>>>   read one result from result_idx into temp
>>> end
>>
>> I would suggest using the construction like this:
>>
>> set @@max = 0
>> for result_idx in 1..10
>>   mysql_select("select result_idx, @@max:= a as value from temp
>>                   where a > @@max order by a limit 1")
>>   read one result from result_idx into temp
>> end
>>
>> In this case you don't have to create any dynamic execution code.
>> Bear in mind that the select list is calculated always after
>> the evaluation of the where condition.
>>
>> Igor
>>
>>>
>>> I have all of this except the ability to construct the "where a >  
>>> max"
>>> COND dynamically, especially in the case of a multi-column order by.
>>>
>>> eric
>>>


(ej@lakitu) ~> cat mysql-5.1.30.kway.patch
diff -rup mysql-5.1.30/client/mysql.cc mysql-5.1.30.kway/client/mysql.cc
--- mysql-5.1.30/client/mysql.cc        2008-11-14 08:34:16.000000000  
-0800
+++ mysql-5.1.30.kway/client/mysql.cc   2009-04-13 18:58:10.000000000  
-0700
@@ -692,6 +692,7 @@ static COMMANDS commands[] = {
    { "SQL_NO_CACHE", 0, 0, 0, ""},
    { "SQL_SMALL_RESULT", 0, 0, 0, ""},
    { "SQL_THREAD", 0, 0, 0, ""},
+  { "SQL_ASSUME_SORTED_UNION", 0, 0, 0, ""},
    { "SQL_TSI_FRAC_SECOND", 0, 0, 0, ""},
    { "SQL_TSI_SECOND", 0, 0, 0, ""},
    { "SQL_TSI_MINUTE", 0, 0, 0, ""},
diff -rup mysql-5.1.30/libmysqld/Makefile.am mysql-5.1.30.kway/ 
libmysqld/Makefile.am
--- mysql-5.1.30/libmysqld/Makefile.am  2008-11-14 08:34:37.000000000  
-0800
+++ mysql-5.1.30.kway/libmysqld/Makefile.am     2009-05-11  
19:44:40.000000000 -0700
@@ -61,7 +61,7 @@ sqlsources = derror.cc field.cc field_co
         opt_sum.cc procedure.cc records.cc sql_acl.cc \
         sql_load.cc discover.cc sql_locale.cc \
         sql_profile.cc \
-       sql_analyse.cc sql_base.cc sql_cache.cc sql_class.cc \
+       sql_analyse.cc sql_base.cc sql_cache.cc sql_class.cc  
procedure_intholder.cc \
         sql_crypt.cc sql_db.cc sql_delete.cc sql_error.cc  
sql_insert.cc \
         sql_lex.cc sql_list.cc sql_manager.cc sql_map.cc \
         scheduler.cc sql_connect.cc sql_parse.cc \
diff -rup mysql-5.1.30/mysql-test/r/union.result mysql-5.1.30.kway/ 
mysql-test/r/union.result
--- mysql-5.1.30/mysql-test/r/union.result      2008-11-14  
09:31:01.000000000 -0800
+++ mysql-5.1.30.kway/mysql-test/r/union.result 2009-06-05  
19:59:46.000000000 -0700
@@ -474,6 +474,84 @@ ERROR HY000: You can't specify target ta
  select a from t1 union select a from t2 order by t2.a;
  ERROR 42S22: Unknown column 't2.a' in 'order clause'
  drop table t1,t2;
+create table t1 (a int);
+insert into t1 values (1),(2),(3);
+create table t2 (a int);
+insert into t2 values (3),(4),(5);
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 1) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 1) ORDER BY a LIMIT 1;
+a
+1
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 3) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 3) ORDER BY a LIMIT 3;
+a
+1
+2
+3
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a DESC LIMIT 3)  
UNION (SELECT * FROM t2 ORDER BY a DESC LIMIT 3) ORDER BY a DESC LIMIT  
3;
+a
+5
+4
+3
+drop table t1,t2;
+create table t1 (a int);
+insert into t1 values (1),(3),(5),(6);
+create table t2 (a int);
+insert into t2 values (2),(3),(4),(7);
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a LIMIT 4;
+a
+1
+2
+3
+4
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) LIMIT 4;
+a
+1
+3
+5
+6
+Warnings:
+Warning        1617    Invalid use of SQL_ASSUME_SORTED_UNION ignored
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a;
+a
+1
+2
+3
+4
+5
+Warnings:
+Warning        1617    Invalid use of SQL_ASSUME_SORTED_UNION ignored
+(SELECT SQL_ASSUME_SORTED_UNION SQL_CALC_FOUND_ROWS * FROM t1 ORDER  
by a LIMIT 4) UNION (SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a  
LIMIT 4;
+a
+1
+2
+3
+4
+Warnings:
+Warning        1617    Invalid use of SQL_ASSUME_SORTED_UNION ignored
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
ALL (SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a LIMIT 4;
+a
+1
+2
+3
+3
+Warnings:
+Warning        1617    Invalid use of SQL_ASSUME_SORTED_UNION ignored
+drop table t1,t2;
+create table t1 (s char(200));
+insert into t1 values (repeat("1",200)),(repeat("3",200)),(repeat("5", 
200));
+create table t2 select * from t1;
+insert into t1 values (repeat("2",200)),(repeat("4",200)),(repeat("6", 
200)),(repeat("7",200));
+set local tmp_table_size=1024;
+select count(*) from ((select SQL_ASSUME_SORTED_UNION * from t1 order  
by 1) union (select * from t2 order by 1) order by 1 limit 7) b;
+count(*)
+7
+select count(*) from t1;
+count(*)
+7
+select count(*) from t2;
+count(*)
+3
+drop table t1,t2;
+set local tmp_table_size=default;
  select length(version()) > 1 as `*` UNION select 2;
  *
  1
diff -rup mysql-5.1.30/mysql-test/t/union.test mysql-5.1.30.kway/mysql- 
test/t/union.test
--- mysql-5.1.30/mysql-test/t/union.test        2008-11-14  
09:30:23.000000000 -0800
+++ mysql-5.1.30.kway/mysql-test/t/union.test   2009-06-05  
19:59:56.000000000 -0700
@@ -319,6 +319,47 @@ select a from t1 union select a from t2
  drop table t1,t2;

  #
+# Test k-way merge
+#
+
+create table t1 (a int);
+insert into t1 values (1),(2),(3);
+create table t2 (a int);
+insert into t2 values (3),(4),(5);
+
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 1) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 1) ORDER BY a LIMIT 1;
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 3) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 3) ORDER BY a LIMIT 3;
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a DESC LIMIT 3)  
UNION (SELECT * FROM t2 ORDER BY a DESC LIMIT 3) ORDER BY a DESC LIMIT  
3;
+
+drop table t1,t2;
+create table t1 (a int);
+insert into t1 values (1),(3),(5),(6);
+create table t2 (a int);
+insert into t2 values (2),(3),(4),(7);
+
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a LIMIT 4;
+
+# All of these should ignore SQL_ASSUME_SORTED_UNION
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) LIMIT 4;
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
(SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a;
+(SELECT SQL_ASSUME_SORTED_UNION SQL_CALC_FOUND_ROWS * FROM t1 ORDER  
by a LIMIT 4) UNION (SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a  
LIMIT 4;
+(SELECT SQL_ASSUME_SORTED_UNION * FROM t1 ORDER by a LIMIT 4) UNION  
ALL (SELECT * FROM t2 ORDER BY a LIMIT 4) ORDER BY a LIMIT 4;
+
+drop table t1,t2;
+
+# conversion memory->disk table with merge
+create table t1 (s char(200));
+insert into t1 values (repeat("1",200)),(repeat("3",200)),(repeat("5", 
200));
+create table t2 select * from t1;
+insert into t1 values (repeat("2",200)),(repeat("4",200)),(repeat("6", 
200)),(repeat("7",200));
+set local tmp_table_size=1024;
+select count(*) from ((select SQL_ASSUME_SORTED_UNION * from t1 order  
by 1) union (select * from t2 order by 1) order by 1 limit 7) b;
+select count(*) from t1;
+select count(*) from t2;
+drop table t1,t2;
+set local tmp_table_size=default;
+
+#
  # Problem with alias '*' (BUG #1249)
  #

diff -rup mysql-5.1.30/sql/Makefile.am mysql-5.1.30.kway/sql/Makefile.am
--- mysql-5.1.30/sql/Makefile.am        2008-11-14 08:37:08.000000000  
-0800
+++ mysql-5.1.30.kway/sql/Makefile.am   2009-05-11 19:44:26.000000000  
-0700
@@ -104,7 +104,7 @@ mysqld_SOURCES =    sql_lex.cc sql_handler.
                         ha_partition.cc \
                         sql_db.cc sql_table.cc sql_rename.cc  
sql_crypt.cc \
                         sql_load.cc mf_iocache.cc field_conv.cc  
sql_show.cc \
-                       sql_udf.cc sql_analyse.cc sql_analyse.h  
sql_cache.cc \
+                       sql_udf.cc sql_analyse.cc sql_analyse.h  
sql_cache.cc procedure_intholder.cc \
                         slave.cc sql_repl.cc rpl_filter.cc  
rpl_tblmap.cc \
                         rpl_utility.cc rpl_injector.cc rpl_rli.cc  
rpl_mi.cc \
                         rpl_reporting.cc \
diff -rup mysql-5.1.30/sql/item_subselect.cc mysql-5.1.30.kway/sql/ 
item_subselect.cc
--- mysql-5.1.30/sql/item_subselect.cc  2008-11-14 08:37:11.000000000  
-0800
+++ mysql-5.1.30.kway/sql/item_subselect.cc     2008-12-12  
10:37:52.000000000 -0800
@@ -1955,7 +1955,7 @@ int subselect_single_select_engine::exec
        }
      }

-    join->exec();
+    join->exec(false);

      /* Enable the optimizations back */
      for (JOIN_TAB **ptab= changed_tabs; ptab != last_changed_tab;  
ptab++)
diff -rup mysql-5.1.30/sql/item_sum.cc mysql-5.1.30.kway/sql/item_sum.cc
--- mysql-5.1.30/sql/item_sum.cc        2008-11-14 08:37:11.000000000  
-0800
+++ mysql-5.1.30.kway/sql/item_sum.cc   2009-06-04 13:18:08.000000000  
-0700
@@ -2593,7 +2593,7 @@ bool Item_sum_count_distinct::setup(THD
    if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*)  
0, 1,
                                 0,
                                 (select_lex->options | thd->options),
-                               HA_POS_ERROR, (char*)"")))
+                               HA_POS_ERROR, (char*)"", false)))
      return TRUE;
    table->file->extra(HA_EXTRA_NO_ROWS);                // Don't  
update rows
    table->no_rows=1;
@@ -3461,7 +3461,7 @@ bool Item_func_group_concat::setup(THD *
    if (!(table= create_tmp_table(thd, tmp_table_param, all_fields,
                                  (ORDER*) 0, 0, TRUE,
                                  (select_lex->options | thd->options),
-                                HA_POS_ERROR, (char*) "")))
+                                HA_POS_ERROR, (char*) "", false)))
      DBUG_RETURN(TRUE);
    table->file->extra(HA_EXTRA_NO_ROWS);
    table->no_rows= 1;
diff -rup mysql-5.1.30/sql/lex.h mysql-5.1.30.kway/sql/lex.h
--- mysql-5.1.30/sql/lex.h      2008-11-14 08:37:12.000000000 -0800
+++ mysql-5.1.30.kway/sql/lex.h 2009-04-13 19:44:30.000000000 -0700
@@ -498,6 +498,7 @@ static SYMBOL symbols[] = {
    { "SQL_NO_CACHE",    SYM(SQL_NO_CACHE_SYM)},
    { "SQL_SMALL_RESULT", SYM(SQL_SMALL_RESULT)},
    { "SQL_THREAD",      SYM(SQL_THREAD)},
+  { "SQL_ASSUME_SORTED_UNION", SYM(SQL_ASSUME_SORTED_UNION)},
    { "SQL_TSI_FRAC_SECOND", SYM(FRAC_SECOND_SYM)},
    { "SQL_TSI_SECOND",   SYM(SECOND_SYM)},
    { "SQL_TSI_MINUTE",   SYM(MINUTE_SYM)},
diff -rup mysql-5.1.30/sql/mysql_priv.h mysql-5.1.30.kway/sql/ 
mysql_priv.h
--- mysql-5.1.30/sql/mysql_priv.h       2008-11-14 08:37:13.000000000  
-0800
+++ mysql-5.1.30.kway/sql/mysql_priv.h  2009-04-13 18:56:19.000000000  
-0700
@@ -476,7 +476,7 @@ protected:
  */
  #define TMP_TABLE_FORCE_MYISAM          (ULL(1) << 32)
  #define OPTION_PROFILING                (ULL(1) << 33)
-
+#define SELECT_ASSUME_SORTED_UNION      (ULL(1) << 34) // SELECT, user


  /**
diff -rup mysql-5.1.30/sql/procedure.cc mysql-5.1.30.kway/sql/ 
procedure.cc
--- mysql-5.1.30/sql/procedure.cc       2008-11-14 08:37:14.000000000  
-0800
+++ mysql-5.1.30.kway/sql/procedure.cc  2009-05-11 18:08:15.000000000  
-0700
@@ -23,6 +23,7 @@
  #include "mysql_priv.h"
  #include "procedure.h"
  #include "sql_analyse.h"                       // Includes procedure
+#include "procedure_intholder.h"
  #ifdef USE_PROC_RANGE
  #include "proc_range.h"
  #endif
@@ -37,6 +38,7 @@ static struct st_procedure_def {
    { "split_count",proc_count_range_init },     // Internal procedure  
at TCX
    { "matris_ranges",proc_matris_range_init },  // Internal procedure  
at TCX
  #endif
+  { "intholder",proc_intholder_init },         // Return an int that  
is internally set.  FIXME:  make private somehow
    { "analyse",proc_analyse_init }              // Analyse a result
  };

diff -rup mysql-5.1.30/sql/procedure_intholder.cc mysql-5.1.30.kway/ 
sql/procedure_intholder.cc
--- mysql-5.1.30/sql/procedure_intholder.cc     2009-06-03  
19:25:09.000000000 -0700
+++ mysql-5.1.30.kway/sql/procedure_intholder.cc        2009-06-05  
12:09:51.000000000 -0700
@@ -0,0 +1,75 @@
+/* procedure that returns an internally set int */
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation                         // gcc: Class  
implementation
+#endif
+
+#include "mysql_priv.h"
+#include "procedure.h"
+
+#include "procedure_intholder.h"
+
+// Create and register the actual procedure object
+Procedure *proc_intholder_init(THD *thd, ORDER *param,
+ 
                                                                              
select_result 
  *result,
+ 
                                                                               List 
<Item> &field_list)
+{
+    DBUG_ENTER("proc_intholder_init");
+
+    proc_intholder *pc = new proc_intholder(result);
+
+    DBUG_RETURN(pc);
+}
+
+// modify the result set column types here
+bool proc_intholder::change_columns(List<Item> &field_list)
+{
+    DBUG_ENTER("proc_intholder::change_columns");
+
+    // and attach it to the column list
+    field_list.push_front(value_column);
+
+    DBUG_RETURN(0);
+}
+
+// not used here and not in ANALYSE() either,
+// probably called for each row before grouping
+void proc_intholder::add(void)
+{
+    DBUG_ENTER("proc_intholder::add");
+
+    DBUG_VOID_RETURN;
+}
+
+// not used here and not in ANALYSE() either,
+// probably called whenever a group rows were fully processed
+void proc_intholder::end_group(void)
+{
+    DBUG_ENTER("proc_intholder::end_group");
+
+    DBUG_VOID_RETURN;
+}
+
+// called before sending result rows, you may modify the results here
+int proc_intholder::send_row(List<Item> &field_list  
__attribute__((unused)))
+{
+    DBUG_ENTER("proc_intholder::send_row");
+
+    // set value in result row
+    value_column->set(value);
+
+    // now send the modified results
+    if (result->send_data(field_list))
+       DBUG_RETURN(-1);
+
+      DBUG_RETURN(0);
+    }
+
+// called after sending all result rows,
+// you may add summary rows here as ANALYSE() does
+int proc_intholder::end_of_records(void)
+{
+    DBUG_ENTER("proc_intholder::end_of_records");
+
+    DBUG_RETURN(0);
+}
diff -rup mysql-5.1.30/sql/procedure_intholder.h mysql-5.1.30.kway/sql/ 
procedure_intholder.h
--- mysql-5.1.30/sql/procedure_intholder.h      2009-06-03  
19:25:13.000000000 -0700
+++ mysql-5.1.30.kway/sql/procedure_intholder.h 2009-06-05  
12:09:51.000000000 -0700
@@ -0,0 +1,52 @@
+/* procedure that returns an internally set int */
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface                              /* gcc class  
implementation */
+#endif
+
+#ifndef PROCEDURE_INTHOLDER_H
+#define PROCEDURE_INTHOLDER_H
+
+Procedure *proc_intholder_init(THD *thd, ORDER *param,
+                                                    select_result  
*result,
+                                                    List<Item>  
&field_list);
+
+
+class proc_intholder: public Procedure
+{
+  protected:
+    longlong value;
+    Item_proc_int *value_column;
+    ORDER param_fields_value;
+
+  public:
+
+    proc_intholder(select_result *res) :Procedure(res, 0), value(-1)
+    {
+      // create a new column item
+      value_column = new Item_proc_int("IntHolder");
+      param_fields_value.item= (Item**) &value_column;
+      param_fields_value.next= NULL;
+      param_fields = &param_fields_value;
+    }
+
+    ~proc_intholder()
+    {
+      // Items are deleted by Query_arena::free_items
+      //delete value_column;
+    }
+
+    void set(longlong nr) { value=nr; }
+
+    virtual void add(void);
+    virtual bool change_columns(List<Item> &fields);
+    virtual int  send_row(List<Item> &fields);
+    virtual void end_group(void);
+    virtual int end_of_records(void);
+
+    friend Procedure *proc_intholder_init(THD *thd, ORDER *param,
+ 
                                                                             
select_result 
  *result,
+ 
                                                                              List 
<Item> &field_list);
+};
+
+#endif
diff -rup mysql-5.1.30/sql/share/errmsg.txt mysql-5.1.30.kway/sql/ 
share/errmsg.txt
--- mysql-5.1.30/sql/share/errmsg.txt   2008-11-14 08:37:17.000000000  
-0800
+++ mysql-5.1.30.kway/sql/share/errmsg.txt      2009-06-05  
11:35:49.000000000 -0700
@@ -6139,3 +6139,7 @@ ER_NEED_REPREPARE

  ER_DELAYED_NOT_SUPPORTED
    eng "DELAYED option not supported for table '%-.192s'"
+
+ER_WARN_INVALID_SQL_ASSUME_SORTED_UNION
+  eng "Invalid use of SQL_ASSUME_SORTED_UNION ignored"
+
diff -rup mysql-5.1.30/sql/sql_class.cc mysql-5.1.30.kway/sql/ 
sql_class.cc
--- mysql-5.1.30/sql/sql_class.cc       2008-11-14 08:37:19.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_class.cc  2009-05-26 19:04:52.000000000  
-0700
@@ -2052,6 +2052,49 @@ err:
  }


+int select_singlerow_internal::prepare(List<Item> &list,  
SELECT_LEX_UNIT *u)
+{
+  Item *sel_item;
+  List_iterator_fast<Item> li(list);
+
+  unit= u;
+
+  if (!(row= (Item_cache**)  
sql_alloc(sizeof(Item_cache*)*list.elements)))
+    return 1;
+
+  for (uint i= 0; (sel_item= li++); i++)
+  {
+    if (!(row[i]= Item_cache::get_cache(sel_item)))
+      return 1;
+    row[i]->setup(sel_item);
+  }
+
+  return 0;
+}
+
+
+bool select_singlerow_internal::send_data(List<Item> &items)
+{
+  DBUG_ENTER("select_singlerow_internal::send_data");
+  if (assigned())
+  {
+    my_message(ER_SUBQUERY_NO_1_ROW, ER(ER_SUBQUERY_NO_1_ROW), MYF(0));
+    DBUG_RETURN(1);
+  }
+  if (unit->offset_limit_cnt)
+  {                                      // Using limit offset,count
+    unit->offset_limit_cnt--;
+    DBUG_RETURN(0);
+  }
+  List_iterator_fast<Item> li(items);
+  Item *val_item;
+  for (uint i= 0; (val_item= li++); i++)
+    row[i]->store(val_item);
+  assigned(1);
+  DBUG_RETURN(0);
+}
+
+
  select_subselect::select_subselect(Item_subselect *item_arg)
  {
    item= item_arg;
diff -rup mysql-5.1.30/sql/sql_class.h mysql-5.1.30.kway/sql/sql_class.h
--- mysql-5.1.30/sql/sql_class.h        2008-11-14 08:37:19.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_class.h   2009-05-26 18:58:37.000000000  
-0700
@@ -2599,7 +2599,25 @@ public:

    bool create_result_table(THD *thd, List<Item> *column_types,
                             bool is_distinct, ulonglong options,
-                           const char *alias);
+                           const char *alias,
+                           bool kway_merge);
+};
+
+/* Stores single row of values from internal select, used by kway  
merge */
+class select_singlerow_internal :public select_result_interceptor
+{
+  my_bool value_assigned; /* value already assigned */
+protected:
+  Item_cache **row;
+public:
+  select_singlerow_internal() :value_assigned(0) {}
+  int prepare(List<Item> &list, SELECT_LEX_UNIT *u);
+  bool send_data(List<Item> &items);
+  bool send_eof() { return 0; };
+  Item* element_index(uint i) { return my_reinterpret_cast(Item*) 
(row[i]); }
+  Item** addr(uint i) { return (Item**)row + i; }
+  bool assigned() { return value_assigned; }
+  void assigned(bool a) { value_assigned= a; }
  };

  /* Base subselect interface class */
diff -rup mysql-5.1.30/sql/sql_cursor.cc mysql-5.1.30.kway/sql/ 
sql_cursor.cc
--- mysql-5.1.30/sql/sql_cursor.cc      2008-11-14 08:37:19.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_cursor.cc 2009-05-08 13:18:54.000000000  
-0700
@@ -706,7 +706,7 @@ bool Select_materialize::send_fields(Lis
  {
    DBUG_ASSERT(table == 0);
    if (create_result_table(unit->thd, unit->get_unit_column_types(),
-                          FALSE, thd->options |  
TMP_TABLE_ALL_COLUMNS, ""))
+                          FALSE, thd->options |  
TMP_TABLE_ALL_COLUMNS, "", FALSE))
      return TRUE;

    materialized_cursor= new (&table->mem_root)
diff -rup mysql-5.1.30/sql/sql_derived.cc mysql-5.1.30.kway/sql/ 
sql_derived.cc
--- mysql-5.1.30/sql/sql_derived.cc     2008-11-14 08:37:19.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_derived.cc        2009-05-08  
13:00:09.000000000 -0700
@@ -168,7 +168,8 @@ bool mysql_derived_prepare(THD *thd, LEX
      */
      if ((res= derived_result->create_result_table(thd, &unit->types,  
FALSE,
                                                   create_options,
-                                                 orig_table_list- 
 >alias)))
+                                                 orig_table_list- 
 >alias,
+                                                 FALSE)))
        goto exit;

      table= derived_result->table;
diff -rup mysql-5.1.30/sql/sql_lex.cc mysql-5.1.30.kway/sql/sql_lex.cc
--- mysql-5.1.30/sql/sql_lex.cc 2008-11-14 08:37:20.000000000 -0800
+++ mysql-5.1.30.kway/sql/sql_lex.cc    2009-06-05 16:06:22.000000000  
-0700
@@ -1516,6 +1516,7 @@ void st_select_lex_unit::init_query()
    fake_select_lex= 0;
    cleaned= 0;
    item_list.empty();
+  kway_select_idx_item_list.empty();
    describe= 0;
    found_rows_for_union= 0;
  }
@@ -1523,6 +1524,8 @@ void st_select_lex_unit::init_query()
  void st_select_lex::init_query()
  {
    st_select_lex_node::init_query();
+  suspended_select_limit_cnt= HA_POS_ERROR;
+  suspended_offset_limit_cnt= 0;
    table_list.empty();
    top_join_list.empty();
    join_list= &top_join_list;
diff -rup mysql-5.1.30/sql/sql_lex.h mysql-5.1.30.kway/sql/sql_lex.h
--- mysql-5.1.30/sql/sql_lex.h  2008-11-14 08:37:20.000000000 -0800
+++ mysql-5.1.30.kway/sql/sql_lex.h     2009-06-03 17:28:14.000000000  
-0700
@@ -481,7 +481,7 @@ protected:
    select_result *result;
    ulonglong found_rows_for_union;
    bool saved_error;
-
+
  public:
    bool  prepared, // prepare phase already performed for UNION (unit)
      optimized, // optimize phase already performed for UNION (unit)
@@ -490,6 +490,8 @@ public:

    // list of fields which points to temporary table for union
    List<Item> item_list;
+  // list of fields which points to temporary table for union,  
including select_idx
+  List<Item> kway_select_idx_item_list;
    /*
      list of types of items inside union (used for union & derived  
tables)

@@ -552,6 +554,7 @@ public:
    void set_limit(st_select_lex *values);
    void set_thd(THD *thd_arg) { thd= thd_arg; }
    inline bool is_union ();
+  inline bool kway_merge ();

    friend void lex_start(THD *thd);
    friend int subselect_union_engine::exec();
@@ -590,6 +593,9 @@ public:
    List<Item_func_match> *ftfunc_list;
    List<Item_func_match> ftfunc_list_alloc;
    JOIN *join; /* after JOIN::prepare it is pointer to corresponding  
JOIN */
+  JOIN *suspended_join; /* after JOIN::exec(true), pointer to  
suspended JOIN */
+  /* after JOIN::exec(true), suspended LIMIT counters */
+  ha_rows suspended_select_limit_cnt, suspended_offset_limit_cnt;
    List<TABLE_LIST> top_join_list; /* join list of the top  
level          */
    List<TABLE_LIST> *join_list;    /* list for the currently parsed  
join  */
    TABLE_LIST *embedding;          /* table embedding to the above  
list   */
diff -rup mysql-5.1.30/sql/sql_select.cc mysql-5.1.30.kway/sql/ 
sql_select.cc
--- mysql-5.1.30/sql/sql_select.cc      2008-11-14 08:37:21.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_select.cc 2009-07-05 19:11:05.000000000  
-0700
@@ -123,7 +123,7 @@ static bool open_tmp_table(TABLE *table)
  static bool create_myisam_tmp_table(TABLE *table,TMP_TABLE_PARAM  
*param,
                                     ulonglong options);
  static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
-                    Procedure *proc);
+                    Procedure *proc,bool suspendable);

  static enum_nested_loop_state
  evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
@@ -1432,7 +1432,7 @@ JOIN::optimize()
                            group_list && simple_group,
                            select_options,
                             tmp_rows_limit,
-                          (char *) "")))
+                          (char *) "", false)))
                 {
        DBUG_RETURN(1);
      }
@@ -1620,6 +1620,11 @@ JOIN::save_join_tab()
  /**
    Exec select.

+  SYNOPSIS
+    exec()
+      suspendable  Pass to final do_select sweep to indicate it  
should not call
+                   sub_select.  Incompatible with cursored select.
+
    @todo
      Note, that create_sort_index calls test_if_skip_sort_order and may
      finally replace sorting with index scan if there is a LIMIT  
clause in
@@ -1627,24 +1632,29 @@ JOIN::save_join_tab()

    @todo
      When can we have here thd->net.report_error not zero?
+
+  RETURN Sending data join that was suspended if suspendable
  */
-void
-JOIN::exec()
+JOIN*
+JOIN::exec(bool suspendable)
  {
+  JOIN *suspended_join= NULL;
    List<Item> *columns_list= &fields_list;
    int      tmp_error;
    DBUG_ENTER("JOIN::exec");

    thd_proc_info(thd, "executing");
    error= 0;
+
    if (procedure)
    {
      procedure_fields_list= fields_list;
+
      if (procedure->change_columns(procedure_fields_list) ||
-       result->prepare(procedure_fields_list, unit))
+        result->prepare(procedure_fields_list, unit))
      {
        thd->limit_found_rows= thd->examined_row_count= 0;
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
      }
      columns_list= &procedure_fields_list;
    }
@@ -1688,7 +1698,7 @@ JOIN::exec()
      /* Single select (without union) always returns 0 or 1 row */
      thd->limit_found_rows= send_records;
      thd->examined_row_count= 0;
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);
    }
    /*
      Don't reset the found rows count if there're no tables as
@@ -1706,12 +1716,12 @@ JOIN::exec()
                             select_options,
                             zero_result_cause,
                             having);
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);
    }

    if ((this->select_lex->options & OPTION_SCHEMA_TABLE) &&
        get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);

    if (select_options & SELECT_DESCRIBE)
    {
@@ -1744,7 +1754,7 @@ JOIN::exec()
                     order != 0 && !skip_sort_order,
                     select_distinct,
                      !tables ? "No tables used" : NullS);
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);
    }

    JOIN *curr_join= this;
@@ -1779,10 +1789,10 @@ JOIN::exec()
      if (!curr_join->sort_and_group &&
          curr_join->const_tables != curr_join->tables)
        curr_join->join_tab[curr_join->const_tables].sorted= 0;
-    if ((tmp_error= do_select(curr_join, (List<Item> *) 0,  
curr_tmp_table, 0)))
+    if ((tmp_error= do_select(curr_join, (List<Item> *) 0,  
curr_tmp_table, 0, false)))
      {
        error= tmp_error;
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
      }
      curr_tmp_table->file->info(HA_STATUS_VARIABLE);

@@ -1800,14 +1810,14 @@ JOIN::exec()
         if (change_to_use_tmp_fields(thd, items1,
                                      tmp_fields_list1, tmp_all_fields1,
                                      fields_list.elements, all_fields))
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
        }
        else
        {
         if (change_refs_to_tmp_fields(thd, items1,
                                       tmp_fields_list1,  
tmp_all_fields1,
                                       fields_list.elements,  
all_fields))
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
        }
        curr_join->tmp_all_fields1= tmp_all_fields1;
        curr_join->tmp_fields_list1= tmp_fields_list1;
@@ -1862,7 +1872,7 @@ JOIN::exec()
        /* Free first data from old join */
        curr_join->join_free();
        if (make_simple_join(curr_join, curr_tmp_table))
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        calc_group_buffer(curr_join, group_list);
        count_field_types(select_lex, &curr_join->tmp_table_param,
                         curr_join->tmp_all_fields1,
@@ -1895,8 +1905,8 @@ JOIN::exec()
                                                 !curr_join->group_list,
                                                 1, curr_join- 
 >select_options,
                                                 HA_POS_ERROR,
-                                               (char *) "")))
-         DBUG_VOID_RETURN;
+                                               (char *) "", false)))
+         DBUG_RETURN(suspended_join);
         curr_join->exec_tmp_table2= exec_tmp_table2;
        }
        if (curr_join->group_list)
@@ -1904,13 +1914,13 @@ JOIN::exec()
         thd_proc_info(thd, "Creating sort index");
         if (curr_join->join_tab == join_tab && save_join_tab())
         {
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
         }
         if (create_sort_index(thd, curr_join, curr_join->group_list,
                               HA_POS_ERROR, HA_POS_ERROR, FALSE) ||
             make_group_fields(this, curr_join))
         {
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
         }
          sortorder= curr_join->sortorder;
        }
@@ -1934,17 +1944,17 @@ JOIN::exec()
        }
        if (curr_join->make_sum_func_list(*curr_all_fields,  
*curr_fields_list,
                                         1, TRUE))
-        DBUG_VOID_RETURN;
+        DBUG_RETURN(suspended_join);
        curr_join->group_list= 0;
        if (!curr_join->sort_and_group &&
            curr_join->const_tables != curr_join->tables)
          curr_join->join_tab[curr_join->const_tables].sorted= 0;
        if (setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
           (tmp_error= do_select(curr_join, (List<Item> *) 0,  
curr_tmp_table,
-                               0)))
+                               0, false)))
        {
         error= tmp_error;
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        }
        end_read_record(&curr_join->join_tab->read_record);
        curr_join->const_tables= curr_join->tables; // Mark free for  
cleanup()
@@ -1957,7 +1967,7 @@ JOIN::exec()
         if (change_to_use_tmp_fields(thd, items2,
                                      tmp_fields_list2, tmp_all_fields2,
                                      fields_list.elements,  
tmp_all_fields1))
-         DBUG_VOID_RETURN;
+         DBUG_RETURN(suspended_join);
         curr_join->tmp_fields_list2= tmp_fields_list2;
         curr_join->tmp_all_fields2= tmp_all_fields2;
        }
@@ -1979,13 +1989,13 @@ JOIN::exec()
         curr_join->tmp_having->update_used_tables();
        if (remove_duplicates(curr_join, curr_tmp_table,
                             *curr_fields_list, curr_join->tmp_having))
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        curr_join->tmp_having=0;
        curr_join->select_distinct=0;
      }
      curr_tmp_table->reginfo.lock_type= TL_UNLOCK;
      if (make_simple_join(curr_join, curr_tmp_table))
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
      calc_group_buffer(curr_join, curr_join->group_list);
      count_field_types(select_lex, &curr_join->tmp_table_param,
                        *curr_all_fields, 0);
@@ -2000,7 +2010,7 @@ JOIN::exec()
    {
      if (make_group_fields(this, curr_join))
      {
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
      }
      if (!items3)
      {
@@ -2032,7 +2042,7 @@ JOIN::exec()
                                       1, TRUE) ||
          setup_sum_funcs(curr_join->thd, curr_join->sum_funcs) ||
          thd->is_fatal_error)
-      DBUG_VOID_RETURN;
+      DBUG_RETURN(suspended_join);
    }
    if (curr_join->group_list || curr_join->order)
    {
@@ -2055,7 +2065,7 @@ JOIN::exec()
        {
         if (!curr_table->select)
           if (!(curr_table->select= new SQL_SELECT))
-           DBUG_VOID_RETURN;
+           DBUG_RETURN(suspended_join);
         if (!curr_table->select->cond)
           curr_table->select->cond= sort_table_cond;
         else                                    // This should never  
happen
@@ -2063,7 +2073,7 @@ JOIN::exec()
           if (!(curr_table->select->cond=
                 new Item_cond_and(curr_table->select->cond,
                                   sort_table_cond)))
-           DBUG_VOID_RETURN;
+           DBUG_RETURN(suspended_join);
           /*
             Item_cond_and do not need fix_fields for execution, its  
parameters
             are fixed or do not need fix_fields, too
@@ -2111,7 +2121,7 @@ JOIN::exec()
        }
        if (curr_join->join_tab == join_tab && save_join_tab())
        {
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        }
        /*
         Here we sort rows for ORDER BY/GROUP BY clause, if the  
optimiser
@@ -2129,7 +2139,7 @@ JOIN::exec()
                             (select_options & OPTION_FOUND_ROWS ?
                              HA_POS_ERROR : unit->select_limit_cnt),
                              curr_join->group_list ? TRUE : FALSE))
-       DBUG_VOID_RETURN;
+       DBUG_RETURN(suspended_join);
        sortorder= curr_join->sortorder;
        if (curr_join->const_tables != curr_join->tables &&
            !curr_join->join_tab[curr_join->const_tables].table- 
 >sort.io_cache)
@@ -2147,7 +2157,7 @@ JOIN::exec()
    if (thd->is_error())
    {
      error= thd->is_error();
-    DBUG_VOID_RETURN;
+    DBUG_RETURN(suspended_join);
    }
    curr_join->having= curr_join->tmp_having;
    curr_join->fields= curr_fields_list;
@@ -2155,6 +2165,10 @@ JOIN::exec()

    if (is_top_level_join() && thd->cursor && tables != const_tables)
    {
+    /*
+      Shouldn't be trying to do a k-way merge for a cursored result
+    */
+    DBUG_ASSERT(!suspendable);
      /*
        We are here if this is JOIN::exec for the last select of the  
main unit
        and the client requested to open a cursor.
@@ -2172,14 +2186,16 @@ JOIN::exec()
      /* Open cursor for the last join sweep */
      error= thd->cursor->open(this);
    }
-  else
+  else
    {
      thd_proc_info(thd, "Sending data");
      DBUG_PRINT("info", ("%s", thd->proc_info));
      result->send_fields((procedure ? curr_join- 
 >procedure_fields_list :
                           *curr_fields_list),
                          Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
-    error= do_select(curr_join, curr_fields_list, NULL, procedure);
+    error= do_select(curr_join, curr_fields_list, NULL, procedure,  
suspendable);
+    if (error == NESTED_LOOP_SUSPENDED)
+      suspended_join= curr_join;
      thd->limit_found_rows= curr_join->send_records;
    }

@@ -2199,7 +2215,7 @@ JOIN::exec()
         exec_tmp_table1 || exec_tmp_table2))
      set_items_ref_array(items0);

-  DBUG_VOID_RETURN;
+  DBUG_RETURN(suspended_join);
  }


@@ -2358,7 +2374,7 @@ mysql_select(THD *thd, Item ***rref_poin
    if (thd->is_error())
      goto err;

-  join->exec();
+  join->exec(false);

    if (thd->cursor && thd->cursor->is_open())
    {
@@ -3673,6 +3689,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR
    KEY_FIELD *key_fields, *end, *field;
    uint sz;
    uint m= max(select_lex->max_equal_elems,1);
+  DBUG_ENTER("update_ref_and_keys");

    /*
      We use the same piece of memory to store both  KEY_FIELD
@@ -3699,7 +3716,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR
        (((thd->lex->current_select->cond_count+1)*2 +
         thd->lex->current_select->between_count)*m+1);
    if (!(key_fields=(KEY_FIELD*)        thd->alloc(sz)))
-    return TRUE; /* purecov: inspected */
+    DBUG_RETURN(1); /* purecov: inspected */
    and_level= 0;
    field= end= key_fields;
    *sargables= (SARGABLE_PARAM *) key_fields +
@@ -3708,7 +3725,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR
    (*sargables)[0].field= 0;

    if (my_init_dynamic_array(keyuse,sizeof(KEYUSE),20,64))
-    return TRUE;
+    DBUG_RETURN(1);
    if (cond)
    {
      add_key_fields(join_tab->join, &end, &and_level, cond,  
normal_tables,
@@ -3816,7 +3833,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR
      VOID(set_dynamic(keyuse,(uchar*) &key_end,i));
      keyuse->elements=i;
    }
-  return FALSE;
+  DBUG_RETURN(0);
  }

  /**
@@ -9602,7 +9619,7 @@ TABLE *
  create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
                  ORDER *group, bool distinct, bool save_sum_fields,
                  ulonglong select_options, ha_rows rows_limit,
-                char *table_alias)
+                char *table_alias, bool force_btree_key)
  {
    MEM_ROOT *mem_root_save, own_root;
    TABLE *table;
@@ -10120,7 +10137,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA
      keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts;
      keyinfo->key_length=0;
      keyinfo->rec_per_key=0;
-    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+    keyinfo->algorithm= force_btree_key ? HA_KEY_ALG_BTREE :  
HA_KEY_ALG_UNDEF;
      keyinfo->name= (char*) "group_key";
      ORDER *cur_group= group;
      for (; cur_group ; cur_group= cur_group->next, key_part_info++)
@@ -10199,8 +10216,21 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA
      keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL;
      keyinfo->key_length=(uint16) reclength;
      keyinfo->name= (char*) "distinct_key";
-    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
-    keyinfo->rec_per_key=0;
+    if (force_btree_key)
+    {
+      share->key_info= keyinfo;
+      keyinfo->algorithm= HA_KEY_ALG_BTREE;
+      if (!(keyinfo->rec_per_key= (ulong*) alloc_root(&share->mem_root,
+                                           sizeof(ulong*)*keyinfo- 
 >key_parts)))
+        goto err;
+      bzero((void*) keyinfo->rec_per_key, keyinfo->key_parts *  
sizeof(ulong*));
+      share->keys_in_use.init(1);
+    }
+    else
+    {
+      keyinfo->algorithm= HA_KEY_ALG_UNDEF;
+      keyinfo->rec_per_key=0;
+    }
      if (null_pack_length)
      {
        key_part_info->null_bit=0;
@@ -10234,6 +10264,11 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA
          (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1  
||
          (ha_base_keytype) key_part_info->type ==  
HA_KEYTYPE_VARTEXT2) ?
         0 : FIELDFLAG_BINARY;
+
+      if (force_btree_key)
+      {
+        key_part_info->field->part_of_sortkey.set_bit(0);
+      }
      }
    }

@@ -10739,6 +10774,10 @@ Next_select_func setup_end_select_func(J
  /**
    Make a join of all tables and write it on socket or to table.

+  SYNOPSIS
+    do_select()
+      suspendable  Do not call sub_select.  Incompatible with  
cursored select.
+
    @retval
      0  if ok
    @retval
@@ -10748,7 +10787,11 @@ Next_select_func setup_end_select_func(J
  */

  static int
-do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure  
*procedure)
+do_select(JOIN *join,
+         List<Item> *fields,
+         TABLE *table,
+         Procedure *procedure,
+         bool suspendable)
  {
    int rc= 0;
    enum_nested_loop_state error= NESTED_LOOP_OK;
@@ -10808,6 +10851,10 @@ do_select(JOIN *join,List<Item> *fields,
    else
    {
      DBUG_ASSERT(join->tables);
+    if (suspendable)
+    {
+      DBUG_RETURN(NESTED_LOOP_SUSPENDED);
+    }
      error= sub_select(join,join_tab,0);
      if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS)
        error= sub_select(join,join_tab,1);
@@ -11057,9 +11104,11 @@ sub_select(JOIN *join,JOIN_TAB *join_tab

    while (rc == NESTED_LOOP_OK)
    {
+    DBUG_PRINT("info", ("sub_select loop starting with rc=%u error= 
%u", rc, error));
      error= info->read_record(info);
      rc= evaluate_join_record(join, join_tab, error);
    }
+  DBUG_PRINT("info", ("sub_select loop ending with rc=%u error=%u",  
rc, error));

    if (rc == NESTED_LOOP_NO_MORE_ROWS &&
        join_tab->last_inner && !join_tab->found)
@@ -11168,6 +11217,7 @@ evaluate_join_record(JOIN *join, JOIN_TA
        enum enum_nested_loop_state rc;
        /* A match from join_tab is found for the current partial  
join. */
        rc= (*join_tab->next_select)(join, join_tab+1, 0);
+      DBUG_PRINT("info", ("next_select returned %u", rc));
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          return rc;
        if (join->return_tab < join_tab)
diff -rup mysql-5.1.30/sql/sql_select.h mysql-5.1.30.kway/sql/ 
sql_select.h
--- mysql-5.1.30/sql/sql_select.h       2008-11-14 08:37:21.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_select.h  2009-06-04 13:16:45.000000000  
-0700
@@ -122,7 +122,8 @@ enum enum_nested_loop_state
  {
    NESTED_LOOP_KILLED= -2, NESTED_LOOP_ERROR= -1,
    NESTED_LOOP_OK= 0, NESTED_LOOP_NO_MORE_ROWS= 1,
-  NESTED_LOOP_QUERY_LIMIT= 3, NESTED_LOOP_CURSOR_LIMIT= 4
+  NESTED_LOOP_QUERY_LIMIT= 3, NESTED_LOOP_CURSOR_LIMIT= 4,
+  NESTED_LOOP_SUSPENDED= 5
  };


@@ -474,7 +475,7 @@ public:
               SELECT_LEX_UNIT *unit);
    int optimize();
    int reinit();
-  void exec();
+  JOIN *exec(bool suspendable);
    int destroy();
    void restore_tmp();
    bool alloc_func_list();
@@ -537,7 +538,7 @@ bool store_val_in_field(Field *field, It
  TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item>  
&fields,
                         ORDER *group, bool distinct, bool  
save_sum_fields,
                         ulonglong select_options, ha_rows rows_limit,
-                       char* alias);
+                       char* alias, bool force_btree_key);
  void free_tmp_table(THD *thd, TABLE *entry);
  void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
                         List<Item> &fields, bool reset_with_sum_func);
diff -rup mysql-5.1.30/sql/sql_show.cc mysql-5.1.30.kway/sql/sql_show.cc
--- mysql-5.1.30/sql/sql_show.cc        2008-11-14 08:37:22.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_show.cc   2009-06-04 13:21:22.000000000  
-0700
@@ -5558,7 +5558,7 @@ TABLE *create_schema_table(THD *thd, TAB
                                  field_list, (ORDER*) 0, 0, 0,
                                  (select_lex->options | thd->options |
                                   TMP_TABLE_ALL_COLUMNS),
-                                HA_POS_ERROR, table_list->alias)))
+                                HA_POS_ERROR, table_list->alias,  
false)))
      DBUG_RETURN(0);
    my_bitmap_map* bitmaps=
      (my_bitmap_map*) thd->alloc(bitmap_buffer_size(field_count));
diff -rup mysql-5.1.30/sql/sql_union.cc mysql-5.1.30.kway/sql/ 
sql_union.cc
--- mysql-5.1.30/sql/sql_union.cc       2008-11-14 08:37:23.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_union.cc  2009-07-11 14:20:13.000000000  
-0700
@@ -19,10 +19,12 @@
    UNION's  were introduced by Monty and Sinisa <sinisa@stripped>
  */

+#define KWAY_MERGE true

  #include "mysql_priv.h"
  #include "sql_select.h"
  #include "sql_cursor.h"
+#include "procedure_intholder.h"

  bool mysql_union(THD *thd, LEX *lex, select_result *result,
                   SELECT_LEX_UNIT *unit, ulong  
setup_tables_done_option)
@@ -113,15 +115,20 @@ bool select_union::flush()
  bool
  select_union::create_result_table(THD *thd_arg, List<Item>  
*column_types,
                                    bool is_union_distinct, ulonglong  
options,
-                                  const char *alias)
+                                  const char *alias,
+                                  bool kway_merge)
  {
    DBUG_ASSERT(table == 0);
+
    tmp_table_param.init();
    tmp_table_param.field_count= column_types->elements;
-
+
+  if (kway_merge)
+    tmp_table_param.hidden_field_count++;
+
    if (! (table= create_tmp_table(thd_arg, &tmp_table_param,  
*column_types,
                                   (ORDER*) 0, is_union_distinct, 1,
-                                 options, HA_POS_ERROR, (char*)  
alias)))
+                                 options, HA_POS_ERROR, (char*)  
alias, kway_merge)))
      return TRUE;
    table->file->extra(HA_EXTRA_WRITE_CACHE);
    table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
@@ -130,6 +137,20 @@ select_union::create_result_table(THD *t


  /*
+   Merge only makes sense if there's a limited order by on the outer  
select.
+   Only supported for union distinct.
+*/
+inline bool st_select_lex_unit::kway_merge ()
+{
+  return KWAY_MERGE && (first_select()->options &  
SELECT_ASSUME_SORTED_UNION) &&
+    //global_parameters->item_list.elements == 1 && // FIXME:  only  
works for single-column selects
+    global_parameters->order_list.elements == 1 && // FIXME:  only  
works for single-column order by
+    first_select()->select_limit && !found_rows_for_union &&
+    union_distinct && !thd->cursor;
+}
+
+
+/*
    initialization procedures before fake_select_lex preparation()

    SYNOPSIS
@@ -175,7 +196,20 @@ bool st_select_lex_unit::prepare(THD *th
    select_result *tmp_result;
    bool is_union_select;
    TABLE *empty_table= 0;
+  uint select_idx= 0;
+  Item  *kway_select_idx_item = NULL;
+  ORDER kway_select_idx_order;
    DBUG_ENTER("st_select_lex_unit::prepare");
+
+  if (kway_merge())
+  {
+    kway_select_idx_item = new Item_field(&sl->context, NULL, NULL,  
"intholder");
+    kway_select_idx_order.item= &kway_select_idx_item;
+  } else if (first_select()->options & SELECT_ASSUME_SORTED_UNION) {
+    // ignoring option
+    push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,  
ER_WARN_INVALID_SQL_ASSUME_SORTED_UNION,
+                        ER(ER_WARN_INVALID_SQL_ASSUME_SORTED_UNION));
+  }

    describe= test(additional_options & SELECT_DESCRIBE);

@@ -226,7 +260,7 @@ bool st_select_lex_unit::prepare(THD *th
      tmp_result= sel_result;

    sl->context.resolve_in_select_list= TRUE;
-
+
    for (;sl; sl= sl->next_select())
    {
      bool can_skip_order_by;
@@ -247,7 +281,7 @@ bool st_select_lex_unit::prepare(THD *th
      thd_arg->lex->current_select= sl;

      can_skip_order_by= is_union_select && !(sl->braces && sl- 
 >explicit_limit);
-
+
      saved_error= join->prepare(&sl->ref_pointer_array,
                                 (TABLE_LIST*) sl->table_list.first,
                                 sl->with_wild,
@@ -259,7 +293,7 @@ bool st_select_lex_unit::prepare(THD *th
                                 (ORDER*) 0 : (ORDER *)sl- 
 >order_list.first,
                                 (ORDER*) sl->group_list.first,
                                 sl->having,
-                               (is_union_select ? (ORDER*) 0 :
+                               (is_union_select ? (kway_merge() ?  
&kway_select_idx_order : (ORDER*) 0) :
                                  (ORDER*) thd_arg->lex- 
 >proc_list.first),
                                 sl, this);
      /* There are no * in the statement anymore (for PS) */
@@ -268,6 +302,10 @@ bool st_select_lex_unit::prepare(THD *th

      if (saved_error || (saved_error= thd_arg->is_fatal_error))
        goto err;
+
+    if (kway_merge())
+      ((proc_intholder*) join->procedure)->set(select_idx);
+
      /*
        Use items list of underlaid select for derived tables to  
preserve
        information about fields lengths and exact types
@@ -313,6 +351,7 @@ bool st_select_lex_unit::prepare(THD *th
           DBUG_RETURN(TRUE);
        }
      }
+    select_idx++;
    }

    if (is_union_select)
@@ -335,6 +374,11 @@ bool st_select_lex_unit::prepare(THD *th
        }
      }

+    if (kway_merge() && last_procedure->change_columns(types))
+    {
+      goto err;
+    }
+
      create_options= (first_sl->options | thd_arg->options |
                       TMP_TABLE_ALL_COLUMNS);
      /*
@@ -347,7 +391,7 @@ bool st_select_lex_unit::prepare(THD *th
        create_options= create_options | TMP_TABLE_FORCE_MYISAM;

      if (union_result->create_result_table(thd, &types,  
test(union_distinct),
-                                          create_options, ""))
+                                          create_options, "",  
kway_merge()))
        goto err;
      bzero((char*) &result_table_list, sizeof(result_table_list));
      result_table_list.db= (char*) "";
@@ -362,6 +406,15 @@ bool st_select_lex_unit::prepare(THD *th
        arena= thd->activate_stmt_arena_if_needed(&backup_arena);

        saved_error= table->fill_item_list(&item_list);
+
+      // don't include select_idx (single hidden field at start of  
table) in final select,
+      // but do include it in select_idx lookup ones.
+      // FIXME:  only need to select the columns in the ORDER here
+      if (kway_merge())
+      {
+        kway_select_idx_item_list.concat(&item_list);
+        item_list.pop();
+      }

        if (arena)
          thd->restore_active_arena(arena, &backup_arena);
@@ -419,16 +472,40 @@ err:

  bool st_select_lex_unit::exec()
  {
+  uint select_idx= 0;
    SELECT_LEX *lex_select_save= thd->lex->current_select;
-  SELECT_LEX *select_cursor=first_select();
+  SELECT_LEX *select_cursor=first_select(), *select_cursor_2;
    ulonglong add_rows=0;
    ha_rows examined_rows= 0;
+  uint num_selects = 0;
+  MY_BITMAP kway_open_selects;
    DBUG_ENTER("st_select_lex_unit::exec");

    if (executed && !uncacheable && !describe)
      DBUG_RETURN(FALSE);
    executed= 1;
-
+
+  if (kway_merge())
+  {
+    /* allocate bitmap for which joins are left to be merged */
+    my_bitmap_map *kway_open_selects_bitmap= 0;
+    for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+      num_selects++;
+
+    if (!(kway_open_selects_bitmap= (my_bitmap_map*)
+         thd->alloc(bitmap_buffer_size(num_selects))))
+    {
+      mem_alloc_error(bitmap_buffer_size(num_selects));
+      DBUG_RETURN(1);
+    }
+    if (bitmap_init(&kway_open_selects, kway_open_selects_bitmap,
+                   num_selects, FALSE))
+    {
+      mem_alloc_error(num_selects);
+      DBUG_RETURN(1);
+    }
+    bitmap_set_all(&kway_open_selects);
+  }
    if (uncacheable || !item || !item->assigned() || describe)
    {
      if (item)
@@ -478,53 +555,403 @@ bool st_select_lex_unit::exec()
            sl->options & ~OPTION_FOUND_ROWS : sl->options |  
found_rows_for_union;
         saved_error= sl->join->optimize();
        }
-      if (!saved_error)
+      if (saved_error)
+      {
+       /* error readying join, cannot exec */
+       thd->lex->current_select= lex_select_save;
+       DBUG_RETURN(saved_error);
+      }
+      records_at_start= table->file->stats.records;
+      sl->suspended_join = sl->join->exec(kway_merge());
+      if (kway_merge())
+      {
+       DBUG_PRINT("info", ("join->exec set join->error to %u", sl- 
 >join->error));
+       if (sl->join->error == NESTED_LOOP_SUSPENDED)
+       {
+         /* Final join should be suspended at start of Sending data  
state */
+         DBUG_ASSERT(sl->suspended_join);
+         DBUG_ASSERT(sl->suspended_join->tmp_table == NULL);
+         sl->join->error= NESTED_LOOP_OK;
+
+          /* backup limit counters */
+         sl->suspended_select_limit_cnt= select_limit_cnt;
+         sl->suspended_offset_limit_cnt= offset_limit_cnt;
+
+         /* pasted setup from Sensitive_cursor::open */
+         /* set fetch_limit so that next sub_select gets one row past  
offset */
+         sl->suspended_join->fetch_limit= offset_limit_cnt + 1;
+
+         /* Disable JOIN CACHE as it is not working with cursors yet */
+         for (JOIN_TAB *tab= sl->suspended_join->join_tab + sl- 
 >suspended_join->const_tables;
+              tab != sl->suspended_join->join_tab + sl- 
 >suspended_join->tables - 1;
+              tab++)
+         {
+           if (tab->next_select == sub_select_cache)
+             tab->next_select= sub_select;
+         }
+
+         /* FIXME: are these applicable here like they are in  
Sensitive_cursor? */
+         JOIN_TAB *join_tab= sl->suspended_join->join_tab + sl- 
 >suspended_join->const_tables;
+         DBUG_ASSERT(join_tab->table->reginfo.not_exists_optimize ==  
0);
+         DBUG_ASSERT(join_tab->not_used_in_distinct == 0);
+         DBUG_ASSERT(join_tab->table->null_row == 0);
+       }
+       else if (sl->join->error == NESTED_LOOP_OK)
+       {
+         /* was a constant join, so we already did its full send_data  
*/
+         bitmap_clear_bit(&kway_open_selects, select_idx);
+       }
+      }
+
+      saved_error= sl->join->error;
+      /* clean up if we're done, whether normally or from error */
+      if (!kway_merge() || saved_error)
        {
-       records_at_start= table->file->stats.records;
-       sl->join->exec();
-        if (sl == union_distinct)
+       if (sl == union_distinct)
         {
           if (table->file->ha_disable_indexes(HA_KEY_SWITCH_ALL))
             DBUG_RETURN(TRUE);
           table->no_keyread=1;
         }
-       saved_error= sl->join->error;
+       /* i believe this next line is dead code...overwritten by  
set_limit */
         offset_limit_cnt= (ha_rows)(sl->offset_limit ?
-                                    sl->offset_limit->val_uint() :
-                                    0);
-       if (!saved_error)
-       {
-         examined_rows+= thd->examined_row_count;
-         if (union_result->flush())
-         {
-           thd->lex->current_select= lex_select_save;
-           DBUG_RETURN(1);
-         }
-       }
+                                   sl->offset_limit->val_uint() :
+                                   0);
        }
        if (saved_error)
        {
+       /* error in JOIN::exec, abort */
+       // TODO:  k-way cleanup
         thd->lex->current_select= lex_select_save;
         DBUG_RETURN(saved_error);
        }
-      /* Needed for the following test and for records_at_start in  
next loop */
-      int error= table->file->info(HA_STATUS_VARIABLE);
-      if(error)
+      examined_rows+= thd->examined_row_count;
+      if (!kway_merge())
        {
-        table->file->print_error(error, MYF(0));
-        DBUG_RETURN(1);
+       if (union_result->flush())
+       {
+         thd->lex->current_select= lex_select_save;
+         DBUG_RETURN(1);
+       }
+       /* Needed for the following test and for records_at_start in  
next loop */
+       int error= table->file->info(HA_STATUS_VARIABLE);
+       if(error)
+       {
+         table->file->print_error(error, MYF(0));
+         /* Is there a reason not to set
+            thd->lex->current_select= lex_select_save on this error? */
+         DBUG_RETURN(1);
+       }
+
+       if (found_rows_for_union && !sl->braces &&
+           select_limit_cnt != HA_POS_ERROR)
+       {
+         /*
+           This is a union without braces. Remember the number of  
rows that
+           could also have been part of the result set.
+           We get this from the difference of between total number of  
possible
+           rows and actual rows added to the temporary table.
+         */
+         add_rows+= (ulonglong) (thd->limit_found_rows - (ulonglong)
+                                 ((table->file->stats.records -   
records_at_start)));
+       }
        }
-      if (found_rows_for_union && !sl->braces &&
-          select_limit_cnt != HA_POS_ERROR)
+      select_idx++;
+    }
+
+    if (kway_merge())
+    {
+      ha_rows save_select_limit_cnt, save_offset_limit_cnt;
+      JOIN *save_join= NULL, *kway_select_idx_join= NULL;
+      select_singlerow_internal kway_select_idx_result;
+      Item *kway_result_max= NULL;
+
+      /* Remember: no need to send_fields or send_eof on select_union  
result. */
+      thd_proc_info(thd, "Sending data");
+      DBUG_PRINT("info", ("%s", thd->proc_info));
+      enum_nested_loop_state error= NESTED_LOOP_OK;
+
+        // fetch a single unique result from each component
+        select_idx = 0;
+       for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+       {
+      bool first= true; // first try at non-dup in inner loop
+      ha_rows records_at_start= 0;
+
+         DBUG_PRINT("info", ("pre-processing select_idx %u",  
select_idx));
+
+      // process until we find a non-duplicate or we run out
+         while (bitmap_is_set(&kway_open_selects, select_idx) &&
+             (first || records_at_start == table->file->stats.records))
+         {
+        first= false;
+        records_at_start= table->file->stats.records;
+           DBUG_PRINT("info", ("pre-fetching select_idx %u",  
select_idx));
+           DBUG_ASSERT(sl->suspended_join->tables);
+           JOIN_TAB *join_tab= sl->suspended_join->join_tab + sl- 
 >suspended_join->const_tables;
+
+           /* set limit counters */
+           select_limit_cnt= sl->suspended_select_limit_cnt;
+           offset_limit_cnt= sl->suspended_offset_limit_cnt;
+
+           error= sub_select(sl->suspended_join,join_tab,0);
+           if (error == NESTED_LOOP_OK || error ==  
NESTED_LOOP_NO_MORE_ROWS)
+             error= sub_select(sl->suspended_join,join_tab,1);
+           if (error == NESTED_LOOP_QUERY_LIMIT)
+             error= NESTED_LOOP_OK;                    /*  
select_limit used */
+
+           /* backup limit counters */
+           sl->suspended_select_limit_cnt= select_limit_cnt;
+           sl->suspended_offset_limit_cnt= offset_limit_cnt; /*  
unnecessary */
+
+           if (error == NESTED_LOOP_CURSOR_LIMIT)
+           {
+             sl->suspended_join->resume_nested_loop= TRUE;
+             sl->suspended_join->fetch_limit+=1;
+           }
+           else if (error == NESTED_LOOP_OK)
+           {
+             DBUG_PRINT("info",("read all results from select_idx  
%u", select_idx));
+             /* NOTE: not sure if this is safe when others are open */
+             sl->suspended_join->join_free();
+             bitmap_clear_bit(&kway_open_selects, select_idx);
+           }
+           else
+           {
+             DBUG_PRINT("error",("Error: sub_select() failed for  
select_idx %u", select_idx));
+             sl->join->error = -1;
+             bitmap_clear_bit(&kway_open_selects, select_idx);
+
+             /* join_free all that haven't been */
+             select_idx = 0;
+             for (SELECT_LEX *sl2= select_cursor; sl2; sl2= sl2- 
 >next_select())
+             {
+               if (bitmap_is_set(&kway_open_selects, select_idx))
+               {
+                 DBUG_PRINT("info",("aborting select_idx %u",  
select_idx));
+                 sl2->suspended_join->join_free();
+               }
+               select_idx++;
+             }
+
+             thd->lex->current_select= lex_select_save;
+             DBUG_RETURN(-1);
+           }
+
+        /* Needed for the following test and for records_at_start in  
next loop */
+        if (union_result->flush())
+        {
+          thd->lex->current_select= lex_select_save;
+          DBUG_RETURN(1);
+        }
+        int table_error= table->file->info(HA_STATUS_VARIABLE);
+        if(table_error)
+        {
+          table->file->print_error(table_error, MYF(0));
+          DBUG_RETURN(1);
+        }
+         }
+         select_idx++;
+       }
+
+      // main k-way merge loop
+      // we only need select_limit_cnt - 1 fetches since we already  
have the top result from the pre-fetch above
+      for (ha_rows result_idx= 0; result_idx < select_limit_cnt - 1  
&& !bitmap_is_clear_all(&kway_open_selects); result_idx++)
+        {
+            ORDER *kway_order = (ORDER*)global_parameters- 
 >order_list.first; // convenience
+            bool first= true; // first try at non-dup in inner loop
+            COND *kway_result_cond= NULL;
+            char buff[32];
+            String dbug_strbuf(buff, sizeof(buff),  
system_charset_info), *dbug_str= NULL;
+            ha_rows records_at_start= 0;
+
+          // select next select_idx to "emit"
+
+        // is this necessary?
+        init_prepare_fake_select_lex(thd);
+        fake_select_lex->table_list.empty();
+
+        // allocate join ourselves only so that we can set  
no_const_tables on it
+        if (!(kway_select_idx_join= new JOIN(thd,  
kway_select_idx_item_list,
+                                             fake_select_lex- 
 >options, &kway_select_idx_result)))
+        {
+          DBUG_RETURN(TRUE);
+        }
+        kway_select_idx_join->no_const_tables= TRUE;
+
+        // FIXME:  do we need to steal this stuff from  
st_select_lex::init_query()
+        //         so that simplify_joins and  
build_bitmap_for_nested_joins are called?
+//        fake_select_lex->first_execution= 1;
+//        fake_select_lex->first_natural_join_processing= 1;
+//        fake_select_lex->first_cond_optimization= 1;
+
+        // select out select_idx
+        fake_select_lex->item_list= kway_select_idx_item_list;
+        kway_select_idx_result.assigned(0);
+
+        if (result_idx > 0)
+        {
+          // FIXME:  this only works for single-column order by
+          if (kway_order->asc)
+            kway_result_cond = comp_gt_creator(false)- 
 >create(kway_order->item[0], kway_result_max);
+          else
+            kway_result_cond = comp_lt_creator(false)- 
 >create(kway_order->item[0], kway_result_max);
+        }
+
+        save_select_limit_cnt= select_limit_cnt;
+        //save_offset_limit_cnt= offset_limit_cnt;
+        //offset_limit_cnt= result_idx;
+        select_limit_cnt= 1; //+ offset_limit_cnt;
+        save_join= fake_select_lex->join;
+        fake_select_lex->join= kway_select_idx_join;
+        saved_error= mysql_select(thd, &fake_select_lex- 
 >ref_pointer_array,
+                                  &result_table_list,
+                                  0, kway_select_idx_item_list,  
kway_result_cond,
+                                  global_parameters- 
 >order_list.elements,
+                                  kway_order,
+                                  (ORDER*) NULL, NULL, (ORDER*) NULL,
+                                  fake_select_lex->options |  
SELECT_NO_UNLOCK,
+                                  &kway_select_idx_result, this,  
fake_select_lex);
+        fake_select_lex->join= save_join;
+        select_limit_cnt= save_select_limit_cnt;
+        //offset_limit_cnt= save_offset_limit_cnt;
+
+        kway_select_idx_join->destroy();
+        delete kway_select_idx_join;
+        kway_select_idx_join= 0;
+
+        if (saved_error != NESTED_LOOP_OK || ! 
kway_select_idx_result.assigned())
+        {
+          DBUG_PRINT("error",("Error: mysql_select() for select_idx  
failed on result_idx %lu with code %d", (ulong) result_idx,  
saved_error));
+          thd->lex->current_select= lex_select_save;
+             DBUG_RETURN(-1);
+        }
+
+        select_idx= kway_select_idx_result.element_index(0)- 
 >val_uint();
+        DBUG_PRINT("info",("mysql_select() found select_idx %u for  
result_idx %lu of kway merge", select_idx, (ulong) result_idx));
+
+        kway_result_max= kway_select_idx_result.element_index(1);
+        if (dbug_str = kway_result_max->val_str(&dbug_strbuf))
+        {
+          DBUG_PRINT("info",("mysql_select() found kway_result_max %s  
for result_idx %lu of kway merge", dbug_str->c_ptr(), (ulong)  
result_idx));
+        }
+
+        // select the next result for the select_idx we just emitted  
a result from
+          select_cursor_2= select_cursor;
+          for (uint i = 0; i != select_idx; i++)
+          {
+            select_cursor_2= select_cursor_2->next_select();
+          }
+
+          // FIXME:  this is just duplicated from above, encapsulate me
+          // process until we find a non-duplicate or we run out
+            while (bitmap_is_set(&kway_open_selects, select_idx) &&
+                   (first || records_at_start == table->file- 
 >stats.records))
+            {
+              first= false;
+              records_at_start= table->file->stats.records;
+              DBUG_PRINT("info", ("fetching select_idx %u",  
select_idx));
+              DBUG_ASSERT(select_cursor_2->suspended_join->tables);
+              JOIN_TAB *join_tab= select_cursor_2->suspended_join- 
 >join_tab + select_cursor_2->suspended_join->const_tables;
+
+              /* set limit counters */
+              select_limit_cnt= select_cursor_2- 
 >suspended_select_limit_cnt;
+              offset_limit_cnt= select_cursor_2- 
 >suspended_offset_limit_cnt;
+
+              error= sub_select(select_cursor_2- 
 >suspended_join,join_tab,0);
+              if (error == NESTED_LOOP_OK || error ==  
NESTED_LOOP_NO_MORE_ROWS)
+                error= sub_select(select_cursor_2- 
 >suspended_join,join_tab,1);
+              if (error == NESTED_LOOP_QUERY_LIMIT)
+                error= NESTED_LOOP_OK;                    /*  
select_limit used */
+
+              /* backup limit counters */
+              select_cursor_2->suspended_select_limit_cnt=  
select_limit_cnt;
+              select_cursor_2->suspended_offset_limit_cnt=  
offset_limit_cnt; /* unnecessary */
+
+              if (error == NESTED_LOOP_CURSOR_LIMIT)
+              {
+                select_cursor_2->suspended_join->resume_nested_loop=  
TRUE;
+                select_cursor_2->suspended_join->fetch_limit+=1;
+              }
+              else if (error == NESTED_LOOP_OK)
+              {
+                DBUG_PRINT("info",("read all results from select_idx  
%u", select_idx));
+                /* NOTE: not sure if this is safe when others are  
open */
+                select_cursor_2->suspended_join->join_free();
+                bitmap_clear_bit(&kway_open_selects, select_idx);
+              }
+              else
+              {
+                DBUG_PRINT("error",("Error: sub_select() failed for  
select_idx %u", select_idx));
+                select_cursor_2->join->error = -1;
+                bitmap_clear_bit(&kway_open_selects, select_idx);
+
+                /* join_free all that haven't been */
+                select_idx = 0;
+                for (SELECT_LEX *sl2= select_cursor; sl2; sl2= sl2- 
 >next_select())
+                {
+                  if (bitmap_is_set(&kway_open_selects, select_idx))
+                  {
+                    DBUG_PRINT("info",("aborting select_idx %u",  
select_idx));
+                    sl2->suspended_join->join_free();
+                  }
+                  select_idx++;
+                }
+
+                thd->lex->current_select= lex_select_save;
+                DBUG_RETURN(-1);
+              }
+
+              /* Needed for the following test and for  
records_at_start in next loop */
+              if (union_result->flush())
+              {
+                thd->lex->current_select= lex_select_save;
+                DBUG_RETURN(1);
+              }
+              int table_error= table->file->info(HA_STATUS_VARIABLE);
+              if(table_error)
+              {
+                table->file->print_error(table_error, MYF(0));
+                DBUG_RETURN(1);
+              }
+            }
+      }
+
+      // FIXME:  encapsulate
+      /* join_free all that haven't been */
+      select_idx = 0;
+      for (SELECT_LEX *sl2= select_cursor; sl2; sl2= sl2- 
 >next_select())
        {
-       /*
-         This is a union without braces. Remember the number of rows  
that
-         could also have been part of the result set.
-         We get this from the difference of between total number of  
possible
-         rows and actual rows added to the temporary table.
-       */
-       add_rows+= (ulonglong) (thd->limit_found_rows - (ulonglong)
-                             ((table->file->stats.records -   
records_at_start)));
+        if (bitmap_is_set(&kway_open_selects, select_idx))
+        {
+          DBUG_PRINT("info",("freeing select_idx %u", select_idx));
+          sl2->suspended_join->join_free();
+        }
+        select_idx++;
+      }
+
+      select_idx= 0;
+      for (SELECT_LEX *sl= select_cursor; sl; sl= sl->next_select())
+      {
+       DBUG_PRINT("info",("%ld records output by select_idx %u",  
(long) sl->suspended_join ? sl->suspended_join->send_records : sl- 
 >join->send_records, select_idx));
+       /* unwrapped cleanup from JOIN::exec after it would have  
called do_select */
+
+       /* Accumulate the counts from all join iterations of all join  
parts. */
+       if (sl->suspended_join)
+         examined_rows+= sl->suspended_join->examined_rows;
+       DBUG_PRINT("counts", ("thd->examined_row_count: %lu",
+                             (ulong) thd->examined_row_count));
+       /* FIXME:  this doesn't really make sense here, don't try to  
handle union all */
+//     if (sl == union_distinct)
+//     {
+//       if (table->file->ha_disable_indexes(HA_KEY_SWITCH_ALL))
+//       {
+//         DBUG_RETURN(TRUE);
+//       }
+//       table->no_keyread=1;
+//     }
+
+       select_idx++;
        }
      }
    }
@@ -600,7 +1027,7 @@ bool st_select_lex_unit::exec()
          {
            join->examined_rows= 0;
            saved_error= join->reinit();
-          join->exec();
+          join->exec(false);
          }
        }

diff -rup mysql-5.1.30/sql/sql_update.cc mysql-5.1.30.kway/sql/ 
sql_update.cc
--- mysql-5.1.30/sql/sql_update.cc      2008-11-14 08:37:23.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_update.cc 2009-06-04 13:18:30.000000000  
-0700
@@ -1536,7 +1536,7 @@ multi_update::initialize_tables(JOIN *jo
                                            (ORDER*) &group, 0, 0,
                                            TMP_TABLE_ALL_COLUMNS,
                                            HA_POS_ERROR,
-                                          (char *) "")))
+                                          (char *) "", false)))
        DBUG_RETURN(1);
      tmp_tables[cnt]->file->extra(HA_EXTRA_WRITE_CACHE);
    }
diff -rup mysql-5.1.30/sql/sql_yacc.yy mysql-5.1.30.kway/sql/sql_yacc.yy
--- mysql-5.1.30/sql/sql_yacc.yy        2008-11-14 08:37:23.000000000  
-0800
+++ mysql-5.1.30.kway/sql/sql_yacc.yy   2009-04-13 19:58:17.000000000  
-0700
@@ -998,6 +998,7 @@ bool my_yyoverflow(short **a, YYSTYPE **
  %token  SQL_SMALL_RESULT
  %token  SQL_SYM                       /* SQL-2003-R */
  %token  SQL_THREAD
+%token  SQL_ASSUME_SORTED_UNION
  %token  SSL_SYM
  %token  STARTING
  %token  STARTS_SYM
@@ -6494,6 +6495,18 @@ select_option:
          | DISTINCT         { Select->options|= SELECT_DISTINCT; }
          | SQL_SMALL_RESULT { Select->options|= SELECT_SMALL_RESULT; }
          | SQL_BIG_RESULT   { Select->options|= SELECT_BIG_RESULT; }
+        | SQL_ASSUME_SORTED_UNION
+          {
+           /* assertions
+              1. this is a union:  code below segfaults, too early to  
check this?
+              2. all are ordered:  all have order_list.elements?
+              3. all order_lists are equivalent?
+
+            if (!Lex->current_select->master_unit()->is_union())
+              MYSQL_YYABORT;
+                   */
+            Select->options|= SELECT_ASSUME_SORTED_UNION;
+          }
          | SQL_BUFFER_RESULT
            {
              if (check_simple_select())

(ej@lakitu) ~> cat mysql-5.1.30.patched//sql-bench/test-kway-merge.sh
#!@PERL@
# Copyright (C) 2000-2001, 2003 MySQL AB
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; version 2
# of the License.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA
#
# Test of k-way merge in union
#
##################### Standard benchmark inits  
##############################

use Cwd;
use DBI;
use Getopt::Long;
use Benchmark;

# fix sequence
srand(0);

$opt_loop_count=1000;
$opt_small_loop_count=100;
$opt_unions=100;
$opt_limit=100;

$pwd = cwd(); $pwd = "." if ($pwd eq '');
require "$pwd/bench-init.pl" || die "Can't read Configuration file: $! 
\n";

$opt_unions=min($opt_unions,($limits->{'query_size'}-50)/24);

if ($opt_small_test)
{
   $opt_loop_count/=10;
   $opt_small_loop_count/=10;
   $opt_unions/=10;
   $opt_limit/=10;
}

print "Testing the speed of k-way merge vs traditional union\n";
print "The $opt_union test-tables have about " . ($opt_loop_count/10 +  
$opt_loop_count) . " rows each, queries have $opt_unions unions, and  
request a limit of $opt_limit results.  They will be run in a loop  
$opt_small_loop_count times.\n\n";

####
####  Connect and start timeing
####

$dbh = $server->connect();
$start_time=new Benchmark;

####
#### Create needed tables
####

goto select_test if ($opt_skip_create);

print "Creating tables\n";

for ($i = 0; $i < $opt_unions; $i++) {
   $dbh->do("drop table bench$i" . $server->{'drop_attr'});

   do_many($dbh,$server->create("bench$i",
                              ["id int NOT NULL",
                               "i1 int NOT NULL",
                               "i2 int NOT NULL",
                               "i3 int NOT NULL",
                               "vc1 varchar(150) NOT NULL",
                               "vc2 varchar(100) NOT NULL",
                               "vc3 varchar(50) NOT NULL"],
                              ["primary key (id)",
                               "index (i1)",
                               "index (i2)"]));
}

if ($opt_lock_tables)
{
   $query = "LOCK TABLES ";
   for ($i = 0; $i < $opt_unions; $i++) {
     $query .= "bench$i WRITE";
     $query .= ", " if ($i < $opt_unions - 1);
   }
   do_query($dbh,$query);
}

if ($opt_fast && defined($server->{vacuum}))
{
   $server->vacuum(1,\$dbh);
}

####
#### Insert $opt_loop_count records with
#### id:        distributed values 0 - > $opt_loop_count/100
####

print "Inserting " . ($opt_loop_count/10) . " rows\n";

$loop_time=new Benchmark;

if ($opt_fast && $server->{transactions})
{
   $dbh->{AutoCommit} = 0;
}

for ($i = 0; $i < $opt_unions; $i++) {
   $query="insert ignore into bench$i values";
   for ($j=0; $j < $opt_loop_count ; $j++)
   {
     $id = $i1 = $i2 = $i3 = int(rand($opt_loop_count/10)) + 1;
     $vc1 = "repeat('$id',75)";
     $vc2 = "repeat('$id',50)";
     $vc3 = "repeat('$id',25)";
     do_query($dbh,"$query ($id,$i1,$i2,$i3,$vc1,$vc2,$vc3)");
   }
}

if ($opt_fast && $server->{transactions})
{
   $dbh->commit;
   $dbh->{AutoCommit} = 1;
}

$end_time=new Benchmark;
print "Time to insert ($opt_loop_count): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n\n";

if ($opt_lock_tables)
{
   do_query($dbh,"UNLOCK TABLES");
}

if ($opt_fast && defined($server->{vacuum}))
{
   for ($i = 0; $i < $opt_unions; $i++) {
     $server->vacuum(0,\$dbh,"bench$i");
   }
}

if ($opt_lock_tables)
{
   $query = "LOCK TABLES ";
   for ($i = 0; $i < $opt_unions; $i++) {
     $query .= "bench$i WRITE";
     $query .= ", " if ($i < $opt_unions - 1);
   }
   do_query($dbh,$query);
}


####
#### Do some selects on the tables
####

select_test:

if ($opt_fast) {
print "Testing unions with k-way merge descending\n";
$loop_time=new Benchmark;
$rows=0;
for ($i=0 ; $i < $opt_small_loop_count ; $i++)
{
   $query = "";
   for ($j = 0; $j < $opt_unions; $j++) {
     $query .= ($j == 0 ? "(select SQL_ASSUME_SORTED_UNION " :  
"(select ");
     $query .= "* from bench$j order by id desc limit $opt_limit)";
     $query .= " union " if ($j < $opt_unions - 1);
   }
   $rows+=fetch_all_rows($dbh,"$query order by id desc limit  
$opt_limit");
}
$count=$opt_small_loop_count;

$end_time=new Benchmark;
print "Time for union_with_kway_desc ($count:$rows): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n";
}

print "Testing unions without k-way merge descending\n";
$loop_time=new Benchmark;
$rows=0;
for ($i=0 ; $i < $opt_small_loop_count ; $i++)
{
   $query = "";
   for ($j = 0; $j < $opt_unions; $j++) {
     $query .= "(select * from bench$j order by id desc limit  
$opt_limit)";
     $query .= " union " if ($j < $opt_unions - 1);
   }
   $rows+=fetch_all_rows($dbh,"$query order by id desc limit  
$opt_limit");
}
$count=$opt_small_loop_count;

$end_time=new Benchmark;
print "Time for union_without_kway_desc ($count:$rows): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n";

####
#### Insert $opt_loop_count records with
#### id:        distributed values 0 - > $opt_loop_count
####

print "\nInserting about $opt_loop_count more rows\n";

$loop_time=new Benchmark;

if ($opt_fast && $server->{transactions})
{
   $dbh->{AutoCommit} = 0;
}

for ($i = 0; $i < $opt_unions; $i++) {
   $query="insert ignore into bench$i values";
   for ($j=0; $j < $opt_loop_count ; $j++)
   {
     $id = $i1 = $i2 = $i3 = int(rand($opt_loop_count)) + 1;
     $vc1 = "repeat('$id',75)";
     $vc2 = "repeat('$id',50)";
     $vc3 = "repeat('$id',25)";
     do_query($dbh,"$query ($id,$i1,$i2,$i3,$vc1,$vc2,$vc3)");
   }
}

if ($opt_fast && $server->{transactions})
{
   $dbh->commit;
   $dbh->{AutoCommit} = 1;
}

$end_time=new Benchmark;
print "Time to insert again ($opt_loop_count): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n\n";

print "Testing unions without k-way merge ascending\n";
$loop_time=new Benchmark;
$rows=0;
for ($i=0 ; $i < $opt_small_loop_count ; $i++)
{
   $query = "";
   for ($j = 0; $j < $opt_unions; $j++) {
     $query .= "(select * from bench$j order by id asc limit  
$opt_limit)";
     $query .= " union " if ($j < $opt_unions - 1);
   }
   $rows+=fetch_all_rows($dbh,"$query order by id asc limit  
$opt_limit");
}
$count=$opt_small_loop_count;

$end_time=new Benchmark;
print "Time for union_without_kway_asc ($count:$rows): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n";

if ($opt_fast) {
print "Testing unions with k-way merge ascending\n";
$loop_time=new Benchmark;
$rows=0;
for ($i=0 ; $i < $opt_small_loop_count ; $i++)
{
   $query = "";
   for ($j = 0; $j < $opt_unions; $j++) {
     $query .= ($j == 0 ? "(select SQL_ASSUME_SORTED_UNION " :  
"(select ");
     $query .= "* from bench$j order by id asc limit $opt_limit)";
     $query .= " union " if ($j < $opt_unions - 1);
   }
   $rows+=fetch_all_rows($dbh,"$query order by id asc limit  
$opt_limit");
}
$count=$opt_small_loop_count;

$end_time=new Benchmark;
print "Time for union_with_kway_asc ($count:$rows): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n";
}

####
#### End of benchmark
####

if ($opt_lock_tables)
{
   do_query($dbh,"UNLOCK TABLES");
}
if (!$opt_skip_delete)
{
   for ($i = 0; $i < $opt_unions; $i++) {
     do_query($dbh,"drop table bench$i" . $server->{'drop_attr'});
   }
}

if ($opt_fast && defined($server->{vacuum}))
{
   $server->vacuum(0,\$dbh);
}

$dbh->disconnect;                               # close connection

end_benchmark($start_time);

Thread
proposed design for UNION Order By optimizationEric Jensen13 Mar
  • RE: proposed design for UNION Order By optimizationRick James13 Mar
  • Re: proposed design for UNION Order By optimizationEric Jensen6 Jun
    • RE: proposed design for UNION Order By optimizationRick James6 Jun
    • Re: proposed design for UNION Order By optimizationEric Jensen7 Jun
    • Re: proposed design for UNION Order By optimizationEric Jensen5 Jul
      • Re: proposed design for UNION Order By optimizationEric Jensen5 Jul
        • Re: proposed design for UNION Order By optimizationEric Jensen6 Jul
Re: proposed design for UNION Order By optimizationEric Jensen6 Jun
Re: proposed design for UNION Order By optimizationEric Jensen12 Jul