List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:November 16 2008 9:46pm
Subject:bzr commit into mysql-6.0-opt-subqueries branch (sergefp:2720)
View as plain text  
#At file:///home/spetrunia/dev/mysql-6.0-exists2in/

 2720 Sergey Petrunia	2008-11-17 [merge]
      mysql-6.0-opt-subqueries -> mysql-6.0-exists2in merge
modified:
  mysql-test/r/subselect3.result
  mysql-test/t/subselect3.test
  sql/item_cmpfunc.h
  sql/mysql_priv.h
  sql/mysqld.cc
  sql/sql_select.cc

=== modified file 'mysql-test/r/subselect3.result'
--- a/mysql-test/r/subselect3.result	2008-10-30 17:29:28 +0000
+++ b/mysql-test/r/subselect3.result	2008-11-16 19:47:47 +0000
@@ -1,4 +1,4 @@
-drop table if exists t0, t1, t2, t3, t4, t5;
+drop table if exists t0, t1, t2, t3, t4, t5, t11, t12, t21, t22;
 create table t1 (oref int, grp int, ie int) ;
 insert into t1 (oref, grp, ie) values
 (1, 1, 1),
@@ -822,3 +822,58 @@ t1.a < (select t4.a+10                  
 from t4, t5 limit 2));
 ERROR 21000: Subquery returns more than 1 row
 drop table t0, t1, t2, t3, t4, t5;
+CREATE TABLE t1 (
+a int(11) NOT NULL,
+b int(11) NOT NULL,
+c datetime default NULL,
+PRIMARY KEY  (a),
+KEY idx_bc (b,c)
+);
+INSERT INTO t1 VALUES 
+(406989,67,'2006-02-23 17:08:46'), (150078,67,'2005-10-26 11:17:45'),
+(406993,67,'2006-02-27 11:20:57'), (245655,67,'2005-12-08 15:59:08'),
+(406994,67,'2006-02-27 11:26:46'), (256,67,NULL),
+(398341,67,'2006-02-20 04:48:44'), (254,67,NULL),(1120,67,NULL),
+(406988,67,'2006-02-23 17:07:22'), (255,67,NULL),
+(398340,67,'2006-02-20 04:38:53'),(406631,67,'2006-02-23 10:49:42'),
+(245653,67,'2005-12-08 15:59:07'),(406992,67,'2006-02-24 16:47:18'),
+(245654,67,'2005-12-08 15:59:08'),(406995,67,'2006-02-28 11:55:00'),
+(127261,67,'2005-10-13 12:17:58'),(406991,67,'2006-02-24 16:42:32'),
+(245652,67,'2005-12-08 15:58:27'),(398545,67,'2006-02-20 04:53:13'),
+(154504,67,'2005-10-28 11:53:01'),(9199,67,NULL),(1,67,'2006-02-23 15:01:35'),
+(223456,67,NULL),(4101,67,NULL),(1133,67,NULL),
+(406990,67,'2006-02-23 18:01:45'),(148815,67,'2005-10-25 15:34:17'),
+(148812,67,'2005-10-25 15:30:01'),(245651,67,'2005-12-08 15:58:27'),
+(154503,67,'2005-10-28 11:52:38');
+create table t11 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 asc;
+create table t12 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 desc;
+create table t21 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 asc;
+create table t22 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 desc;
+update t22 set c = '2005-12-08 15:58:27' where a = 255;
+explain select t21.* from t21,t22 where t21.a = t22.a and 
+t22.a in (select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t11	ALL	NULL	NULL	NULL	NULL	8	Using where; Using temporary; Using filesort; Start materialize; Scan
+1	PRIMARY	t12	ALL	NULL	NULL	NULL	NULL	8	Using where; End materialize; Using join buffer
+1	PRIMARY	t21	ALL	NULL	NULL	NULL	NULL	26	Using where; Using join buffer
+1	PRIMARY	t22	ALL	NULL	NULL	NULL	NULL	32	Using where; Using join buffer
+select t21.* from t21,t22 where t21.a = t22.a and 
+t22.a in (select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a;
+a	b	c
+256	67	NULL
+drop table t1, t11, t12, t21, t22;
+create table t1(a int);
+insert into t1 values (0),(1);
+set @@optimizer_switch='no_firstmatch';
+explain 
+select (select max(Y.a) from t1 Y where a in (select a from t1 Z) and a < X.a) as subq from t1 X;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	X	ALL	NULL	NULL	NULL	NULL	2	
+2	DEPENDENT SUBQUERY	Y	ALL	NULL	NULL	NULL	NULL	2	Using where
+2	DEPENDENT SUBQUERY	Z	ALL	NULL	NULL	NULL	NULL	2	Materialize
+select (select max(Y.a) from t1 Y where a in (select a from t1 Z) and a < X.a) as subq from t1 X;
+subq
+NULL
+0
+set @@optimizer_switch='';
+drop table t1;

=== modified file 'mysql-test/t/subselect3.test'
--- a/mysql-test/t/subselect3.test	2008-10-30 17:29:28 +0000
+++ b/mysql-test/t/subselect3.test	2008-11-16 19:47:47 +0000
@@ -1,5 +1,5 @@
 --disable_warnings
-drop table if exists t0, t1, t2, t3, t4, t5;
+drop table if exists t0, t1, t2, t3, t4, t5, t11, t12, t21, t22;
 --enable_warnings
 
 #
@@ -678,3 +678,64 @@ where
                                   from t4, t5 limit 2));
 
 drop table t0, t1, t2, t3, t4, t5;
+
+#
+# Test for the problem with using sj-materialization when subquery's select 
+# list element SCOL is covered by equality propagation and has preceding equal
+# column PCOL which belongs to a table within the the semi-join nest: SJM-Scan
+# process should unpack column value not to SCOL but rather to PCOL, as 
+# substitute_best_equal has made all conditions to refer to PCOL.
+#
+CREATE TABLE t1 (
+  a int(11) NOT NULL,
+  b int(11) NOT NULL,
+  c datetime default NULL,
+  PRIMARY KEY  (a),
+  KEY idx_bc (b,c)
+);
+
+INSERT INTO t1 VALUES 
+(406989,67,'2006-02-23 17:08:46'), (150078,67,'2005-10-26 11:17:45'),
+(406993,67,'2006-02-27 11:20:57'), (245655,67,'2005-12-08 15:59:08'),
+(406994,67,'2006-02-27 11:26:46'), (256,67,NULL),
+(398341,67,'2006-02-20 04:48:44'), (254,67,NULL),(1120,67,NULL),
+(406988,67,'2006-02-23 17:07:22'), (255,67,NULL),
+(398340,67,'2006-02-20 04:38:53'),(406631,67,'2006-02-23 10:49:42'),
+(245653,67,'2005-12-08 15:59:07'),(406992,67,'2006-02-24 16:47:18'),
+(245654,67,'2005-12-08 15:59:08'),(406995,67,'2006-02-28 11:55:00'),
+(127261,67,'2005-10-13 12:17:58'),(406991,67,'2006-02-24 16:42:32'),
+(245652,67,'2005-12-08 15:58:27'),(398545,67,'2006-02-20 04:53:13'),
+(154504,67,'2005-10-28 11:53:01'),(9199,67,NULL),(1,67,'2006-02-23 15:01:35'),
+(223456,67,NULL),(4101,67,NULL),(1133,67,NULL),
+(406990,67,'2006-02-23 18:01:45'),(148815,67,'2005-10-25 15:34:17'),
+(148812,67,'2005-10-25 15:30:01'),(245651,67,'2005-12-08 15:58:27'),
+(154503,67,'2005-10-28 11:52:38');
+
+create table t11 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 asc;
+create table t12 select * from t1 where b = 67 AND (c IS NULL OR c > NOW()) order by 3 desc;
+create table t21 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 asc;
+create table t22 select * from t1 where b = 67 AND (c IS NULL OR c > '2005-12-08') order by 3 desc;
+
+update t22 set c = '2005-12-08 15:58:27' where a = 255;
+explain select t21.* from t21,t22 where t21.a = t22.a and 
+t22.a in (select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a;
+select t21.* from t21,t22 where t21.a = t22.a and 
+t22.a in (select t12.a from t11, t12 where t11.a in(255,256) and t11.a = t12.a and t11.c is null) and t22.c is null order by t21.a;
+
+drop table t1, t11, t12, t21, t22;
+
+#
+# Test sj-materialization re-execution. The test isn't meaningful (materialized
+# table stays the same across all executions) because it's hard to create a
+# dataset that would verify correct re-execution without hitting BUG#31480
+# 
+create table t1(a int);
+insert into t1 values (0),(1);
+
+set @@optimizer_switch='no_firstmatch';
+explain 
+select (select max(Y.a) from t1 Y where a in (select a from t1 Z) and a < X.a) as subq from t1 X;
+select (select max(Y.a) from t1 Y where a in (select a from t1 Z) and a < X.a) as subq from t1 X;
+set @@optimizer_switch='';
+
+drop table t1;

=== modified file 'sql/item_cmpfunc.h'
--- a/sql/item_cmpfunc.h	2008-10-29 20:35:16 +0000
+++ b/sql/item_cmpfunc.h	2008-11-16 15:19:48 +0000
@@ -1572,6 +1572,7 @@ public:
   for them. We have to take care of restricting the predicate such an
   object represents f1=f2= ...=fn to the projection of known fields fi1=...=fik.
 */
+struct st_join_table;
 
 class Item_equal: public Item_bool_func
 {
@@ -1610,6 +1611,7 @@ public:
   { return fields.head()->collation.collation; }
   friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
                            Item_equal *item_equal);
+  friend bool setup_sj_materialization(struct st_join_table *tab);
 }; 
 
 class COND_EQUAL: public Sql_alloc

=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h	2008-11-09 17:13:57 +0000
+++ b/sql/mysql_priv.h	2008-11-16 15:19:48 +0000
@@ -581,7 +581,7 @@ enum open_table_mode
 #define OPTIMIZER_SWITCH_NO_MATERIALIZATION 1
 #define OPTIMIZER_SWITCH_NO_SEMIJOIN 2
 #define OPTIMIZER_SWITCH_NO_LOOSE_SCAN 4
-
+#define OPTIMIZER_SWITCH_NO_FIRSTMATCH 8
 
 /*
   Replication uses 8 bytes to store SQL_MODE in the binary log. The day you

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2008-10-30 17:29:28 +0000
+++ b/sql/mysqld.cc	2008-11-16 15:19:48 +0000
@@ -333,7 +333,7 @@ TYPELIB sql_mode_typelib= { array_elemen
 
 static const char *optimizer_switch_names[]=
 {
-  "no_materialization", "no_semijoin", "no_loosescan",
+  "no_materialization", "no_semijoin", "no_loosescan", "no_firstmatch",
   NullS
 };
 
@@ -343,6 +343,7 @@ static const unsigned int optimizer_swit
   /*no_materialization*/          18,
   /*no_semijoin*/                 11,
   /*no_loosescan*/                12,
+  /*no_firstmatch*/               13
 };
 
 TYPELIB optimizer_switch_typelib= { array_elements(optimizer_switch_names)-1,"",

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2008-11-14 09:46:50 +0000
+++ b/sql/sql_select.cc	2008-11-16 21:46:03 +0000
@@ -270,6 +270,14 @@ bool subquery_types_allow_materializatio
 int do_sj_reset(SJ_TMP_TABLE *sj_tbl);
 TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg,
                                           SJ_TMP_TABLE *sjtbl);
+inline bool optimizer_flag(THD *thd, uint flag)
+{ 
+  return (thd->variables.optimizer_switch & flag);
+}
+
+Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
+                            bool *inherited_fl);
+
 
 /**
   This handles SELECT with and without UNION.
@@ -572,8 +580,6 @@ JOIN::prepare(Item ***rref_pointer_array
   if (!thd->lex->view_prepare_mode &&                    // (1)
       (subselect= select_lex->master_unit()->item))      // (2)
   {
-    bool do_semijoin= !test(thd->variables.optimizer_switch &
-                            OPTIMIZER_SWITCH_NO_SEMIJOIN);
     bool do_materialize= !test(thd->variables.optimizer_switch &
                                OPTIMIZER_SWITCH_NO_MATERIALIZATION);
 
@@ -614,7 +620,7 @@ JOIN::prepare(Item ***rref_pointer_array
         8. No execution method was already chosen (by a prepared statement)
         9. Parent select is not a confluent table-less select
     */
-    if (do_semijoin &&
+    if (!optimizer_flag(thd, OPTIMIZER_SWITCH_NO_SEMIJOIN) &&
         item_subs &&                                                  // 1
         !select_lex->is_part_of_union() &&                            // 2
         !select_lex->group_list.elements && !order &&                 // 3
@@ -706,7 +712,7 @@ JOIN::prepare(Item ***rref_pointer_array
         perform the whole transformation or only that part of it which wraps
         Item_in_subselect in an Item_in_optimizer.
       */
-      if (do_materialize && 
+      if (!optimizer_flag(thd, OPTIMIZER_SWITCH_NO_MATERIALIZATION)  && 
           subselect->substype() == Item_subselect::IN_SUBS &&           // 1
           !select_lex->is_part_of_union() &&                            // 2
           select_lex->master_unit()->first_select()->leaf_tables &&     // 3
@@ -1210,19 +1216,12 @@ static bool sj_table_is_included(JOIN *j
 
       (4) - THe suffix of outer and outer non-correlated tables.
 
-    If several strategies are applicable, their relative priorities are:
-      1. LooseScan
-      2. FirstMatch 
-      3. DuplicateWeedout
-
-    This function walks over the join order and sets up the strategies by
-    setting appropriate members in join_tab structures.
   
-  Optimizer
-  =========
-  We have the choice made for us by the join optimizer. The optimizer
-  guarantees that applicability conditions for loosescan
-
+  The choice between the strategies is made by the join optimizer (see
+  advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()).
+  This function sets up all fields/structures/etc needed for execution except
+  for setup/initialization of semi-join materialization which is done in 
+  setup_sj_materialization() (todo: can't we move that to here also?)
     A "trivially-correlated subquery" is defined as a subquery used in an
     IN/=ANY or EXISTS predicate on the form:
       (SELECT select-list
@@ -4646,100 +4645,103 @@ static bool optimize_semijoin_nests(JOIN
   DBUG_ENTER("optimize_semijoin_nests");
   List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
   TABLE_LIST *sj_nest;
-  while ((sj_nest= sj_list_it++))
+  if (!optimizer_flag(join->thd, OPTIMIZER_SWITCH_NO_MATERIALIZATION))
   {
-    sj_nest->sj_mat_info= NULL;
-    if (sj_nest->sj_inner_tables && /* not everything was pulled out */
-        !sj_nest->sj_subq_pred->is_correlated && 
-         sj_nest->sj_subq_pred->types_allow_materialization)
+    while ((sj_nest= sj_list_it++))
     {
-      join->emb_sjm_nest= sj_nest;
-      if (choose_plan(join, all_table_map))
-        DBUG_RETURN(TRUE);
-      /*
-        The best plan to run the subquery is now in join->best_positions,
-        save it.
-      */
-      uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
-      SJ_MATERIALIZATION_INFO* sjm;
-      if (!(sjm= new SJ_MATERIALIZATION_INFO) ||
-          !(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)*
-                                                        n_tables)))
-        DBUG_RETURN(TRUE);
-      sjm->tables= n_tables;
-      sjm->is_used= FALSE;
-      double subjoin_out_rows, subjoin_read_time;
-      get_partial_join_cost(join, n_tables,
-                            &subjoin_read_time, &subjoin_out_rows);
+      sj_nest->sj_mat_info= NULL;
+      if (sj_nest->sj_inner_tables && /* not everything was pulled out */
+          !sj_nest->sj_subq_pred->is_correlated && 
+           sj_nest->sj_subq_pred->types_allow_materialization)
+      {
+        join->emb_sjm_nest= sj_nest;
+        if (choose_plan(join, all_table_map))
+          DBUG_RETURN(TRUE);
+        /*
+          The best plan to run the subquery is now in join->best_positions,
+          save it.
+        */
+        uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
+        SJ_MATERIALIZATION_INFO* sjm;
+        if (!(sjm= new SJ_MATERIALIZATION_INFO) ||
+            !(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)*
+                                                          n_tables)))
+          DBUG_RETURN(TRUE);
+        sjm->tables= n_tables;
+        sjm->is_used= FALSE;
+        double subjoin_out_rows, subjoin_read_time;
+        get_partial_join_cost(join, n_tables,
+                              &subjoin_read_time, &subjoin_out_rows);
 
-      sjm->materialization_cost.convert_from_cost(subjoin_read_time);
-      sjm->rows= subjoin_out_rows;
+        sjm->materialization_cost.convert_from_cost(subjoin_read_time);
+        sjm->rows= subjoin_out_rows;
 
-      List<Item> &right_expr_list= 
-        sj_nest->sj_subq_pred->unit->first_select()->item_list;
-      /*
-        Adjust output cardinality estimates. If the subquery has form
+        List<Item> &right_expr_list= 
+          sj_nest->sj_subq_pred->unit->first_select()->item_list;
+        /*
+          Adjust output cardinality estimates. If the subquery has form
 
-         ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
+           ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
 
-         then the number of distinct output record combinations has an
-         upper bound of product of number of records matching the tables 
-         that are used by the SELECT clause.
-         TODO:
-           We can get a more precise estimate if we
-            - use rec_per_key cardinality estimates. For simple cases like 
-              "oe IN (SELECT t.key ...)" it is trivial. 
-            - Functional dependencies between the tables in the semi-join
-              nest (the payoff is probably less here?)
-      */
-      {
-        for (uint i=0 ; i < join->const_tables + sjm->tables ; i++)
+           then the number of distinct output record combinations has an
+           upper bound of product of number of records matching the tables 
+           that are used by the SELECT clause.
+           TODO:
+             We can get a more precise estimate if we
+              - use rec_per_key cardinality estimates. For simple cases like 
+                "oe IN (SELECT t.key ...)" it is trivial. 
+              - Functional dependencies between the tables in the semi-join
+                nest (the payoff is probably less here?)
+        */
         {
-          JOIN_TAB *tab= join->best_positions[i].table;
-          join->map2table[tab->table->tablenr]= tab;
+          for (uint i=0 ; i < join->const_tables + sjm->tables ; i++)
+          {
+            JOIN_TAB *tab= join->best_positions[i].table;
+            join->map2table[tab->table->tablenr]= tab;
+          }
+          List_iterator<Item> it(right_expr_list);
+          Item *item;
+          table_map map= 0;
+          while ((item= it++))
+            map |= item->used_tables();
+          map= map & ~PSEUDO_TABLE_BITS;
+          Table_map_iterator tm_it(map);
+          int tableno;
+          double rows= 1.0;
+          while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+            rows *= join->map2table[tableno]->table->quick_condition_rows;
+          sjm->rows= min(sjm->rows, rows);
         }
-        List_iterator<Item> it(right_expr_list);
-        Item *item;
-        table_map map= 0;
-        while ((item= it++))
-          map |= item->used_tables();
-        map= map & ~PSEUDO_TABLE_BITS;
-        Table_map_iterator tm_it(map);
-        int tableno;
-        double rows= 1.0;
-        while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
-          rows *= join->map2table[tableno]->table->quick_condition_rows;
-        sjm->rows= min(sjm->rows, rows);
-      }
-      memcpy(sjm->positions, join->best_positions + join->const_tables, 
-             sizeof(POSITION) * n_tables);
+        memcpy(sjm->positions, join->best_positions + join->const_tables, 
+               sizeof(POSITION) * n_tables);
 
-      /*
-        Calculate temporary table parameters and usage costs
-      */
-      uint rowlen= get_tmp_table_rec_length(right_expr_list);
-      double lookup_cost;
-      if (rowlen * subjoin_out_rows< join->thd->variables.max_heap_table_size)
-        lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
-      else
-        lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
+        /*
+          Calculate temporary table parameters and usage costs
+        */
+        uint rowlen= get_tmp_table_rec_length(right_expr_list);
+        double lookup_cost;
+        if (rowlen * subjoin_out_rows< join->thd->variables.max_heap_table_size)
+          lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
+        else
+          lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
 
-      /*
-        Let materialization cost include the cost to write the data into the
-        temporary table:
-      */ 
-      sjm->materialization_cost.add_io(subjoin_out_rows, lookup_cost);
-      
-      /*
-        Set the cost to do a full scan of the temptable (will need this to 
-        consider doing sjm-scan):
-      */ 
-      sjm->scan_cost.zero();
-      sjm->scan_cost.add_io(sjm->rows, lookup_cost);
+        /*
+          Let materialization cost include the cost to write the data into the
+          temporary table:
+        */ 
+        sjm->materialization_cost.add_io(subjoin_out_rows, lookup_cost);
+        
+        /*
+          Set the cost to do a full scan of the temptable (will need this to 
+          consider doing sjm-scan):
+        */ 
+        sjm->scan_cost.zero();
+        sjm->scan_cost.add_io(sjm->rows, lookup_cost);
 
-      sjm->lookup_cost.convert_from_cost(lookup_cost);
-      sj_nest->sj_mat_info= sjm;
-      DBUG_EXECUTE("opt", print_sjm(sjm););
+        sjm->lookup_cost.convert_from_cost(lookup_cost);
+        sj_nest->sj_mat_info= sjm;
+        DBUG_EXECUTE("opt", print_sjm(sjm););
+      }
     }
   }
   join->emb_sjm_nest= NULL;
@@ -5982,7 +5984,7 @@ public:
         !(remaining_tables & 
           s->emb_sj_nest->nested_join->sj_corr_tables) &&               // (4)
         remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5)
-        !test(join->thd->variables.optimizer_switch & OPTIMIZER_SWITCH_NO_LOOSE_SCAN))
+        !optimizer_flag(join->thd, OPTIMIZER_SWITCH_NO_LOOSE_SCAN))
     {
       /* This table is an LooseScan scan candidate */
       bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest, 
@@ -8798,7 +8800,7 @@ static bool make_join_select(JOIN *join,
 	SQL_SELECT *sel= tab->select= new (thd->mem_root) SQL_SELECT;
 	if (!sel)
 	  DBUG_RETURN(1);			// End of memory
-        sel->read_tables= sel->const_tables= join->const_table_map; // psergey-new
+        sel->read_tables= sel->const_tables= join->const_table_map;
         /*
           If tab is an inner table of an outer join operation,
           add a match guard to the pushed down predicate.
@@ -9749,8 +9751,20 @@ bool setup_sj_materialization(JOIN_TAB *
     it.rewind();
     for (uint i=0; i < sjm->sjm_table_cols.elements; i++)
     {
-      sjm->copy_field[i].set(((Item_field*)it++)->field,
-                              sjm->table->field[i], FALSE);
+      bool dummy;
+      Item_equal *item_eq;
+      Field *copy_to=((Item_field*)it++)->field; 
+      Item *head;
+      item_eq= find_item_equal(tab->join->cond_equal, copy_to, &dummy);
+
+      if (!item_eq->const_item && 
+          (head= item_eq->fields.head())->used_tables() &
+          emb_sj_nest->sj_inner_tables)
+      {
+        DBUG_ASSERT(head->type() == Item::FIELD_ITEM);
+        copy_to= ((Item_field*)head)->field;
+      }
+      sjm->copy_field[i].set(copy_to, sjm->table->field[i], FALSE);
     }
   }
 
@@ -12545,6 +12559,9 @@ void optimize_wo_join_buffering(JOIN *jo
     Most of the new state is saved join->positions[idx] (and hence no undo
     is necessary). Several members of class JOIN are updated also, these
     changes can be rolled back with restore_prev_sj_state().
+
+    See setup_semijoin_dups_elimination() for a description of what kinds of
+    join prefixes each strategy can handle.
 */
 
 static 
@@ -12607,7 +12624,8 @@ void advance_sj_state(JOIN *join, table_
         !join->cur_sj_inner_tables &&   // (2)
         !(remaining_tables &                             // (3)
           (s->emb_sj_nest->nested_join->sj_corr_tables | // (3)
-           s->emb_sj_nest->nested_join->sj_depends_on))) // (3)
+           s->emb_sj_nest->nested_join->sj_depends_on)) && // (3)
+        !optimizer_flag(join->thd, OPTIMIZER_SWITCH_NO_FIRSTMATCH))
     {
       /* Start tracking potential FirstMatch range */
       pos->first_firstmatch_table= idx;
@@ -15659,10 +15677,13 @@ sub_select_sjm(JOIN *join, JOIN_TAB *joi
     return sub_select(join, join_tab, end_of_records);
   }
 
-  if (end_of_records)
-    return (*join_tab->next_select)(join, join_tab + 1, end_of_records);
-
   SJ_MATERIALIZATION_INFO *sjm= join_tab->emb_sj_nest->sj_mat_info;
+  if (end_of_records)
+  {
+    return (*join_tab[sjm->tables - 1].next_select)(join,
+                                                    join_tab + sjm->tables,
+                                                    end_of_records);
+  }
   if (!sjm->materialized)
   {
     /* 
@@ -15710,15 +15731,13 @@ sub_select_sjm(JOIN *join, JOIN_TAB *joi
   {
     /* Do full scan of the materialized table */
     JOIN_TAB *last_tab= join_tab + (sjm->tables - 1);
-    enum_nested_loop_state res;
 
     Item *save_cond= last_tab->select_cond;
     last_tab->select_cond= sjm->join_cond;
        
-    res = sub_select(join, last_tab, end_of_records);
-
+    rc= sub_select(join, last_tab, end_of_records);
     last_tab->select_cond= save_cond;
-    return res;
+    return rc;
   }
   else
   {
@@ -15728,10 +15747,9 @@ sub_select_sjm(JOIN *join, JOIN_TAB *joi
     if (res || !sjm->in_equality->val_int())
       return NESTED_LOOP_NO_MORE_ROWS;
   }
-
   return (*join_tab[sjm->tables - 1].next_select)(join,
-                                                    join_tab + sjm->tables,
-                                                    FALSE);
+                                                  join_tab + sjm->tables,
+                                                  end_of_records);
 }
 
 
@@ -17474,8 +17492,8 @@ static bool test_if_ref(Item_field *left
 
    @note Because of current requirements for semijoin flattening, we do not
    need to recurse here, hence this function will only examine the top-level
-   AND conditions. (see JOIN::prepare, comment above the line 
-   'if (do_materialize)'
+   AND conditions. (see JOIN::prepare, comment starting with "Check if the 
+   subquery predicate can be executed via materialization".
    
    @param join The top-level query.
    @param old_cond The expression to be replaced.
@@ -21719,7 +21737,7 @@ void select_describe(JOIN *join, bool ne
             extra.append(STRING_WITH_LEN("; Materialize"));
           else
           {
-            last_sjm_table= i + join->best_positions[i].n_sj_tables;
+            last_sjm_table= i + join->best_positions[i].n_sj_tables - 1;
             extra.append(STRING_WITH_LEN("; Start materialize"));
           }
           if (sj_strategy == SJ_OPT_MATERIALIZE_SCAN)

Thread
bzr commit into mysql-6.0-opt-subqueries branch (sergefp:2720)Sergey Petrunia16 Nov