List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:August 14 2008 7:19pm
Subject:bzr push into mysql-6.0-opt-subqueries branch (sergefp:2686 to 2687) WL#3985
View as plain text  
 2687 Sergey Petrunia	2008-08-14
      WL#3985: Subquery optimization: smart choice between semi-join and materialization
      - Better calculations for materialized table's cardinality.
      - Fix a number of problems in semi-join cost calculations
modified:
  mysql-test/r/group_by.result
  mysql-test/r/subselect.result
  mysql-test/r/subselect3.result
  mysql-test/r/subselect_no_mat.result
  mysql-test/r/subselect_no_opts.result
  mysql-test/r/subselect_no_semijoin.result
  mysql-test/r/subselect_sj2.result
  sql/sql_class.h
  sql/sql_select.cc

 2686 Sergey Petrunia	2008-07-28
      Remove deadcode - old version of setup_semijoin_dups_elimination()
modified:
  sql/sql_select.cc

=== modified file 'mysql-test/r/group_by.result'
--- a/mysql-test/r/group_by.result	2008-07-26 19:37:13 +0000
+++ b/mysql-test/r/group_by.result	2008-08-14 19:00:08 +0000
@@ -1513,8 +1513,8 @@ id	select_type	table	type	possible_keys	
 EXPLAIN SELECT 1 FROM t1 WHERE a IN
 (SELECT a FROM t1 USE INDEX (i2) IGNORE INDEX (i2));
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	144	Materialize; Scan
-1	PRIMARY	t1	eq_ref	PRIMARY,i2	PRIMARY	4	test.t1.a	1	Using index
+1	PRIMARY	t1	index	PRIMARY,i2	PRIMARY	4	NULL	144	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	144	Using where; FirstMatch(t1)
 CREATE TABLE t2 (a INT, b INT, KEY(a));
 INSERT INTO t2 VALUES (1, 1), (2, 2), (3,3), (4,4);
 EXPLAIN SELECT a, SUM(b) FROM t2 GROUP BY a LIMIT 2;

=== modified file 'mysql-test/r/subselect.result'
--- a/mysql-test/r/subselect.result	2008-07-28 15:44:17 +0000
+++ b/mysql-test/r/subselect.result	2008-08-14 19:00:08 +0000
@@ -1356,9 +1356,9 @@ a
 3
 explain extended select * from t2 where t2.a in (select t1.a from t1,t3 where t1.b=t3.a);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t2	index	a	a	5	NULL	4	100.00	Using index; Start temporary
-1	PRIMARY	t3	index	a	a	5	NULL	3	100.00	Using index; Using join buffer
-1	PRIMARY	t1	ref	a	a	10	test.t2.a,test.t3.a	116	100.00	Using index; End temporary
+1	PRIMARY	t2	index	a	a	5	NULL	4	100.00	Using index
+1	PRIMARY	t3	index	a	a	5	NULL	3	100.00	Using index
+1	PRIMARY	t1	ref	a	a	10	test.t2.a,test.t3.a	116	100.00	Using index; FirstMatch(t2)
 Warnings:
 Note	1003	select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1` join `test`.`t3`) where ((`test`.`t1`.`a` = `test`.`t2`.`a`) and (`test`.`t1`.`b` = `test`.`t3`.`a`))
 insert into t1 values (3,31);
@@ -4215,8 +4215,8 @@ CREATE INDEX I1 ON t1 (a);
 CREATE INDEX I2 ON t1 (b);
 EXPLAIN SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using index; Materialize; Scan
-1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using index; LooseScan
+1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1);
 a	b
 CREATE TABLE t2 (a VARCHAR(1), b VARCHAR(10));
@@ -4225,15 +4225,15 @@ CREATE INDEX I1 ON t2 (a);
 CREATE INDEX I2 ON t2 (b);
 EXPLAIN SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t2	index	I1	I1	4	NULL	2	Using index; Materialize; Scan
-1	PRIMARY	t2	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t2	index	I1	I1	4	NULL	2	Using index; LooseScan
+1	PRIMARY	t2	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2);
 a	b
 EXPLAIN
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using where; Using index; Materialize; Scan
-1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using where; Using index; LooseScan
+1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500);
 a	b
 DROP TABLE t1,t2;

=== modified file 'mysql-test/r/subselect3.result'
--- a/mysql-test/r/subselect3.result	2008-07-26 19:37:13 +0000
+++ b/mysql-test/r/subselect3.result	2008-08-14 19:00:08 +0000
@@ -99,7 +99,7 @@ oref	a
 1	1
 show status like '%Handler_read_rnd_next';
 Variable_name	Value
-Handler_read_rnd_next	24
+Handler_read_rnd_next	5
 delete from t2;
 insert into t2 values (NULL, 0),(NULL, 0), (NULL, 0), (NULL, 0);
 flush status;

=== modified file 'mysql-test/r/subselect_no_mat.result'
--- a/mysql-test/r/subselect_no_mat.result	2008-07-28 15:44:17 +0000
+++ b/mysql-test/r/subselect_no_mat.result	2008-08-14 19:00:08 +0000
@@ -1360,9 +1360,9 @@ a
 3
 explain extended select * from t2 where t2.a in (select t1.a from t1,t3 where t1.b=t3.a);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t2	index	a	a	5	NULL	4	100.00	Using index; Start temporary
-1	PRIMARY	t3	index	a	a	5	NULL	3	100.00	Using index; Using join buffer
-1	PRIMARY	t1	ref	a	a	10	test.t2.a,test.t3.a	116	100.00	Using index; End temporary
+1	PRIMARY	t2	index	a	a	5	NULL	4	100.00	Using index
+1	PRIMARY	t3	index	a	a	5	NULL	3	100.00	Using index
+1	PRIMARY	t1	ref	a	a	10	test.t2.a,test.t3.a	116	100.00	Using index; FirstMatch(t2)
 Warnings:
 Note	1003	select `test`.`t2`.`a` AS `a` from `test`.`t2` semi join (`test`.`t1` join `test`.`t3`) where ((`test`.`t1`.`a` = `test`.`t2`.`a`) and (`test`.`t1`.`b` = `test`.`t3`.`a`))
 insert into t1 values (3,31);
@@ -4219,8 +4219,8 @@ CREATE INDEX I1 ON t1 (a);
 CREATE INDEX I2 ON t1 (b);
 EXPLAIN SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using index; Materialize; Scan
-1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using index; LooseScan
+1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1);
 a	b
 CREATE TABLE t2 (a VARCHAR(1), b VARCHAR(10));
@@ -4229,15 +4229,15 @@ CREATE INDEX I1 ON t2 (a);
 CREATE INDEX I2 ON t2 (b);
 EXPLAIN SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t2	index	I1	I1	4	NULL	2	Using index; Materialize; Scan
-1	PRIMARY	t2	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t2	index	I1	I1	4	NULL	2	Using index; LooseScan
+1	PRIMARY	t2	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2);
 a	b
 EXPLAIN
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using where; Using index; Materialize; Scan
-1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using where; Using index; LooseScan
+1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500);
 a	b
 DROP TABLE t1,t2;

=== modified file 'mysql-test/r/subselect_no_opts.result'
--- a/mysql-test/r/subselect_no_opts.result	2008-07-28 15:44:17 +0000
+++ b/mysql-test/r/subselect_no_opts.result	2008-08-14 19:00:08 +0000
@@ -4219,8 +4219,8 @@ CREATE INDEX I1 ON t1 (a);
 CREATE INDEX I2 ON t1 (b);
 EXPLAIN SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using index; Materialize; Scan
-1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using index; LooseScan
+1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1);
 a	b
 CREATE TABLE t2 (a VARCHAR(1), b VARCHAR(10));
@@ -4229,15 +4229,15 @@ CREATE INDEX I1 ON t2 (a);
 CREATE INDEX I2 ON t2 (b);
 EXPLAIN SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t2	index	I1	I1	4	NULL	2	Using index; Materialize; Scan
-1	PRIMARY	t2	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t2	index	I1	I1	4	NULL	2	Using index; LooseScan
+1	PRIMARY	t2	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2);
 a	b
 EXPLAIN
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using where; Using index; Materialize; Scan
-1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using where; Using index; LooseScan
+1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500);
 a	b
 DROP TABLE t1,t2;

=== modified file 'mysql-test/r/subselect_no_semijoin.result'
--- a/mysql-test/r/subselect_no_semijoin.result	2008-07-28 15:44:17 +0000
+++ b/mysql-test/r/subselect_no_semijoin.result	2008-08-14 19:00:08 +0000
@@ -4219,8 +4219,8 @@ CREATE INDEX I1 ON t1 (a);
 CREATE INDEX I2 ON t1 (b);
 EXPLAIN SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using index; Materialize; Scan
-1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using index; LooseScan
+1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1);
 a	b
 CREATE TABLE t2 (a VARCHAR(1), b VARCHAR(10));
@@ -4229,15 +4229,15 @@ CREATE INDEX I1 ON t2 (a);
 CREATE INDEX I2 ON t2 (b);
 EXPLAIN SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t2	index	I1	I1	4	NULL	2	Using index; Materialize; Scan
-1	PRIMARY	t2	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t2	index	I1	I1	4	NULL	2	Using index; LooseScan
+1	PRIMARY	t2	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2);
 a	b
 EXPLAIN
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using where; Using index; Materialize; Scan
-1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where; Using join buffer
+1	PRIMARY	t1	index	I1	I1	2	NULL	2	Using where; Using index; LooseScan
+1	PRIMARY	t1	ALL	I2	NULL	NULL	NULL	2	Using where
 SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500);
 a	b
 DROP TABLE t1,t2;

=== modified file 'mysql-test/r/subselect_sj2.result'
--- a/mysql-test/r/subselect_sj2.result	2008-07-28 10:05:21 +0000
+++ b/mysql-test/r/subselect_sj2.result	2008-08-14 19:00:08 +0000
@@ -50,8 +50,8 @@ primary key(pk1, pk2, pk3)
 insert into t3 select a,a, a,a,a from t0;
 explain select * from t3 where b in (select a from t1);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	3	Materialize; Scan
-1	PRIMARY	t3	ref	b	b	5	test.t1.a	1	
+1	PRIMARY	t3	ALL	b	NULL	NULL	NULL	10	
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	3	Using where; FirstMatch(t3)
 select * from t3 where b in (select a from t1);
 a	b	pk1	pk2	pk3
 1	1	1	1	1
@@ -304,9 +304,9 @@ t2.Code IN (SELECT Country FROM t3 
 WHERE Language='English' AND Percentage > 10 AND
 t2.Population > 100000);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	t1	range	Population,Country	Population	4	NULL	1	Using index condition; Using MRR; Materialize; Scan
-1	PRIMARY	t2	eq_ref	PRIMARY,Population	PRIMARY	3	test.t1.Country	1	Using where
-1	PRIMARY	t3	eq_ref	PRIMARY,Percentage	PRIMARY	33	test.t2.Code,const	1	Using index condition; Using where
+1	PRIMARY	t1	range	Population,Country	Population	4	NULL	1	Using index condition; Using MRR
+1	PRIMARY	t3	eq_ref	PRIMARY,Percentage	PRIMARY	33	test.t1.Country,const	1	Using index condition; Using where
+1	PRIMARY	t2	eq_ref	PRIMARY,Population	PRIMARY	3	test.t3.Country	1	Using index condition; Using where
 DROP TABLE t1,t2,t3;
 CREATE TABLE t1 (
 Code char(3) NOT NULL DEFAULT '',

=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h	2008-07-28 15:44:17 +0000
+++ b/sql/sql_class.h	2008-08-14 19:00:08 +0000
@@ -2821,7 +2821,9 @@ public:
   /* Cost to make one lookup in the temptable */
   COST_VECT lookup_cost;
   
+  /* Cost fo scanning the materialization table */
   COST_VECT scan_cost;
+
   /* Execution structures */
   TMP_TABLE_PARAM sjm_table_param;
   List<Item> sjm_table_cols;

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2008-07-28 15:48:11 +0000
+++ b/sql/sql_select.cc	2008-08-14 19:00:08 +0000
@@ -4354,6 +4354,41 @@ make_join_statistics(JOIN *join, TABLE_L
 
         sjm->materialization_cost.set_double(subjoin_read_time);
         sjm->rows= subjoin_out_rows;
+
+        List<Item> &right_expr_list= 
+          sj_nest->sj_subq_pred->unit->first_select()->item_list;
+        /*
+          Adjust output cardinality estimates. If the subquery has form
+
+           ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) )
+
+           then the number of distinct output record combinations has an
+           upper bound of product of number of records matching the tables 
+           that are used by the SELECT clause.
+           (TODO: functional dependencies may make this number even less)
+        */
+        {
+          //for (i=0 ; i < join->tables ; i++)
+          //  join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i;
+          for (i=0 ; i < join->const_tables + sjm->n_tables ; i++)
+          {
+            JOIN_TAB *tab= join->best_positions[i].table;
+            join->map2table[tab->table->tablenr]= tab;
+          }
+          List_iterator<Item> it(right_expr_list);
+          Item *item;
+          table_map map= 0;
+          while ((item= it++))
+            map |= item->used_tables();
+          map= map & ~PSEUDO_TABLE_BITS;
+          Table_map_iterator tm_it(map);
+          int tableno;
+          double rows= 1.0;
+          while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+            rows *= join->map2table[tableno]->table->quick_condition_rows;
+          sjm->rows= min(sjm->rows, rows);
+        }
+
         memcpy(sjm->positions, join->best_positions + join->const_tables, 
                sizeof(POSITION) * n_tables);
 
@@ -4365,22 +4400,21 @@ make_join_statistics(JOIN *join, TABLE_L
         /*
           Calculate temporary table parameters
         */
-        uint rowlen= get_tmp_table_rec_length(sj_nest->sj_subq_pred->unit->
-                                              first_select()->item_list);
+        uint rowlen= get_tmp_table_rec_length(right_expr_list);
         double lookup_cost;
         if (rowlen * subjoin_out_rows< join->thd->variables.max_heap_table_size)
         {
-          sjm->materialization_cost.add_io(0.05, subjoin_out_rows);
+          sjm->materialization_cost.add_io(subjoin_out_rows, 0.05);
           sjm->scan_cost.zero();
-          sjm->scan_cost.add_io(0.05, subjoin_out_rows);
+          sjm->scan_cost.add_io(sjm->rows, 0.05);
           lookup_cost= 0.05;
         }
         else
         {
-          sjm->materialization_cost.add_io(1.0, subjoin_out_rows);
+          sjm->materialization_cost.add_io(subjoin_out_rows, 1.0);
           lookup_cost= 1;
           sjm->scan_cost.zero();
-          sjm->scan_cost.add_io(1.0, subjoin_out_rows);
+          sjm->scan_cost.add_io(sjm->rows, 1.0);
         }
         sjm->lookup_cost.set_double(lookup_cost);
         sj_nest->sj_mat_info= sjm;
@@ -5790,7 +5824,7 @@ public:
     }
   }
 
-  void save_to_position(POSITION *pos)
+  void save_to_position(JOIN_TAB *tab, POSITION *pos)
   {
     pos->read_time=       best_loose_scan_cost;
     if (best_loose_scan_cost != DBL_MAX)
@@ -5799,6 +5833,10 @@ public:
       pos->key=             best_loose_scan_start_key;
       pos->loosescan_key=   best_loose_scan_key;
       pos->loosescan_parts= best_max_loose_keypart + 1;
+      pos->use_sj_mat=      0;
+      pos->use_join_buffer= FALSE;
+      pos->table=           tab;
+      // todo need ref_depend_map ?
     }
   }
 };
@@ -5821,15 +5859,12 @@ public:
   @param thd              thread for the connection that submitted the query
   @param remaining_tables set of tables not included into the partial plan yet
   @param idx              the length of the partial plan
+  @param disable_jbuf     TRUE<=> Don't use join buffering
   @param record_count     estimate for the number of records returned by the
                           partial plan
-  @param read_time        the cost of the partial plan
- 
- TODO: do we need to add this:
-  @param loose_scan_keys  if non-NULL but points to (key_map) - check if we
-                          can use loose scan
-                          IN   TRUE <=> can consider loosescan for semi-join
-                          OUT  TRUE <=> Picked loose scan.
+  @param pos              OUT Table access plan
+  @param loose_scan_pos   OUT Table plan that uses loosescan, or set cost to 
+                              DBL_MAX if not possible.
 
   @return
     None
@@ -6367,7 +6402,7 @@ best_access_path(JOIN      *join,
   pos->use_sj_mat= 0;
   pos->use_join_buffer= best_uses_jbuf;
    
-  loose_scan_opt.save_to_position(loose_scan_pos);
+  loose_scan_opt.save_to_position(s, loose_scan_pos);
 
   if (!best_key &&
       idx == join->const_tables &&
@@ -6488,7 +6523,7 @@ choose_plan(JOIN *join, table_map join_t
     if (search_depth == MAX_TABLES+2)
     { /*
         TODO: 'MAX_TABLES+2' denotes the old implementation of find_best before
-        the greedy version. Will be removed when greedy_search is approved.
+        the greedy version. Will be removed when greedy_search is apppostqueueroved.
       */
       join->best_read= DBL_MAX;
       if (find_best(join, join_tables, join->const_tables, 1.0, 0.0))
@@ -6972,10 +7007,10 @@ void get_partial_join_cost(JOIN *join, u
   double read_time= 0.0;
   for (uint i= join->const_tables; i < n_tables + join->const_tables ; i++)
   {
-    if (join->positions[i].records_read)
+    if (join->best_positions[i].records_read) //!!psergey psergey fix AA BB
     {
-      record_count *= join->positions[i].records_read;
-      read_time += join->positions[i].read_time;
+      record_count *= join->best_positions[i].records_read;
+      read_time += join->best_positions[i].read_time;
     }
   }
   *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
@@ -7511,7 +7546,7 @@ get_best_combination(JOIN *join, table_m
   /*
     Prepare semi-join processing info for plan refimenent stage:
   */
-  table_map remaining_tables= join_tables;
+  table_map remaining_tables= 0;//join_tables;
   table_map handled_tabs= 0;
   for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1; tablenr--)
   {
@@ -7521,7 +7556,10 @@ get_best_combination(JOIN *join, table_m
     LINT_INIT(first); // Set by every branch except SJ_OPT_NONE which doesn't use it
 
     if ((handled_tabs & s->table->map) || pos->sj_strategy == SJ_OPT_NONE)
+    {
+      remaining_tables |= s->table->map;
       continue;
+    }
     
     if (pos->sj_strategy == SJ_OPT_MATERIALIZE)
     {
@@ -7546,6 +7584,33 @@ get_best_combination(JOIN *join, table_m
       join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
       join->best_positions[first].use_sj_mat |= SJ_MAT_SCAN;
       join->best_positions[first].n_sj_tables= sjm->n_tables;
+      /* 
+        Do what advance_sj_state did: re-run best_access_path for every table
+        in the [last_inner_table + 1; pos..) range
+      */
+      double prefix_rec_count;
+      /* Get the prefix record count */
+      if (first == join->const_tables)
+        prefix_rec_count= 1.0;
+      else
+        prefix_rec_count= join->best_positions[first-1].prefix_record_count;
+      
+      /* Add materialization record count*/
+      prefix_rec_count *= sjm->rows;
+      
+      uint i;
+      table_map rem_tables= remaining_tables;
+      for (i= tablenr; i != (first + sjm->n_tables - 1); i--)
+        rem_tables |= join->best_positions[i].table->table->map;
+
+      POSITION curpos, dummy;
+      for (i= first + sjm->n_tables; i <= tablenr; i++)
+      {
+        best_access_path(join, join->best_positions[i].table, rem_tables, i, FALSE,
+                         prefix_rec_count, join->best_positions + i, &dummy);
+        prefix_rec_count *= curpos.records_read;
+        rem_tables &= ~join->best_positions[i].table->table->map;
+      }
     }
  
     if (pos->sj_strategy == SJ_OPT_FIRST_MATCH)
@@ -7556,19 +7621,25 @@ get_best_combination(JOIN *join, table_m
       POSITION dummy; // For loose scan paths
       double record_count= (first== join->const_tables)? 1.0: 
                            join->best_positions[tablenr - 1].prefix_record_count;
+      
+      table_map rem_tables= remaining_tables;
+      uint idx;
+      for (idx= first; idx <= tablenr; idx++)
+        rem_tables |= join->best_positions[idx].table->table->map;
       /*
         Re-run best_access_path to produce best access methods that do not use
         join buffering
       */ 
-      for (uint idx= first; idx <= tablenr; idx++)
+      for (idx= first; idx <= tablenr; idx++)
       {
         if (join->best_positions[idx].use_join_buffer)
         {
            best_access_path(join, join->best_positions[idx].table, 
-                            remaining_tables, idx, TRUE /* no jbuf */,
+                            rem_tables, idx, TRUE /* no jbuf */,
                             record_count, join->best_positions + idx, &dummy);
         }
         record_count *= join->best_positions[idx].records_read;
+        rem_tables &= ~join->best_positions[idx].table->table->map;
       }
     }
 
@@ -7576,29 +7647,34 @@ get_best_combination(JOIN *join, table_m
     {
       first= pos->first_loosescan_table;
       POSITION *first_pos= join->best_positions + first;
-      first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
-      first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
       POSITION loose_scan_pos; // For loose scan paths
       double record_count= (first== join->const_tables)? 1.0: 
                            join->best_positions[tablenr - 1].prefix_record_count;
       
+      table_map rem_tables= remaining_tables;
+      uint idx;
+      for (idx= first; idx <= tablenr; idx++)
+        rem_tables |= join->best_positions[idx].table->table->map;
       /*
         Re-run best_access_path to produce best access methods that do not use
         join buffering
       */ 
-      for (uint idx= first; idx <= tablenr; idx++)
+      for (idx= first; idx <= tablenr; idx++)
       {
         if (join->best_positions[idx].use_join_buffer || (idx == first))
         {
            best_access_path(join, join->best_positions[idx].table,
-                            remaining_tables, idx, TRUE /* no jbuf */,
+                            rem_tables, idx, TRUE /* no jbuf */,
                             record_count, join->best_positions + idx,
                             &loose_scan_pos);
            if (idx==first)
              join->best_positions[idx]= loose_scan_pos;
         }
+        rem_tables &= ~join->best_positions[idx].table->table->map;
         record_count *= join->best_positions[idx].records_read;
       }
+      first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
+      first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
     }
 
     if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT)
@@ -7616,6 +7692,7 @@ get_best_combination(JOIN *join, table_m
     for (uint i= first; i < i_end; i++)
       handled_tabs |= join->best_positions[i].table->table->map;
 
+    remaining_tables |= s->table->map;
   }
 
   for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
@@ -7641,7 +7718,9 @@ get_best_combination(JOIN *join, table_m
     if (j->type == JT_SYSTEM)
       continue;
     
-    if (j->keys.is_clear_all() || !(keyuse= join->best_positions[tablenr].key))
+    if (j->keys.is_clear_all() || !(keyuse= join->best_positions[tablenr].key) || 
+        (join->best_positions[0].sj_strategy == SJ_OPT_LOOSE_SCAN /* &&
+        psergey-todo TODO*/))
     {
       j->type=JT_ALL;
       j->index= join->best_positions[tablenr].loosescan_key;
@@ -12230,29 +12309,45 @@ void advance_sj_state(JOIN *join, table_
       join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest;
     SJ_MATERIALIZE_INFO *mat_info= mat_nest->sj_mat_info;
 
-    COST_VECT prefix_cost;
+    double prefix_cost;
     double prefix_rec_count;
-    int first_tab;
-    if ((first_tab= pos->sjm_scan_last_inner - mat_info->n_tables) + 1 != 
-        (int)join->const_tables)
+    int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->n_tables;
+    /* Get the prefix cost */
+    if (first_tab == (int)join->const_tables)
     {
-      prefix_cost= join->positions[first_tab].prefix_cost;
-      prefix_rec_count= join->positions[first_tab].prefix_record_count;
+      prefix_rec_count= 1.0;
+      prefix_cost= 0.0;
     }
     else
     {
-      prefix_rec_count= 1.0;
-      prefix_cost.zero();
+      prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+      prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+    }
+
+    /* Add materialization cost */
+    prefix_cost += mat_info->materialization_cost.total_cost() +
+                   prefix_rec_count * mat_info->scan_cost.total_cost();
+    prefix_rec_count *= mat_info->rows;
+    
+    uint i;
+    table_map rem_tables= remaining_tables;
+    for (i= idx; i != (first_tab + mat_info->n_tables - 1); i--)
+      rem_tables |= join->positions[i].table->table->map;
+
+    POSITION curpos, dummy;
+    /* Need to re-run best-access-path as we prefix_rec_count has changed */
+    for (i= first_tab + mat_info->n_tables; i <= idx; i++)
+    {
+      best_access_path(join, join->positions[i].table, rem_tables, i, FALSE,
+                       prefix_rec_count, &curpos, &dummy);
+      prefix_rec_count *= curpos.records_read;
+      prefix_cost += curpos.read_time;
     }
 
-    double mat_read_time= prefix_cost.total_cost();
-    mat_read_time += mat_info->materialization_cost.total_cost() +
-                     prefix_rec_count * mat_info->scan_cost.total_cost();
-    //psergey-todo: add costs for tables that are after the inner tables!
-    if (mat_read_time < *current_read_time)
+    if (prefix_cost < *current_read_time || join->cur_unhandled_sj_fanout)
     {
       pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
-      *current_read_time=    mat_read_time;
+      *current_read_time=    prefix_cost;
       *current_record_count= prefix_rec_count;
       join->cur_unhandled_sj_fanout&= ~mat_nest->sj_inner_tables;
 
@@ -12341,7 +12436,9 @@ void advance_sj_state(JOIN *join, table_
 
       double write_cost= join->positions[first_tab].prefix_record_count* 
                          sj_outer_fanout * (is_disk_table? 1.0: 0.05);
-      double lookup_cost= *current_record_count * (is_disk_table? 1.0:0.05);
+      double lookup_cost= join->positions[first_tab].prefix_record_count* 
+                         sj_outer_fanout* sj_inner_fanout * 
+                         (is_disk_table? 1.0:0.05);
       dups_cost += write_cost + lookup_cost;
       
       /*

Thread
bzr push into mysql-6.0-opt-subqueries branch (sergefp:2686 to 2687) WL#3985Sergey Petrunia14 Aug