Below is the list of changes that have just been committed into a local
6.0 repository of sergefp. When sergefp does a push these changes
will be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2008-06-13 19:59:40+04:00, sergefp@stripped +16 -0
WL#3980 subquery code test push
mysql-test/r/subselect.result@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +2 -2
WL#3980 subquery code test push
mysql-test/r/subselect_mat.result@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +2 -2
WL#3980 subquery code test push
mysql-test/r/subselect_no_mat.result@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +2 -2
WL#3980 subquery code test push
mysql-test/r/subselect_no_opts.result@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +2 -2
WL#3980 subquery code test push
mysql-test/r/subselect_no_semijoin.result@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +2 -2
WL#3980 subquery code test push
mysql-test/r/subselect_sj2.result@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +6 -6
WL#3980 subquery code test push
sql/handler.h@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +11 -0
WL#3980 subquery code test push
sql/item_cmpfunc.h@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +2 -0
WL#3980 subquery code test push
sql/item_subselect.h@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +1 -0
WL#3980 subquery code test push
sql/mysqld.cc@stripped, 2008-06-13 19:59:27+04:00, sergefp@stripped +1 -1
WL#3980 subquery code test push
sql/sql_class.h@stripped, 2008-06-13 19:59:28+04:00, sergefp@stripped +35 -0
WL#3980 subquery code test push
sql/sql_parse.cc@stripped, 2008-06-13 19:59:28+04:00, sergefp@stripped +1 -0
WL#3980 subquery code test push
sql/sql_select.cc@stripped, 2008-06-13 19:59:28+04:00, sergefp@stripped +957 -58
WL#3980 subquery code test push
sql/sql_select.h@stripped, 2008-06-13 19:59:28+04:00, sergefp@stripped +30 -9
WL#3980 subquery code test push
sql/table.h@stripped, 2008-06-13 19:59:28+04:00, sergefp@stripped +12 -1
WL#3980 subquery code test push
support-files/build-tags@stripped, 2008-06-13 19:59:28+04:00, sergefp@stripped +2 -1
WL#3980 subquery code test push
diff -Nrup a/mysql-test/r/subselect.result b/mysql-test/r/subselect.result
--- a/mysql-test/r/subselect.result 2008-06-13 18:11:28 +04:00
+++ b/mysql-test/r/subselect.result 2008-06-13 19:59:27 +04:00
@@ -2818,8 +2818,8 @@ Warnings:
Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = '0') and trigcond(((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`))) and trigcond(((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`)))) having (trigcond(<is_not_null_test>(`test`.`t2`.`one`)) and trigcond(<is_not_null_test>(`test`.`t2`.`two`))))) AS `test` from `test`.`t1`
explain extended SELECT one,two from t1 where ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = 'N');
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 8 100.00 Start temporary
-1 PRIMARY t2 ALL NULL NULL NULL NULL 9 100.00 Using where; End temporary; Using join buffer
+1 PRIMARY t1 ALL NULL NULL NULL NULL 8 100.00
+1 PRIMARY t2 ALL NULL NULL NULL NULL 9 100.00 Using where; Materialize
Warnings:
Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t2`.`two` = `test`.`t1`.`two`) and (`test`.`t2`.`one` = `test`.`t1`.`one`) and (`test`.`t2`.`flag` = 'N'))
explain extended SELECT one,two,ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = '0' group by one,two) as 'test' from t1;
diff -Nrup a/mysql-test/r/subselect_mat.result b/mysql-test/r/subselect_mat.result
--- a/mysql-test/r/subselect_mat.result 2008-04-25 03:59:32 +04:00
+++ b/mysql-test/r/subselect_mat.result 2008-06-13 19:59:27 +04:00
@@ -657,12 +657,12 @@ where t2.b2 = substring(t2_16.b2,1,6) an
t2.b1 IN (select c1 from t3 where c2 > '0')));
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3 100.00 Using where
-2 SUBQUERY t1_16 ALL NULL NULL NULL NULL 3 100.00 Using where
+2 DEPENDENT SUBQUERY t1_16 ALL NULL NULL NULL NULL 3 100.00 Using where
3 DEPENDENT SUBQUERY t2_16 ALL NULL NULL NULL NULL 3 100.00 Using where
3 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 5 100.00 Using where; Using join buffer
4 SUBQUERY t3 ALL NULL NULL NULL NULL 4 100.00 Using where
Warnings:
-Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from `test`.`t1` where <in_optimizer>(concat(`test`.`t1`.`a1`,'x'),concat(`test`.`t1`.`a1`,'x') in ( <materialize> (select left(`test`.`t1_16`.`a1`,8) AS `left(a1,8)` from `test`.`t1_16` where <in_optimizer>((`test`.`t1_16`.`a1`,`test`.`t1_16`.`a2`),(`test`.`t1_16`.`a1`,`test`.`t1_16`.`a2`) in (select `test`.`t2_16`.`b1` AS `b1`,`test`.`t2_16`.`b2` AS `b2` from `test`.`t2_16` join `test`.`t2` where ((`test`.`t2`.`b2` = substr(`test`.`t2_16`.`b2`,1,6)) and <in_optimizer>(`test`.`t2`.`b1`,`test`.`t2`.`b1` in ( <materialize> (select `test`.`t3`.`c1` AS `c1` from `test`.`t3` where (`test`.`t3`.`c2` > '0') ), <primary_index_lookup>(`test`.`t2`.`b1` in <temporary table> on distinct_key))) and (<cache>(`test`.`t1_16`.`a1`) = `test`.`t2_16`.`b1`) and (<cache>(`test`.`t1_16`.`a2`) = `test`.`t2_16`.`b2`)))) ), <primary_index_lookup>(concat(`test`.`t1`.`a1`,'x') in <temporary table> on distinct_key)))
+Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from `test`.`t1` where <in_optimizer>(concat(`test`.`t1`.`a1`,'x'),<exists>(select 1 AS `Not_used` from `test`.`t1_16` where (<in_optimizer>((`test`.`t1_16`.`a1`,`test`.`t1_16`.`a2`),(`test`.`t1_16`.`a1`,`test`.`t1_16`.`a2`) in (select `test`.`t2_16`.`b1` AS `b1`,`test`.`t2_16`.`b2` AS `b2` from `test`.`t2_16` join `test`.`t2` where ((`test`.`t2`.`b2` = substr(`test`.`t2_16`.`b2`,1,6)) and <in_optimizer>(`test`.`t2`.`b1`,`test`.`t2`.`b1` in ( <materialize> (select `test`.`t3`.`c1` AS `c1` from `test`.`t3` where (`test`.`t3`.`c2` > '0') ), <primary_index_lookup>(`test`.`t2`.`b1` in <temporary table> on distinct_key))) and (<cache>(`test`.`t1_16`.`a1`) = `test`.`t2_16`.`b1`) and (<cache>(`test`.`t1_16`.`a2`) = `test`.`t2_16`.`b2`)))) and (<cache>(concat(`test`.`t1`.`a1`,'x')) = left(`test`.`t1_16`.`a1`,8)))))
drop table t1_16, t2_16, t3_16;
set @blob_len = 512;
set @suffix_len = @blob_len - @prefix_len;
diff -Nrup a/mysql-test/r/subselect_no_mat.result b/mysql-test/r/subselect_no_mat.result
--- a/mysql-test/r/subselect_no_mat.result 2008-06-13 18:11:28 +04:00
+++ b/mysql-test/r/subselect_no_mat.result 2008-06-13 19:59:27 +04:00
@@ -2822,8 +2822,8 @@ Warnings:
Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = '0') and trigcond(((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`))) and trigcond(((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`)))) having (trigcond(<is_not_null_test>(`test`.`t2`.`one`)) and trigcond(<is_not_null_test>(`test`.`t2`.`two`))))) AS `test` from `test`.`t1`
explain extended SELECT one,two from t1 where ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = 'N');
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 8 100.00 Start temporary
-1 PRIMARY t2 ALL NULL NULL NULL NULL 9 100.00 Using where; End temporary; Using join buffer
+1 PRIMARY t1 ALL NULL NULL NULL NULL 8 100.00
+1 PRIMARY t2 ALL NULL NULL NULL NULL 9 100.00 Using where; Materialize
Warnings:
Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t2`.`two` = `test`.`t1`.`two`) and (`test`.`t2`.`one` = `test`.`t1`.`one`) and (`test`.`t2`.`flag` = 'N'))
explain extended SELECT one,two,ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = '0' group by one,two) as 'test' from t1;
diff -Nrup a/mysql-test/r/subselect_no_opts.result b/mysql-test/r/subselect_no_opts.result
--- a/mysql-test/r/subselect_no_opts.result 2008-06-13 18:11:28 +04:00
+++ b/mysql-test/r/subselect_no_opts.result 2008-06-13 19:59:27 +04:00
@@ -2822,8 +2822,8 @@ Warnings:
Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = '0') and trigcond(((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`))) and trigcond(((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`)))) having (trigcond(<is_not_null_test>(`test`.`t2`.`one`)) and trigcond(<is_not_null_test>(`test`.`t2`.`two`))))) AS `test` from `test`.`t1`
explain extended SELECT one,two from t1 where ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = 'N');
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 8 100.00 Start temporary
-1 PRIMARY t2 ALL NULL NULL NULL NULL 9 100.00 Using where; End temporary; Using join buffer
+1 PRIMARY t1 ALL NULL NULL NULL NULL 8 100.00
+1 PRIMARY t2 ALL NULL NULL NULL NULL 9 100.00 Using where; Materialize
Warnings:
Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t2`.`two` = `test`.`t1`.`two`) and (`test`.`t2`.`one` = `test`.`t1`.`one`) and (`test`.`t2`.`flag` = 'N'))
explain extended SELECT one,two,ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = '0' group by one,two) as 'test' from t1;
diff -Nrup a/mysql-test/r/subselect_no_semijoin.result b/mysql-test/r/subselect_no_semijoin.result
--- a/mysql-test/r/subselect_no_semijoin.result 2008-06-13 18:11:28 +04:00
+++ b/mysql-test/r/subselect_no_semijoin.result 2008-06-13 19:59:27 +04:00
@@ -2822,8 +2822,8 @@ Warnings:
Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = '0') and trigcond(((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`))) and trigcond(((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`)))) having (trigcond(<is_not_null_test>(`test`.`t2`.`one`)) and trigcond(<is_not_null_test>(`test`.`t2`.`two`))))) AS `test` from `test`.`t1`
explain extended SELECT one,two from t1 where ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = 'N');
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 8 100.00 Start temporary
-1 PRIMARY t2 ALL NULL NULL NULL NULL 9 100.00 Using where; End temporary; Using join buffer
+1 PRIMARY t1 ALL NULL NULL NULL NULL 8 100.00
+1 PRIMARY t2 ALL NULL NULL NULL NULL 9 100.00 Using where; Materialize
Warnings:
Note 1003 select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t2`.`two` = `test`.`t1`.`two`) and (`test`.`t2`.`one` = `test`.`t1`.`one`) and (`test`.`t2`.`flag` = 'N'))
explain extended SELECT one,two,ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = '0' group by one,two) as 'test' from t1;
diff -Nrup a/mysql-test/r/subselect_sj2.result b/mysql-test/r/subselect_sj2.result
--- a/mysql-test/r/subselect_sj2.result 2008-06-13 18:11:29 +04:00
+++ b/mysql-test/r/subselect_sj2.result 2008-06-13 19:59:27 +04:00
@@ -159,8 +159,8 @@ explain select
a, mid(filler1, 1,10), length(filler1)=length(filler2)
from t2 ot where a in (select a from t1 it);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY ot ALL NULL NULL NULL NULL 22 Start temporary
-1 PRIMARY it ALL NULL NULL NULL NULL 32 Using where; End temporary; Using join buffer
+1 PRIMARY ot ALL NULL NULL NULL NULL 22
+1 PRIMARY it ALL NULL NULL NULL NULL 32 Materialize
select
a, mid(filler1, 1,10), length(filler1)=length(filler2)
from t2 ot where a in (select a from t1 it);
@@ -184,8 +184,8 @@ a mid(filler1, 1,10) length(filler1)=len
16 filler1234 1
17 filler1234 1
18 filler1234 1
-3 duplicate 1
19 filler1234 1
+3 duplicate 1
19 duplicate 1
insert into t1 select a+20, 'filler123456', 'filler123456' from t0;
insert into t1 select a+20, 'filler123456', 'filler123456' from t0;
@@ -225,8 +225,8 @@ explain select
a, mid(filler1, 1,10), length(filler1)=length(filler2)
from t2 ot where a in (select a from t1 it);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY ot ALL NULL NULL NULL NULL 22 Start temporary
-1 PRIMARY it ALL NULL NULL NULL NULL 52 Using where; End temporary; Using join buffer
+1 PRIMARY ot ALL NULL NULL NULL NULL 22
+1 PRIMARY it ALL NULL NULL NULL NULL 52 Materialize
select
a, mid(filler1, 1,10), length(filler1)=length(filler2)
from t2 ot where a in (select a from t1 it);
@@ -250,8 +250,8 @@ a mid(filler1, 1,10) length(filler1)=len
16 filler1234 1
17 filler1234 1
18 filler1234 1
-3 duplicate 1
19 filler1234 1
+3 duplicate 1
19 duplicate 1
drop table t1, t2;
create table t1 (a int, b int, key(a));
diff -Nrup a/sql/handler.h b/sql/handler.h
--- a/sql/handler.h 2008-06-13 18:11:29 +04:00
+++ b/sql/handler.h 2008-06-13 19:59:27 +04:00
@@ -1165,6 +1165,17 @@ public:
add_io_cnt * add_avg_cost) / io_count_sum;
io_count= io_count_sum;
}
+
+ /*
+ To be used when we go from old single value-based cost calculations to
+ the new COST_VECT-based.
+ */
+ void set_double(double cost)
+ {
+ zero();
+ avg_io_cost= 1.0;
+ io_count= cost;
+ }
};
void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
diff -Nrup a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
--- a/sql/item_cmpfunc.h 2008-03-05 13:19:44 +03:00
+++ b/sql/item_cmpfunc.h 2008-06-13 19:59:27 +04:00
@@ -1598,6 +1598,8 @@ public:
virtual void print(String *str, enum_query_type query_type);
CHARSET_INFO *compare_collation()
{ return fields.head()->collation.collation; }
+ friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
+ Item_equal *item_equal);
};
class COND_EQUAL: public Sql_alloc
diff -Nrup a/sql/item_subselect.h b/sql/item_subselect.h
--- a/sql/item_subselect.h 2008-05-01 07:53:30 +04:00
+++ b/sql/item_subselect.h 2008-06-13 19:59:27 +04:00
@@ -303,6 +303,7 @@ public:
- (TABLE_LIST*)1 if the predicate is in the WHERE.
*/
TABLE_LIST *expr_join_nest;
+ bool types_allow_materialization;
/* The method chosen to execute the IN predicate. */
enum enum_exec_method {
diff -Nrup a/sql/mysqld.cc b/sql/mysqld.cc
--- a/sql/mysqld.cc 2008-06-13 18:11:29 +04:00
+++ b/sql/mysqld.cc 2008-06-13 19:59:27 +04:00
@@ -328,7 +328,7 @@ static const char *optimizer_switch_name
/* Corresponding defines are named OPTIMIZER_SWITCH_XXX */
static const unsigned int optimizer_switch_names_len[]=
{
- /*no_materialization*/ 19,
+ /*no_materialization*/ 18,
/*no_semijoin*/ 11,
/*no_loosescan*/ 12,
};
diff -Nrup a/sql/sql_class.h b/sql/sql_class.h
--- a/sql/sql_class.h 2008-05-22 22:35:05 +04:00
+++ b/sql/sql_class.h 2008-06-13 19:59:28 +04:00
@@ -2705,6 +2705,41 @@ public:
bool send_data(List<Item> &items);
};
+struct st_table_ref;
+
+/*
+ Materialized semi-join info
+*/
+class SJ_MATERIALIZE_INFO : public Sql_alloc
+{
+public:
+ /* optimal join sub-order */
+ struct st_position *positions;
+
+ uint n_tables; /* Number of tables in the sj-nest */
+
+ /* Expected #rows in the materialized table */
+ double rows;
+
+ /* Cost to materialize - run the sub-join and write rows into temp.table */
+ COST_VECT materialization_cost;
+
+ /* Cost to make one lookup in the temptable */
+ COST_VECT lookup_cost;
+
+ /* Execution structures */
+ TMP_TABLE_PARAM sjm_table_param;
+ List<Item> sjm_table_cols;
+ TABLE *table;
+ /* The thing to do index lookups */
+ struct st_table_ref *tab_ref;
+ bool materialized;
+ Item *in_equality;
+
+ bool is_used;
+};
+
+
/* Structs used when sorting */
typedef struct st_sort_field {
diff -Nrup a/sql/sql_parse.cc b/sql/sql_parse.cc
--- a/sql/sql_parse.cc 2008-05-22 22:35:05 +04:00
+++ b/sql/sql_parse.cc 2008-06-13 19:59:28 +04:00
@@ -1047,6 +1047,7 @@ bool dispatch_command(enum enum_server_c
const char* end_of_stmt= NULL;
general_log_write(thd, command, thd->query, thd->query_length);
+ fprintf(stderr, "query: %-.4096s\n",thd->query);
DBUG_PRINT("query",("%-.4096s",thd->query));
#if defined(ENABLED_PROFILING)
thd->profiling.set_query_source(thd->query, thd->query_length);
diff -Nrup a/sql/sql_select.cc b/sql/sql_select.cc
--- a/sql/sql_select.cc 2008-06-13 18:11:29 +04:00
+++ b/sql/sql_select.cc 2008-06-13 19:59:28 +04:00
@@ -58,8 +58,7 @@ static int sort_keyuse(KEYUSE *a,KEYUSE
static void set_position(JOIN *join,uint index,JOIN_TAB *table,KEYUSE *key);
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
table_map used_tables);
-static bool choose_plan(JOIN *join,table_map join_tables);
-
+static bool choose_plan(JOIN *join, table_map join_tables);
static void best_access_path(JOIN *join, JOIN_TAB *s, THD *thd,
table_map remaining_tables, uint idx,
double record_count, double read_time);
@@ -72,8 +71,9 @@ static bool best_extension_by_limited_se
double read_time, uint depth,
uint prune_level);
static uint determine_search_depth(JOIN* join);
-static int join_tab_cmp(const void* ptr1, const void* ptr2);
-static int join_tab_cmp_straight(const void* ptr1, const void* ptr2);
+static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2);
+static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2);
/*
TODO: 'find_best' is here only temporarily until 'greedy_search' is
tested and approved.
@@ -152,6 +152,7 @@ static int join_read_const_table(JOIN_TA
static int join_read_system(JOIN_TAB *tab);
static int join_read_const(JOIN_TAB *tab);
static int join_read_key(JOIN_TAB *tab);
+static int join_read_key2(JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref);
static int join_read_always_key(JOIN_TAB *tab);
static int join_read_last_key(JOIN_TAB *tab);
static int join_no_more_records(READ_RECORD *info);
@@ -197,7 +198,7 @@ static bool store_record_in_cache(JOIN_C
static void reset_cache_read(JOIN_CACHE *cache);
static void reset_cache_write(JOIN_CACHE *cache);
static void read_cached_record(JOIN_TAB *tab);
-static bool cmp_buffer_with_ref(JOIN_TAB *tab);
+static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
static bool setup_new_fields(THD *thd, List<Item> &fields,
List<Item> &all_fields, ORDER *new_order);
static ORDER *create_distinct_group(THD *thd, Item **ref_pointer_array,
@@ -231,10 +232,14 @@ void select_describe(JOIN *join, bool ne
static Item *remove_additional_cond(Item* conds);
static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
static bool test_if_ref(Item_field *left_item,Item *right_item);
-static bool replace_where_subcondition(JOIN *join, TABLE_LIST *emb_nest,
+static bool replace_where_subcondition(JOIN *join, Item **tree,
Item *old_cond, Item *new_cond,
bool do_fix_fields);
+void get_partial_join_cost(JOIN *join, uint idx, double *read_time_arg,
+ double *record_count_arg);
+static
+bool subquery_types_allow_materialization(THD *thd, Item_in_subselect *in_subs);
/*
This is used to mark equalities that were made from i-th IN-equality.
We limit semi-join InsideOut optimization to handling max 64 inequalities,
@@ -574,6 +579,8 @@ JOIN::prepare(Item ***rref_pointer_array
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
{
DBUG_PRINT("info", ("Subquery is semi-join conversion candidate"));
+ in_subs->types_allow_materialization=
+ subquery_types_allow_materialization(thd, in_subs);
if (thd->stmt_arena->state != Query_arena::PREPARED)
{
@@ -645,7 +652,8 @@ JOIN::prepare(Item ***rref_pointer_array
in_subs && // 1
!select_lex->master_unit()->first_select()->next_select() && // 2
select_lex->master_unit()->first_select()->leaf_tables && // 3
- thd->lex->sql_command == SQLCOM_SELECT) // *
+ thd->lex->sql_command == SQLCOM_SELECT && // *
+ subquery_types_allow_materialization(thd, in_subs))
{
if (in_subs->is_top_level_item() && // 4
!in_subs->is_correlated && // 5
@@ -796,6 +804,64 @@ err:
/*
+ Check if subquery's compared types allow materialization.
+
+ DESCRIPTION
+ This is a temporary fix for BUG#36752.
+
+ RETURN
+ TRUE Yes
+ FALSE No, or this is an invalid subquery
+*/
+static
+bool subquery_types_allow_materialization(THD *thd, Item_in_subselect *in_subs)
+{
+ DBUG_ENTER("subquery_types_allow_materialization");
+ // psergey-todo: pull out all code like this into a function:
+ if (!in_subs->left_expr->fixed)
+ {
+ SELECT_LEX *save_lex= thd->lex->current_select;
+ thd->lex->current_select= save_lex->outer_select();
+ char const *save_where= thd->where;
+ thd->where= "IN/ALL/ANY subquery";
+ bool res= in_subs->left_expr->fix_fields(thd, &in_subs->left_expr);
+ thd->where= save_where;
+ thd->lex->current_select=save_lex;
+ if (res)
+ DBUG_RETURN(FALSE);
+ }
+
+ List_iterator<Item> it(in_subs->unit->first_select()->item_list);
+ uint elements= in_subs->unit->first_select()->item_list.elements;
+ if (in_subs->left_expr->cols() != elements)
+ DBUG_RETURN(FALSE);
+
+ for (uint i= 0; i < elements; i++)
+ {
+ Item *outer= in_subs->left_expr->element_index(i);
+ Item *inner= it++;
+ if (outer->result_type() != inner->result_type())
+ DBUG_RETURN(FALSE);
+ switch (outer->result_type()) {
+ case STRING_RESULT:
+ if (outer->is_datetime() != inner->is_datetime())
+ DBUG_RETURN(FALSE);
+
+ if (!(outer->collation.collation == inner->collation.collation
+ /*&& outer->max_length <= inner->max_length */))
+ DBUG_RETURN(FALSE);
+ /*case INT_RESULT:
+ if (!(outer->unsigned_flag ^ inner->unsigned_flag))
+ DBUG_RETURN(FALSE); */
+ default:
+ ;/* suitable for materialization */
+ }
+ }
+ DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed"));
+ DBUG_RETURN(TRUE);
+}
+
+/*
Remove the predicates pushed down into the subquery
SYNOPSIS
@@ -952,7 +1018,7 @@ TABLE *create_duplicate_weedout_tmp_tabl
be used.
DESCRIPTION
- Setup the strategies to eliminate semi-join duplicates. ATM there are 3
+ Setup the strategies to eliminate semi-join duplicates. ATM there are 4
strategies:
1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
@@ -960,6 +1026,7 @@ TABLE *create_duplicate_weedout_tmp_tabl
2. FirstMatch (pick only the 1st matching row combination of inner tables)
3. InsideOut (scanning the sj-inner table in a way that groups duplicates
together and picking the 1st one)
+ 4. SJ-Materialization.
The join order has "duplicate-generating ranges", and every range is
served by one strategy or a combination of FirstMatch with with some
@@ -1083,6 +1150,9 @@ int setup_semijoin_dups_elimination(JOIN
TABLE *table=tab->table;
cur_map |= table->map;
+ if (join->best_positions[i].use_sj_mat)
+ continue;
+
if (tab->emb_sj_nest) // Encountered an sj-inner table
{
if (!emb_sj_map)
@@ -1204,7 +1274,7 @@ int setup_semijoin_dups_elimination(JOIN
}
THD *thd= join->thd;
- SJ_TMP_TABLE **next_sjtbl_ptr= &join->sj_tmp_tables;
+ //SJ_TMP_TABLE **next_sjtbl_ptr= &join->sj_tmp_tables;
/*
The second pass: setup the chosen strategies
*/
@@ -1275,9 +1345,6 @@ int setup_semijoin_dups_elimination(JOIN
sjtbl->null_bits= jt_null_bits;
sjtbl->null_bytes= (jt_null_bits + 7)/8;
- *next_sjtbl_ptr= sjtbl;
- next_sjtbl_ptr= &(sjtbl->next);
- sjtbl->next= NULL;
sjtbl->tmp_table=
create_duplicate_weedout_tmp_table(thd,
@@ -1285,6 +1352,10 @@ int setup_semijoin_dups_elimination(JOIN
sjtbl->null_bytes,
sjtbl);
+ join->sj_tmp_tables.push_back(sjtbl->tmp_table);
+ //*next_sjtbl_ptr= sjtbl;
+ //next_sjtbl_ptr= &(sjtbl->next);
+ //sjtbl->next= NULL;
join->join_tab[dups_ranges[j].start_idx].flush_weedout_table= sjtbl;
join->join_tab[dups_ranges[j].end_idx - 1].check_weed_out_table= sjtbl;
}
@@ -1311,6 +1382,7 @@ int setup_semijoin_dups_elimination(JOIN
static void destroy_sj_tmp_tables(JOIN *join)
{
+#if 0
for (SJ_TMP_TABLE *sj_tbl= join->sj_tmp_tables; sj_tbl;
sj_tbl= sj_tbl->next)
{
@@ -1318,6 +1390,14 @@ static void destroy_sj_tmp_tables(JOIN *
free_tmp_table(join->thd, sj_tbl->tmp_table);
}
join->sj_tmp_tables= NULL;
+#endif
+ List_iterator<TABLE> it(join->sj_tmp_tables);
+ TABLE *table;
+ while ((table= it++))
+ {
+ free_tmp_table(join->thd, table);
+ }
+ join->sj_tmp_tables.empty();
}
@@ -1327,7 +1407,7 @@ static void destroy_sj_tmp_tables(JOIN *
static int clear_sj_tmp_tables(JOIN *join)
{
- int res;
+#if 0
for (SJ_TMP_TABLE *sj_tbl= join->sj_tmp_tables; sj_tbl;
sj_tbl= sj_tbl->next)
{
@@ -1337,6 +1417,15 @@ static int clear_sj_tmp_tables(JOIN *joi
return res;
}
}
+#endif
+ int res;
+ List_iterator<TABLE> it(join->sj_tmp_tables);
+ TABLE *table;
+ while ((table= it++))
+ {
+ if ((res= table->file->ha_delete_all_rows()))
+ return res;
+ }
return 0;
}
@@ -1588,7 +1677,7 @@ JOIN::optimize()
/* Handle the case where we have an OUTER JOIN without a WHERE */
conds=new Item_int((longlong) 1,1); // Always true
}
- select= make_select(*table, const_table_map,
+ select_= make_select(*table, const_table_map,
const_table_map, conds, 1, &error);
if (error)
{ /* purecov: inspected */
@@ -1637,7 +1726,7 @@ JOIN::optimize()
{
conds=new Item_int((longlong) 0,1); // Always false
}
- if (make_join_select(this, select, conds))
+ if (make_join_select(this, select_, conds))
{
zero_result_cause=
"Impossible WHERE noticed after reading const tables";
@@ -1854,7 +1943,7 @@ JOIN::optimize()
(select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
(select_lex->ftfunc_list->elements ? SELECT_NO_JOIN_CACHE : 0);
- sj_tmp_tables= NULL;
+ //sj_tmp_tables= NULL;
if (!select_lex->sj_nests.is_empty())
setup_semijoin_dups_elimination(this, select_opts_for_readinfo,
no_jbuf_after);
@@ -2852,7 +2941,7 @@ JOIN::destroy()
if (exec_tmp_table2)
free_tmp_table(thd, exec_tmp_table2);
destroy_sj_tmp_tables(this);
- delete select;
+ delete select_;
delete_dynamic(&keyuse);
delete procedure;
DBUG_RETURN(error);
@@ -3199,6 +3288,7 @@ bool convert_subq_to_sj(JOIN *parent_joi
sj_nest->join_list= emb_join_list;
sj_nest->embedding= emb_tbl_nest;
sj_nest->alias= (char*) "(sj-nest)";
+ sj_nest->sj_subq_pred= subq_pred;
/* Nests do not participate in those 'chains', so: */
/* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
emb_join_list->push_back(sj_nest);
@@ -3416,8 +3506,10 @@ bool JOIN::flatten_subqueries()
tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES;
in_subq++)
{
- if (replace_where_subcondition(this, (*in_subq)->emb_on_expr_nest,
- *in_subq, new Item_int(1), FALSE))
+ Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)?
+ &conds : &((*in_subq)->emb_on_expr_nest->on_expr);
+ if (replace_where_subcondition(this, tree, *in_subq, new Item_int(1),
+ FALSE))
DBUG_RETURN(TRUE);
}
@@ -3455,8 +3547,10 @@ bool JOIN::flatten_subqueries()
Item *substitute= (*in_subq)->substitution;
bool do_fix_fields= !(*in_subq)->substitution->fixed;
- if (replace_where_subcondition(this, (*in_subq)->emb_on_expr_nest,
- *in_subq, substitute, do_fix_fields))
+ Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)?
+ &conds : &((*in_subq)->emb_on_expr_nest->on_expr);
+ if (replace_where_subcondition(this, tree, *in_subq, substitute,
+ do_fix_fields))
DBUG_RETURN(TRUE);
}
sj_subselects.clear();
@@ -3660,6 +3754,15 @@ int pull_out_semijoin_tables(JOIN *join)
pulled_tables |= tbl->table->map;
DBUG_PRINT("info", ("Table %s pulled out (reason: func dep)",
tbl->table->alias));
+ /*
+ Pulling a table out of uncorrelated subquery can make it
+ correlated.
+ TODO SergeyP: handle this in a smarter way:
+ - we could still use InsideOut
+ - we could still use SJ-Materialization if we pull the tables
+ back in.
+ */
+ sj_nest->sj_subq_pred->is_correlated= TRUE;
}
}
}
@@ -3769,6 +3872,74 @@ typedef struct st_sargable_param
uint num_values; /* number of values in the above array */
} SARGABLE_PARAM;
+
+#ifndef DBUG_OFF
+static void print_sjm(SJ_MATERIALIZE_INFO *sjm)
+{
+ DBUG_LOCK_FILE;
+ fprintf(DBUG_FILE, "\nsemi-join nest{\n");
+ fprintf(DBUG_FILE, " tables { \n");
+ for (uint i= 0;i < sjm->n_tables; i++)
+ {
+ fprintf(DBUG_FILE, " %s%s\n",
+ sjm->positions[i].table->table->alias,
+ (i == sjm->n_tables -1)? "": ",");
+ }
+ fprintf(DBUG_FILE, " }\n");
+ fprintf(DBUG_FILE, " materialize_cost= %g\n",
+ sjm->materialization_cost.total_cost());
+ fprintf(DBUG_FILE, " rows= %g\n", sjm->rows);
+ fprintf(DBUG_FILE, "}\n");
+ DBUG_UNLOCK_FILE;
+}
+#endif
+
+
+/*
+ Get an approximate length of the temporary table column
+*/
+
+static uint get_tmp_table_rec_length(List<Item> &items)
+{
+ uint len= 0;
+ Item *item;
+ List_iterator<Item> it(items);
+ while ((item= it++))
+ {
+ switch (item->result_type()) {
+ case REAL_RESULT:
+ len += sizeof(double);
+ break;
+ case INT_RESULT:
+ if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1))
+ len += 8;
+ else
+ len += 4;
+ break;
+ case STRING_RESULT:
+ enum enum_field_types type;
+ /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type. */
+ if ((type= item->field_type()) == MYSQL_TYPE_DATETIME ||
+ type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE ||
+ type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY)
+ len += 8;
+ else
+ len += item->max_length;
+ break;
+ case DECIMAL_RESULT:
+ len += 10;
+ break;
+ case ROW_RESULT:
+ default:
+ // This case should never be choosen
+ DBUG_ASSERT(0);
+ break;
+ }
+ }
+ return len;
+}
+
+
/**
Calculate the best possible join and initialize the join structure.
@@ -4207,10 +4378,79 @@ make_join_statistics(JOIN *join, TABLE_L
join->const_tables=const_count;
join->found_const_table_map=found_const_table_map;
+ if (join->const_tables != join->tables)
+ optimize_keyuse(join, keyuse_array);
+ /* Process semi-join nests that could be run with sj-materialization */
+ {
+ List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests);
+ TABLE_LIST *sj_nest;
+ while ((sj_nest= sj_list_it++))
+ {
+ sj_nest->sj_mat_info= NULL;
+ if (sj_nest->sj_inner_tables && /* not everything was pulled out */
+ !sj_nest->sj_subq_pred->is_correlated &&
+ sj_nest->sj_subq_pred->types_allow_materialization)
+ {
+ join->emb_sjm_nest= sj_nest;
+ if (choose_plan(join, all_table_map))
+ DBUG_RETURN(TRUE);
+ /*
+ The best plan to run the subquery is now in join->best_positions,
+ save it.
+ */
+ uint n_tables= my_count_bits(sj_nest->sj_inner_tables);
+ SJ_MATERIALIZE_INFO* sjm;
+ if (!(sjm= new SJ_MATERIALIZE_INFO) ||
+ !(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)*
+ n_tables)))
+ DBUG_RETURN(TRUE);
+ sjm->n_tables= n_tables;
+ sjm->is_used= FALSE;
+ double subjoin_out_rows, subjoin_read_time;
+ get_partial_join_cost(join, n_tables,
+ &subjoin_read_time, &subjoin_out_rows);
+
+ sjm->materialization_cost.set_double(subjoin_read_time);
+ sjm->rows= subjoin_out_rows;
+ memcpy(sjm->positions, join->best_positions + join->const_tables,
+ sizeof(POSITION) * n_tables);
+
+ for (uint j= 0; j < sjm->n_tables ; j++)
+ sjm->positions[j].use_sj_mat= SJ_MAT_INNER;
+ sjm->positions[0].use_sj_mat |= SJ_MAT_FIRST;
+ sjm->positions[sjm->n_tables - 1].use_sj_mat |= SJ_MAT_LAST;
+
+ /*
+ Calculate temporary table parameters
+ */
+ uint rowlen= get_tmp_table_rec_length(sj_nest->sj_subq_pred->unit->
+ first_select()->item_list);
+ double lookup_cost;
+ if (rowlen * subjoin_out_rows< join->thd->variables.max_heap_table_size)
+ {
+ sjm->materialization_cost.add_io(0.05, subjoin_out_rows);
+ lookup_cost= 0.05;
+ }
+ else
+ {
+ sjm->materialization_cost.add_io(1.0, subjoin_out_rows);
+ lookup_cost= 1;
+ }
+ sjm->lookup_cost.set_double(lookup_cost);
+ sj_nest->sj_mat_info= sjm;
+ // psergey-todo: handle the case where the subquery was initially
+ // uncorrelated but became correlated after we have pulled out a
+ // table.
+ DBUG_EXECUTE("opt", print_sjm(sjm););
+ }
+ }
+ join->emb_sjm_nest= NULL;
+ }
+
/* Find an optimal join order of the non-constant tables. */
if (join->const_tables != join->tables)
{
- optimize_keyuse(join, keyuse_array);
+ // optimize_keyuse(join, keyuse_array);
if (choose_plan(join, all_table_map & ~join->const_table_map))
DBUG_RETURN(TRUE);
}
@@ -5307,6 +5547,10 @@ set_position(JOIN *join,uint idx,JOIN_TA
join->positions[idx].records_read=1.0; /* This is a const table */
join->positions[idx].ref_depend_map= 0;
+ join->positions[idx].insideout_key= MAX_KEY; /* Not an insideout scan */
+ join->positions[idx].use_sj_mat= FALSE;
+ join->positions[idx].use_join_buffer= FALSE;
+
/* Move the const table as down as possible in best_ref */
JOIN_TAB **pos=join->best_ref+idx+1;
JOIN_TAB *next=join->best_ref[idx];
@@ -5410,6 +5654,7 @@ best_access_path(JOIN *join,
uint sj_insideout_quick_select= FALSE;
uint sj_insideout_quick_max_sj_keypart;
uint sj_inside_out_scan= MAX_KEY;
+ bool best_uses_jbuf;
DBUG_ENTER("best_access_path");
if (s->keyuse)
@@ -5431,7 +5676,7 @@ best_access_path(JOIN *join,
5. But some of the IN-equalities aren't (so this can't be handled by
FirstMatch strategy)
*/
- if (s->emb_sj_nest && // (1)
+ if (!join->emb_sjm_nest && s->emb_sj_nest && // (1)
s->emb_sj_nest->sj_in_exprs < 64 &&
((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (2)
s->emb_sj_nest->sj_inner_tables) && // (2)
@@ -5479,7 +5724,6 @@ best_access_path(JOIN *join,
do /* For each way to access the keypart */
{
-
/*
if 1. expression doesn't refer to forward tables
2. we won't get two ref-or-null's
@@ -5885,6 +6129,7 @@ best_access_path(JOIN *join,
best_ref_depends_map= found_ref;
best_is_sj_inside_out= sj_inside_out_scan;
best_sj_keyparts= max_sj_keypart;
+ best_uses_jbuf= FALSE;
}
} /* for each key */
records= best_records;
@@ -6023,6 +6268,7 @@ best_access_path(JOIN *join,
best_ref_depends_map= 0;
best_is_sj_inside_out= sj_inside_out_scan;
best_sj_keyparts= sj_insideout_quick_max_sj_keypart;
+ best_uses_jbuf= test(idx != join->const_tables);
}
}
@@ -6034,6 +6280,8 @@ best_access_path(JOIN *join,
join->positions[idx].ref_depend_map= best_ref_depends_map;
join->positions[idx].insideout_key= best_is_sj_inside_out;
join->positions[idx].insideout_parts= best_sj_keyparts + 1;
+ join->positions[idx].use_sj_mat= 0;
+ join->positions[idx].use_join_buffer= best_uses_jbuf;
if (!best_key &&
idx == join->const_tables &&
@@ -6058,6 +6306,9 @@ best_access_path(JOIN *join,
the query
@param join_tables set of the tables in the query
+ @param sj_nest If not NULL, optimize a subjoin of tables within the
+ given semi-join nest.
+
@todo
'MAX_TABLES+2' denotes the old implementation of find_best before
the greedy version. Will be removed when greedy_search is approved.
@@ -6086,10 +6337,19 @@ choose_plan(JOIN *join, table_map join_t
Apply heuristic: pre-sort all access plans with respect to the number of
records accessed.
*/
- my_qsort(join->best_ref + join->const_tables,
- join->tables - join->const_tables, sizeof(JOIN_TAB*),
- straight_join ? join_tab_cmp_straight : join_tab_cmp);
+ qsort2_cmp jtab_sort_func;
+ if (join->emb_sjm_nest)
+ jtab_sort_func= join_tab_cmp_embedded_first;
+ else
+ jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
+ my_qsort2(join->best_ref + join->const_tables,
+ join->tables - join->const_tables, sizeof(JOIN_TAB*),
+ jtab_sort_func, (void*)join->emb_sjm_nest);
join->cur_emb_sj_nests= 0;
+
+ //if (sj_nest)
+ // join_tables= sj_nest->sj_inner_tables;
+
if (straight_join)
{
optimize_straight_join(join, join_tables);
@@ -6153,7 +6413,7 @@ choose_plan(JOIN *join, table_map join_t
*/
static int
-join_tab_cmp(const void* ptr1, const void* ptr2)
+join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2)
{
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
@@ -6175,18 +6435,50 @@ join_tab_cmp(const void* ptr1, const voi
*/
static int
-join_tab_cmp_straight(const void* ptr1, const void* ptr2)
+join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2)
+{
+ JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
+ JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
+
+ if (jt1->dependent & jt2->table->map)
+ return 1;
+ if (jt2->dependent & jt1->table->map)
+ return -1;
+ return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
+}
+
+
+/*
+ Same as join_tab_cmp but tables from within the given semi-join nest go
+ first. Used when the optimizing semi-join materialization nests.
+*/
+
+static int
+join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void* ptr2)
{
+ const TABLE_LIST *emb_nest= (TABLE_LIST*) emb;
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
+ if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest)
+ return -1;
+ if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest)
+ return 1;
+
if (jt1->dependent & jt2->table->map)
return 1;
if (jt2->dependent & jt1->table->map)
return -1;
+
+ if (jt1->found_records > jt2->found_records)
+ return 1;
+ if (jt1->found_records < jt2->found_records)
+ return -1;
+
return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
}
+
/**
Heuristic procedure to automatically guess a reasonable degree of
exhaustiveness for the greedy search procedure.
@@ -6294,6 +6586,55 @@ optimize_straight_join(JOIN *join, table
}
+/*
+ Check if the last tables of the partial join order allow to use
+ sj-materialization strategy for them
+
+ SYNOPSIS
+ at_sjmat_pos()
+ join
+ remaining_tables
+ tab the last table's join tab
+ idx last table's index
+
+ RETURN
+ TRUE Yes, can apply sj-materialization
+ FALSE No, some of the requirements are not met
+*/
+
+SJ_MATERIALIZE_INFO *
+at_sjmat_pos(JOIN *join, table_map remaining_tables, JOIN_TAB *tab, uint idx)
+{
+ /*
+ Check if
+ 1. We're in a semi-join nest that is in uncorrelated semi-join
+ 2. All the tables correlated through the IN subquery are in the prefix
+ */
+ TABLE_LIST *emb_sj_nest= tab->emb_sj_nest;
+ if (emb_sj_nest && emb_sj_nest->sj_mat_info && // (1)-(2)
+ !(remaining_tables & ~tab->table->map & // (3)
+ (emb_sj_nest->sj_inner_tables |
+ emb_sj_nest->nested_join->sj_depends_on))) // (3)
+ {
+ /*
+ Walk back and check if all immediately preceding tables are from
+ this semi-join
+ psergey-sjm-todo: also count other option costs here.
+ and jbuf use.
+ */
+ uint n_tables= my_count_bits(tab->emb_sj_nest->sj_inner_tables);
+ for (uint i= 1; i < n_tables ; i++)
+ {
+ if (join->positions[idx - i].table->emb_sj_nest != tab->emb_sj_nest)
+ return NULL;
+ }
+ return emb_sj_nest->sj_mat_info;
+ }
+ return NULL;
+}
+
+
+
/**
Find a good, possibly optimal, query execution plan (QEP) by a greedy search.
@@ -6388,11 +6729,15 @@ greedy_search(JOIN *join,
uint size_remain; // cardinality of remaining_tables
POSITION best_pos;
JOIN_TAB *best_table; // the next plan node to be added to the curr QEP
+ uint n_tables; // ==join->tables or # tables in the sj-mat nest we're optimizing
DBUG_ENTER("greedy_search");
/* number of tables that remain to be optimized */
- size_remain= my_count_bits(remaining_tables);
+ n_tables= size_remain= my_count_bits(remaining_tables &
+ (join->emb_sjm_nest?
+ join->emb_sjm_nest->sj_inner_tables :
+ ~(table_map)0));
do {
/* Find the extension of the current QEP with the lowest cost */
@@ -6407,9 +6752,20 @@ greedy_search(JOIN *join,
'join->best_positions' contains a complete optimal extension of the
current partial QEP.
*/
- DBUG_EXECUTE("opt", print_plan(join, join->tables,
- record_count, read_time, read_time,
- "optimal"););
+ DBUG_EXECUTE("opt", print_plan(join, n_tables, record_count, read_time,
+ read_time, "optimal"););
+ // psergey-sjm-todo: is this the most appropriate place:
+ for (uint i= join->const_tables; i < n_tables + join->const_tables; i++)
+ {
+ if (join->best_positions[i].use_sj_mat)
+ {
+ SJ_MATERIALIZE_INFO *sjm=
+ join->best_positions[i].table->emb_sj_nest->sj_mat_info;
+ memcpy(join->best_positions + i - sjm->n_tables + 1,
+ sjm->positions,
+ sizeof(POSITION) * sjm->n_tables);
+ }
+ }
DBUG_RETURN(FALSE);
}
@@ -6440,13 +6796,49 @@ greedy_search(JOIN *join,
--size_remain;
++idx;
- DBUG_EXECUTE("opt", print_plan(join, join->tables,
- record_count, read_time, read_time,
- "extended"););
+ DBUG_EXECUTE("opt", print_plan(join, n_tables, record_count, read_time,
+ read_time, "extended"););
} while (TRUE);
}
+/*
+ Calculate a cost of given partial join order
+ SYNOPSIS
+ get_partial_join_cost()
+ join IN Join to use. join->positions holds the
+ partial join order
+ idx IN # tables in the partial join order
+ read_time_arg OUT Store read time here
+ record_count_arg OUT Store record count here
+
+ DESCRIPTION
+ This is needed for semi-join materialization code. The idea is that
+ we detect sj-materialization use at this stage:
+
+ prefix-tables semi-join-inner-tables (*)
+ ^--we're here
+ and we'll need to get cost of prefix-tables prefix again.
+*/
+
+void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg,
+ double *record_count_arg)
+{
+ double record_count= 1;
+ double read_time= 0.0;
+ for (uint i= join->const_tables; i < n_tables + join->const_tables ; i++)
+ {
+ if (join->positions[i].records_read)
+ {
+ record_count *= join->positions[i].records_read;
+ read_time += join->positions[i].read_time;
+ }
+ }
+ *read_time_arg= read_time;// + record_count / TIME_FOR_COMPARE;
+ *record_count_arg= record_count;
+}
+
+
/**
Find a good, possibly optimal, query execution plan (QEP) by a possibly
exhaustive search.
@@ -6593,10 +6985,15 @@ best_extension_by_limited_search(JOIN
DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
"part_plan"););
+ table_map allowed_tables= ~(table_map)0;
+ if (join->emb_sjm_nest)
+ allowed_tables= join->emb_sjm_nest->sj_inner_tables;
+
for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
{
table_map real_table_bit= s->table->map;
if ((remaining_tables & real_table_bit) &&
+ (allowed_tables & real_table_bit) &&
!(remaining_tables & s->dependent) &&
(!idx || !check_interleaving_with_nj(join->positions[idx-1].table, s)))
{
@@ -6616,6 +7013,81 @@ best_extension_by_limited_search(JOIN
/* Compute the cost of extending the plan with 's' */
current_record_count= record_count * join->positions[idx].records_read;
current_read_time= read_time + join->positions[idx].read_time;
+
+ // psergey-todo: copy this also to find_best().
+ SJ_MATERIALIZE_INFO *mat_info;
+ if ((mat_info= at_sjmat_pos(join, remaining_tables, s, idx)))
+ {
+ /*
+ We're at semi-join materialiazation position, i.e. the partial
+ join order looks like this:
+
+ (outer_table | other_tbl)*, { inner_table+ }
+
+ We need make a choice whether to do sj-materialization. This is a
+ choice between sj-materialization and either FirstMatch or
+ DuplicateWeedout. We'll make the choice based on costs.
+
+ For materialization, we nede to replace the cost calculations
+ about the last mat_info->n_tables with cost of materialialization
+ and lookup.
+ */
+ double mat_read_time;
+ double prefix_record_count;
+ get_partial_join_cost(join, idx + 1 - mat_info->n_tables,
+ &mat_read_time, &prefix_record_count);
+ /*
+ mat_read_time= subquery_run_cost + // cost to run the subquery
+ storage_cost + // cost to populate the temptable
+ subquery_record_count * lookup_cost
+ */
+ mat_read_time += mat_info->materialization_cost.total_cost() +
+ prefix_record_count * mat_info->lookup_cost.total_cost();
+
+ /*
+ Calculate the cost of the other option. It is current_read_time
+ plus cost to remove the duplicates, which is
+ 0, if we're using FirstMatch
+ temp table use cost, if we're using Duplicate Weedout.
+
+ All outer tables are followed by the inner, so Duplicate Weedout
+ is used iff we're using join buffering.
+ */
+ bool dups_weedout= FALSE;
+ for (uint j= idx - mat_info->n_tables; j <= idx; j++)
+ {
+ if (join->positions[j].use_join_buffer)
+ {
+ dups_weedout= TRUE;
+ break;
+ }
+ }
+ double nonmat_read_time= current_read_time;
+ if (dups_weedout)
+ {
+ /*
+ Add the cost of temptable use. Temptable tuple is 8 bytes
+ (we'll make it so), there are
+ #outer_table_row_combinations records,
+ #outer_table_row_combinations * #inner_records lookups.
+ */
+ bool is_disk_table= test(prefix_record_count * 8 >
+ join->thd->variables.max_heap_table_size);
+
+ double write_cost= prefix_record_count * (is_disk_table? 1.0: 0.05);
+ double lookup_cost= current_record_count * (is_disk_table? 1.0:0.05);
+ nonmat_read_time += write_cost + lookup_cost;
+ }
+
+ if (mat_read_time < nonmat_read_time)
+ {
+ /*
+ Don't memcpy the POSITION's to join->positions. It will be hard
+ to return things back. Just note the use of materialization.
+ */
+ join->positions[idx].use_sj_mat= SJ_MAT_LAST;
+ }
+ }
/* Expand only partial plans with lower cost than the best QEP so far */
if ((current_read_time +
@@ -6667,7 +7139,7 @@ best_extension_by_limited_search(JOIN
}
}
- if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) )
+ if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
{ /* Recursively expand the current partial plan */
swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
if (best_extension_by_limited_search(join,
@@ -6976,6 +7448,9 @@ get_best_combination(JOIN *join)
DBUG_PRINT("info",("type: %d", j->type));
if (j->type == JT_CONST)
continue; // Handled in make_join_stat..
+ if (join->best_positions[tablenr].use_sj_mat)
+ j->emb_sj_nest->sj_mat_info->is_used= TRUE;
+
j->insideout_match_tab= NULL;
j->ref.key = -1;
j->ref.key_parts=0;
@@ -8269,6 +8744,229 @@ uint make_join_orderinfo(JOIN *join)
/*
+ Next_select_func-compatible function that writes data to semi-join
+ temporary table. (different from the standard
+*/
+
+static enum_nested_loop_state
+end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
+{
+ int error;
+ THD *thd= join->thd;
+ SJ_MATERIALIZE_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info;
+ DBUG_ENTER("end_sj_materialize");
+ if (!end_of_records)
+ {
+ TABLE *table= sjm->table;
+
+ List_iterator<Item> it(sjm->sjm_table_cols);
+ Item *item;
+ while ((item= it++))
+ {
+ if (item->is_null()) //psergey-sjm-todo: check if this is the right call
+ DBUG_RETURN(NESTED_LOOP_OK);
+ }
+ fill_record(thd, table->field, sjm->sjm_table_cols, 1);
+ if (thd->is_error()) /* psergey-todo: do we need this? */
+ DBUG_RETURN(NESTED_LOOP_ERROR);
+ if ((error= table->file->ha_write_row(table->record[0])))
+ {
+ /* create_myisam_from_heap will generate error if needed */
+ if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
+ create_myisam_from_heap(thd, table,
+ sjm->sjm_table_param.start_recinfo,
+ &sjm->sjm_table_param.recinfo, error, 1))
+ DBUG_RETURN(NESTED_LOOP_ERROR);
+ }
+ }
+ DBUG_RETURN(NESTED_LOOP_OK);
+}
+
+
+/* Check if given Item was injected by semi-join equality */
+static bool is_cond_sj_in_equality(Item *item)
+{
+ return test(item->name >= subq_sj_cond_name &&
+ item->name < subq_sj_cond_name + 64);
+}
+
+
+void remove_sj_conds(Item **tree)
+{
+ if (*tree)
+ {
+ if (is_cond_sj_in_equality(*tree))
+ {
+ *tree= NULL;
+ return;
+ }
+ else if ((*tree)->type() == Item::COND_ITEM)
+ {
+ Item *item;
+ List_iterator<Item> li(*(((Item_cond*)*tree)->argument_list()));
+ while ((item= li++))
+ {
+ if (is_cond_sj_in_equality(item))
+ li.replace(new Item_int(1));
+ }
+ }
+ }
+}
+
+
+/*
+
+*/
+
+Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZE_INFO *sjm,
+ Item_in_subselect *subq_pred)
+{
+ SELECT_LEX *subq_lex= subq_pred->unit->first_select();
+ Item *res= NULL;
+ if (subq_pred->left_expr->cols() == 1)
+ {
+ res= new Item_func_eq(subq_pred->left_expr,
+ new Item_field(sjm->table->field[0]));
+ }
+ else
+ {
+ Item *conj;
+ for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+ {
+ //todo: create equality based on the temp table
+ if (!(conj= new Item_func_eq(subq_pred->left_expr->element_index(i),
+ //subq_lex->ref_pointer_array[i])) ||
+ new Item_field(sjm->table->field[i]))) ||
+ !(res= and_items(res, conj)))
+ return NULL;
+ }
+ }
+ if (res->fix_fields(thd, &res))
+ return NULL;
+ return res;
+}
+
+
+/*
+ Setup semi-join materialization for one nest
+
+ setup_sj_materialization()
+ tab The first tab in the semi-join
+
+ //TODO: copy a part of this to join::reinit or whatever.
+*/
+
+bool setup_sj_materialization(JOIN_TAB *tab)
+{
+ uint i;
+ DBUG_ENTER("setup_sj_materialization");
+ TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
+ SJ_MATERIALIZE_INFO *sjm= emb_sj_nest->sj_mat_info;
+ THD *thd= tab->join->thd;
+ /* First the calls come to the materialization function */
+ tab[-1].next_select= sub_select_sjm;
+ //Item *left_expr= emb_sj_nest->sj_subq_pred->left_expr;
+ List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list;
+
+ /*
+ Set up the table to write to, do as select_union::create_result_table
+ does
+ */
+ sjm->sjm_table_param.init();
+ sjm->sjm_table_param.field_count= item_list.elements;
+ sjm->sjm_table_param.bit_fields_as_long= TRUE;
+ List_iterator<Item> it(item_list);
+ Item *right_expr;
+ while((right_expr= it++))
+ sjm->sjm_table_cols.push_back(right_expr);
+
+ if (!(sjm->table= create_tmp_table(thd, &sjm->sjm_table_param,
+ sjm->sjm_table_cols, (ORDER*) 0,
+ TRUE /* distinct */,
+ 1, /*save_sum_fields*/
+ thd->options | TMP_TABLE_ALL_COLUMNS,
+ HA_POS_ERROR /*rows_limit */,
+ (char*)"sj-materialize")))
+ DBUG_RETURN(TRUE);
+ sjm->table->file->extra(HA_EXTRA_WRITE_CACHE);
+ sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+ tab->join->sj_tmp_tables.push_back(sjm->table);
+
+ KEY *tmp_key; /* The only index on the temporary table. */
+ uint tmp_key_parts; /* Number of keyparts in tmp_key. */
+ tmp_key= sjm->table->key_info;
+ tmp_key_parts= tmp_key->key_parts;
+
+ /* Create/initialize execution related objects. */
+ /*
+ Create and initialize the JOIN_TAB that represents an index lookup
+ plan operator into the materialized subquery result. Notice that:
+ - this JOIN_TAB has no corresponding JOIN (and doesn't need one), and
+ - here we initialize only those members that are used by
+ subselect_uniquesubquery_engine, so these objects are incomplete.
+ */
+ TABLE_REF *tab_ref;
+ if (!(tab_ref= (TABLE_REF*) thd->alloc(sizeof(TABLE_REF))))
+ DBUG_RETURN(TRUE);
+ tab_ref->key= 0; /* The only temp table index. */
+ tab_ref->key_length= tmp_key->key_length;
+ if (!(tab_ref->key_buff=
+ (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
+ !(tab_ref->key_copy=
+ (store_key**) thd->alloc((sizeof(store_key*) *
+ (tmp_key_parts + 1)))) ||
+ !(tab_ref->items=
+ (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts)))
+ DBUG_RETURN(TRUE);
+
+ tab_ref->key_buff2=tab_ref->key_buff+ALIGN_SIZE(tmp_key->key_length);
+ tab_ref->key_err=1;
+ tab_ref->null_rejecting= 1;
+ tab_ref->disable_cache= FALSE;
+
+ KEY_PART_INFO *cur_key_part= tmp_key->key_part;
+ store_key **ref_key= tab_ref->key_copy;
+ uchar *cur_ref_buff= tab_ref->key_buff;
+
+ //it.rewind();
+ for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
+ {
+ //tab_ref->items[i]= it++;
+ tab_ref->items[i]= emb_sj_nest->sj_subq_pred->left_expr->element_index(i);
+ int null_count= test(cur_key_part->field->real_maybe_null());
+ *ref_key= new store_key_item(thd, cur_key_part->field,
+ /* TODO:
+ the NULL byte is taken into account in
+ cur_key_part->store_length, so instead of
+ cur_ref_buff + test(maybe_null), we could
+ use that information instead.
+ */
+ cur_ref_buff + null_count,
+ null_count ? tab_ref->key_buff : 0,
+ cur_key_part->length, tab_ref->items[i]);
+ cur_ref_buff+= cur_key_part->store_length;
+ }
+ *ref_key= NULL; /* End marker. */
+ tab_ref->key_err= 1;
+ tab_ref->key_parts= tmp_key_parts;
+ sjm->tab_ref= tab_ref;
+ sjm->materialized= FALSE;
+
+ /* Remove the injected semi-join IN-equalities from join_tab conds */
+ for (i= 0; i < sjm->n_tables; i++)
+ {
+ remove_sj_conds(&tab[i].select_cond);
+ if (tab->select)
+ remove_sj_conds(&tab[i].select->cond);
+ }
+ if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm, emb_sj_nest->sj_subq_pred)))
+ DBUG_RETURN(TRUE);
+
+ DBUG_RETURN(FALSE);
+}
+
+
+/*
Plan refinement stage: do various set ups for the executioner
SYNOPSIS
@@ -8318,6 +9016,13 @@ make_join_readinfo(JOIN *join, ulonglong
(uchar*)join->thd->alloc(tab->insideout_key_len)))
return TRUE;
}
+ if (join->best_positions[i].use_sj_mat & SJ_MAT_FIRST)
+ {
+ /* This is a start of semi-join nest */
+ tab[-1].next_select= sub_select_sjm;
+ if (setup_sj_materialization(tab))
+ return TRUE;
+ }
switch (tab->type) {
case JT_SYSTEM: // Only happens with left join
table->status=STATUS_NO_RECORD;
@@ -8398,11 +9103,29 @@ make_join_readinfo(JOIN *join, ulonglong
using_join_cache= FALSE;
if (i != join->const_tables && !(options & SELECT_NO_JOIN_CACHE) &&
tab->use_quick != 2 && !tab->first_inner && i <= no_jbuf_after &&
- !tab->insideout_match_tab)
+ !tab->insideout_match_tab &&
+ !(join->best_positions[i].use_sj_mat & SJ_MAT_FIRST))
{
- if ((options & SELECT_DESCRIBE) ||
- !join_init_cache(join->thd,join->join_tab+join->const_tables,
- i-join->const_tables))
+ JOIN_TAB *first_tab= join->join_tab+join->const_tables;
+ uint n_tables= i-join->const_tables;
+ /*
+ We normally put all join tables into the join buffer, except for
+ the join prefix.
+ If we're doing semi-join materialization, then we need to only
+ keep track of tables from within the semi-join nest.
+ */
+ if (join->best_positions[i].use_sj_mat)
+ {
+ first_tab= tab;
+ n_tables= 0;
+ while (!join->best_positions[n_tables].use_sj_mat & SJ_MAT_FIRST)
+ {
+ n_tables++;
+ first_tab--;
+ }
+ }
+ if ((options & SELECT_DESCRIBE) ||
+ !join_init_cache(join->thd, first_tab, n_tables))
{
using_join_cache= TRUE;
tab[-1].next_select=sub_select_cache; /* Patch previous */
@@ -9880,8 +10603,8 @@ static int compare_fields_by_table_order
- 0, otherwise.
*/
-static Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
- Item_equal *item_equal)
+Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
+ Item_equal *item_equal)
{
List<Item> eq_list;
Item_func_eq *eq_item= 0;
@@ -9920,12 +10643,65 @@ static Item *eliminate_item_equal(COND *
{
if (eq_item)
eq_list.push_back(eq_item);
- eq_item= new Item_func_eq(item_field, head);
+ /*
+ If item_field belongs to an SJM-nest, then we have this situation:
+
+ outer_tbl1 outer_tbl2 SJM( inner_tbl1 inner_tbl2 ) outer_tbl3
+
+ SJM nests are made from uncorrelated subqueries, and IN-equality
+ tables must be in the prefix (outer_tbl{1,2} on the pic), so this
+ function will never try to construct an equality like
+ inner_tblX.col = outer_tbl3.col.
+
+ We might try to construct an equality like
+
+ outer_tbl1.col = inner_tblX.col
+
+ though, and that is a problem because this equality should not be
+ created - if we attach it to inner_tbl1, it will get evaluated
+ during materialization and that will cause us to miss records.
+ */
+ TABLE_LIST *emb_nest=
+ item_field->field->table->pos_in_table_list->embedding;
+ if (!item_const && emb_nest && emb_nest->sj_mat_info &&
+ emb_nest->sj_mat_info->is_used)
+ {
+ /*
+ Find the first equal expression that refers to a table that is
+ within the semijoin nest. If we can't find it, do nothing
+ */
+ List_iterator<Item_field> fit(item_equal->fields);
+ Item_field *head_in_sjm;
+ bool found= FALSE;
+ while ((head_in_sjm= fit++))
+ {
+ if (head_in_sjm->used_tables() & emb_nest->sj_inner_tables)
+ {
+ if (head_in_sjm == item_field)
+ {
+ /* This is the first table inside the semi-join*/
+ eq_item= new Item_func_eq(item_field, head);
+ /* Tell make_cond_for_table don't use this. */
+ eq_item->marker=3;
+ }
+ else
+ {
+ eq_item= new Item_func_eq(item_field, head_in_sjm);
+ found= TRUE;
+ }
+ break;
+ }
+ }
+ if (!found)
+ continue;
+ }
+ else
+ eq_item= new Item_func_eq(item_field, head);
if (!eq_item)
return 0;
eq_item->set_cmp_func();
eq_item->quick_fix_field();
- }
+ }
}
if (!cond && !eq_list.head())
@@ -9966,6 +10742,7 @@ static Item *eliminate_item_equal(COND *
After this the function retrieves all other conjuncted
predicates substitute every field reference by the field reference
to the first equal field or equal constant if there are any.
+
@param cond condition to process
@param cond_equal multiple equalities to take into consideration
@param table_join_idx index to tables determining field preference
@@ -10408,7 +11185,7 @@ propagate_cond_constants(THD *thd, I_Lis
@param join_list list representation of the join to be converted
@param conds conditions to add on expressions for converted joins
@param top true <=> conds is the where condition
-
+ @param in_sj TRUE <=> processing semi-join nest's children
@return
- The new condition, if success
- 0, otherwise
@@ -10841,6 +11618,9 @@ static void restore_prev_nj_state(JOIN_T
}
+/*
+ Update join->cur_emb_sj_nests: add a new join tab
+*/
static
void advance_sj_state(const table_map remaining_tables, const JOIN_TAB *tab)
@@ -10857,6 +11637,7 @@ void advance_sj_state(const table_map re
/*
+ Remove the last join tab from from join->cur_emb_sj_nests bitmap
we assume remaining_tables doesnt contain @tab.
*/
@@ -13292,6 +14073,60 @@ do_select(JOIN *join,List<Item> *fields,
}
+/*
+ Semi-join materialization function
+*/
+
+enum_nested_loop_state
+sub_select_sjm(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
+{
+ int res;
+ enum_nested_loop_state rc;
+ if (end_of_records)
+ return (*join_tab->next_select)(join, join_tab + 1, end_of_records);
+
+ SJ_MATERIALIZE_INFO *sjm= join_tab->emb_sj_nest->sj_mat_info;
+ if (!sjm->materialized)
+ {
+ /*
+ Do the materialization. First, run the join for the sjm nest (we've
+ got table dumper catching row combinations at the last semi-join-nest
+ table)
+ */
+ Next_select_func next_func= join_tab[sjm->n_tables - 1].next_select;
+ join_tab[sjm->n_tables - 1].next_select= end_sj_materialize;
+ if ((rc= sub_select(join, join_tab, FALSE)) < 0)
+ return rc; /* it's NESTED_LOOP_(ERROR|KILLED)*/
+
+ /*
+ Signal the EOF: in case there are tables using join buffering within
+ the materialization nest, this will be the signal to flush everything.
+ */
+ if ((rc= sub_select(join, join_tab, TRUE)) < 0)
+ return rc;
+
+ /*
+ This function's job is done. Call sub_select (this is what's used for
+ eq_ref access) instead of this function from now on.
+ */
+ /* Let it jump over the tables (TODO psergey: need this anymore?) */
+ join_tab[0].next_select= next_func;
+
+ join_tab->read_record.read_record= join_no_more_records;
+ sjm->materialized= TRUE;
+ }
+
+ if ((res= join_read_key2(join_tab, sjm->table, sjm->tab_ref)) == 1)
+ return NESTED_LOOP_ERROR;
+ if (res || !sjm->in_equality->val_int())
+ return NESTED_LOOP_NO_MORE_ROWS;
+
+ return (*join_tab->next_select)(join, join_tab + sjm->n_tables, FALSE);
+
+ return NESTED_LOOP_OK;
+}
+
+
enum_nested_loop_state
sub_select_cache(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
{
@@ -14125,6 +14960,8 @@ join_read_const(JOIN_TAB *tab)
static int
join_read_key(JOIN_TAB *tab)
{
+ return join_read_key2(tab, tab->table, &tab->ref);
+#if 0
int error;
TABLE *table= tab->table;
@@ -14134,7 +14971,7 @@ join_read_key(JOIN_TAB *tab)
}
/* TODO: Why don't we do "Late NULLs Filtering" here? */
- if (cmp_buffer_with_ref(tab) ||
+ if (cmp_buffer_with_ref(tab->join->thd, table, tab->ref) ||
(table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
{
if (tab->ref.key_err)
@@ -14151,9 +14988,43 @@ join_read_key(JOIN_TAB *tab)
}
table->null_row=0;
return table->status ? -1 : 0;
+#endif
}
+/*
+ ref access handler but genericized a bit to support TABLE and TABLE_REF
+ not from the join_tab
+*/
+static int
+join_read_key2(JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
+{
+ int error;
+ if (!table->file->inited)
+ {
+ table->file->ha_index_init(table_ref->key, tab->sorted);
+ }
+
+ /* TODO: Why don't we do "Late NULLs Filtering" here? */
+ if (cmp_buffer_with_ref(tab->join->thd, table, table_ref) ||
+ (table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
+ {
+ if (table_ref->key_err)
+ {
+ table->status=STATUS_NOT_FOUND;
+ return -1;
+ }
+ error=table->file->index_read_map(table->record[0],
+ table_ref->key_buff,
+ make_prev_keypart_map(table_ref->key_parts),
+ HA_READ_KEY_EXACT);
+ if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
+ return report_error(table, error);
+ }
+ table->null_row=0;
+ return table->status ? -1 : 0;
+}
+
/*
ref access method implementation: "read_first" function
@@ -15011,11 +15882,11 @@ static bool test_if_ref(Item_field *left
@return <code>true</code> if there was an error, <code>false</code> if
successful.
*/
-static bool replace_where_subcondition(JOIN *join, TABLE_LIST *emb_nest,
+static bool replace_where_subcondition(JOIN *join, Item **expr,
Item *old_cond, Item *new_cond,
bool do_fix_fields)
{
- Item **expr= (emb_nest == (TABLE_LIST*)1)? &join->conds : &emb_nest->on_expr;
+ //Item **expr= (emb_nest == (TABLE_LIST*)1)? &join->conds : &emb_nest->on_expr;
if (*expr == old_cond)
{
*expr= new_cond;
@@ -16491,6 +17362,21 @@ SORT_FIELD *make_unireg_sortorder(ORDER
records
******************************************************************************/
+
+/*
+ Initialize the join buffering
+ join_init_cache()
+ thd Thread handle
+ tables First tab to put into join buffer
+ table_count Number of tables to put into the cache plus one.
+ That is, the table referred by tables[table_count]
+ will use the join buffering
+
+ RETURN
+ 0 OK
+ 1 Out of memory error
+*/
+
static int
join_init_cache(THD *thd,JOIN_TAB *tables,uint table_count)
{
@@ -16778,24 +17664,23 @@ read_cached_record(JOIN_TAB *tab)
*/
static bool
-cmp_buffer_with_ref(JOIN_TAB *tab)
+cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
{
bool no_prev_key;
- if (!tab->ref.disable_cache)
+ if (!tab_ref->disable_cache)
{
- if (!(no_prev_key= tab->ref.key_err))
+ if (!(no_prev_key= tab_ref->key_err))
{
/* Previous access found a row. Copy its key */
- memcpy(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length);
+ memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
}
}
else
no_prev_key= TRUE;
- if ((tab->ref.key_err= cp_buffer_from_ref(tab->join->thd, tab->table,
- &tab->ref)) ||
+ if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
no_prev_key)
return 1;
- return memcmp(tab->ref.key_buff2, tab->ref.key_buff, tab->ref.key_length)
+ return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
!= 0;
}
@@ -18687,8 +19572,8 @@ void select_describe(JOIN *join, bool ne
join->select_lex->select_number));
/* select_type */
item_list.push_back(new Item_string(join->select_lex->type,
- strlen(join->select_lex->type),
- cs));
+ strlen(join->select_lex->type),
+ cs));
if (tab->type == JT_ALL && tab->select && tab->select->quick)
{
quick_type= tab->select->quick->get_type();
@@ -19016,6 +19901,20 @@ void select_describe(JOIN *join, bool ne
else
extra.append(prev_table->pos_in_table_list->alias);
extra.append(STRING_WITH_LEN(")"));
+ }
+ else if (join->best_positions[i].use_sj_mat)
+ {
+ uint sjmat= join->best_positions[i].use_sj_mat;
+ if ((sjmat & (SJ_MAT_FIRST | SJ_MAT_LAST)) == (SJ_MAT_FIRST |
+ SJ_MAT_LAST))
+ extra.append(STRING_WITH_LEN("; Materialize"));
+ else
+ {
+ if (sjmat & SJ_MAT_FIRST)
+ extra.append(STRING_WITH_LEN("; Start materialize"));
+ if (sjmat & SJ_MAT_LAST)
+ extra.append(STRING_WITH_LEN("; End materialize"));
+ }
}
for (uint part= 0; part < tab->ref.key_parts; part++)
diff -Nrup a/sql/sql_select.h b/sql/sql_select.h
--- a/sql/sql_select.h 2008-06-13 18:11:29 +04:00
+++ b/sql/sql_select.h 2008-06-13 19:59:28 +04:00
@@ -303,7 +303,6 @@ typedef struct st_join_table {
/* A set of flags from the above enum */
int rowid_keep_flags;
-
/* NestedOuterJoins: Bitmap of nested joins this table is part of */
nested_join_map embedding_map;
@@ -324,6 +323,13 @@ enum_nested_loop_state end_send_group(JO
bool end_of_records);
enum_nested_loop_state end_write_group(JOIN *join, JOIN_TAB *join_tab,
bool end_of_records);
+enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab,
+ bool end_of_records);
+
+#define SJ_MAT_FIRST 1
+#define SJ_MAT_INNER 2
+#define SJ_MAT_LAST 4
+
/**
Information about a position of table within a join order. Used in join
@@ -364,6 +370,15 @@ typedef struct st_position
uint insideout_key;
/* Number of key parts to be used by insideout */
uint insideout_parts;
+
+ /*
+ 0 - not using semi-join materialization
+ sj_mat_* - using semi-join materialization, the value specifies whether
+ this is a first/last/just some inner tab.
+ */
+ uint use_sj_mat;
+
+ bool use_join_buffer;
} POSITION;
@@ -457,14 +472,21 @@ public:
- on each fetch iteration we add num_rows to fetch to fetch_limit
*/
ha_rows fetch_limit;
- POSITION positions[MAX_TABLES+1],best_positions[MAX_TABLES+1];
+ POSITION positions[MAX_TABLES+1], best_positions[MAX_TABLES+1];
- /* *
+ /*
Bitmap of nested joins embedding the position at the end of the current
partial join (valid only during join optimizer run).
*/
nested_join_map cur_embedding_map;
+ table_map cur_emb_sj_nests;
+ /*
+ pointer - we're doing optimization for a semi-join materialization nest.
+ NULL - otherwise
+ */
+ TABLE_LIST *emb_sjm_nest;
+
double best_read;
List<Item> *fields;
List<Cached_item> group_fields, group_fields_cache;
@@ -541,7 +563,7 @@ public:
TABLE_LIST *tables_list; ///<hold 'tables' parameter of mysql_select
List<TABLE_LIST> *join_list; ///< list of joined tables in reverse order
COND_EQUAL *cond_equal;
- SQL_SELECT *select; ///<created in optimisation phase
+ SQL_SELECT *select_; ///<created in optimisation phase
JOIN_TAB *return_tab; ///<used only for outer joins
Item **ref_pointer_array; ///<used pointer reference for this select
// Copy of above to be used with different lists
@@ -555,9 +577,8 @@ public:
Array<Item_in_subselect> sj_subselects;
/* Descriptions of temporary tables used to weed-out semi-join duplicates */
- SJ_TMP_TABLE *sj_tmp_tables;
-
- table_map cur_emb_sj_nests;
+ //SJ_TMP_TABLE *sj_tmp_tables;
+ List<TABLE> sj_tmp_tables;
/*
storage for caching buffers allocated during query execution.
@@ -616,7 +637,7 @@ public:
need_tmp= 0;
hidden_group_fields= 0; /*safety*/
error= 0;
- select= 0;
+ select_= 0;
return_tab= 0;
ref_pointer_array= items0= items1= items2= items3= 0;
ref_pointer_array_size= 0;
@@ -631,7 +652,7 @@ public:
tmp_table_param.init();
tmp_table_param.end_write_records= HA_POS_ERROR;
rollup.state= ROLLUP::STATE_NONE;
- sj_tmp_tables= NULL;
+ //sj_tmp_tables= NULL;
no_const_tables= FALSE;
}
diff -Nrup a/sql/table.h b/sql/table.h
--- a/sql/table.h 2008-05-21 14:04:24 +04:00
+++ b/sql/table.h 2008-06-13 19:59:28 +04:00
@@ -990,6 +990,11 @@ public:
};
+class SJ_MATERIALIZE_INFO;
+class Index_hint;
+class Item_in_subselect;
+
+
/*
Table reference in the FROM clause.
@@ -1024,7 +1029,6 @@ public:
;
*/
-class Index_hint;
struct TABLE_LIST
{
TABLE_LIST() {} /* Remove gcc warning */
@@ -1064,6 +1068,9 @@ struct TABLE_LIST
table_map sj_inner_tables;
/* Number of IN-compared expressions */
uint sj_in_exprs;
+ Item_in_subselect *sj_subq_pred;
+ SJ_MATERIALIZE_INFO *sj_mat_info;
+
/*
The structure of ON expression presented in the member above
can be changed during certain optimizations. This member
@@ -1409,6 +1416,10 @@ private:
ulong m_table_ref_version;
};
+struct st_position;
+
+class SJ_MATERIALIZE_INFO;
+
class Item;
/*
diff -Nrup a/support-files/build-tags b/support-files/build-tags
--- a/support-files/build-tags 2007-02-08 14:56:38 +03:00
+++ b/support-files/build-tags 2008-06-13 19:59:28 +04:00
@@ -2,7 +2,8 @@
rm -f TAGS
filter='\.cc$\|\.c$\|\.h$\|\.yy\|\.[ch]pp$'
-files=`bk -r sfiles -gU | grep $filter `
+#files=`bk -r sfiles -gU | grep $filter `
+files=`bzr ls --versioned | grep $filter `
for f in $files ;
do
etags -o TAGS --append $f
| Thread |
|---|
| • bk commit into 6.0 tree (sergefp:1.2651) WL#3980 | Sergey Petrunia | 13 Jun |