Below is the list of changes that have just been committed into a local
5.2 repository of timka. When timka does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2155 06/04/04 17:40:02 timour@stripped +3 -0
WL#2241: One-pass hash join.
Phase 0, step 0.1
Extended the optimizer with a simple procedure that chooses pipelined hash join
whenever it is applicable, given that the equi-join columns are indexed.
Modified EXPLAIN and the printing to the trace file to display the chosen
join algorithm.
Commented and formatted old code, and renamed some variables.
sql/sql_test.cc
1.46 06/04/04 17:39:59 timour@stripped +11 -0
- print_plan() now prints the join algorithm to the debug output.
sql/sql_select.h
1.108 06/04/04 17:39:59 timour@stripped +47 -4
- Added new member to JOIN_TAB - 'algorithm' to specify the chosen join algorithm.
- More comments.
- Changes to comply with coding conventions.
sql/sql_select.cc
1.393 06/04/04 17:39:59 timour@stripped +183 -91
- Added new member to JOIN_TAB - 'algorithm' to specify the chosen join algorithm.
- Added the entry point for join algorithm optimization and a simple implementation
that always chooses hash join if all equi-join fields are indexed.
- Modified EXPLAIN and print_plan() to print the chosen join method.
- More comments.
- Changes to comply with coding conventions.
- renamed the variable 's' in make_join_statistics to a more meaningful name 'pop'
(from Plan OPerator).
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: timour
# Host: lamia.home
# Root: /home/timka/mysql/src/5.2-2241
--- 1.392/sql/sql_select.cc 2006-04-02 18:49:29 +03:00
+++ 1.393/sql/sql_select.cc 2006-04-04 17:39:59 +03:00
@@ -35,6 +35,14 @@
"index_merge"
};
+/* Names of join algorithms. */
+const char *join_algorithm_str[]=
+{
+ "" /* Not a join */, "NLJ" /* Nested Loops Join */,
+ "HJP" /* Pipelined Hash Join */, "HJB" /* Blocking Hash Join */
+};
+
+
static void update_keyuse_statistics(JOIN *join, DYNAMIC_ARRAY *keyuse_array);
static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds,
DYNAMIC_ARRAY *keyuse);
@@ -211,6 +219,9 @@
bool distinct, const char *message=NullS);
static Item *remove_additional_cond(Item* conds);
static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
+/* Hash join (WL#2241) related prototypes. */
+static bool
+best_join_algorithm(POSITION *join_plan, uint plan_length, ulong join_mem);
/*
@@ -1959,7 +1970,7 @@
}
/*****************************************************************************
- Create JOIN_TABS, make a guess about the table types,
+ Create JOIN_TABs, make a guess about the table types,
Approximate how many records will be used in each table
*****************************************************************************/
@@ -2005,7 +2016,7 @@
table_map found_const_table_map, all_table_map, found_ref, refs;
key_map const_ref, eq_part;
TABLE **table_vector;
- JOIN_TAB *stat,*stat_end,*s,**stat_ref;
+ JOIN_TAB *stat,*stat_end,*pop,**stat_ref;
KEYUSE *keyuse,*start_keyuse;
table_map outer_join=0;
JOIN_TAB *stat_vector[MAX_TABLES+1];
@@ -2024,34 +2035,35 @@
found_const_table_map= all_table_map=0;
const_count=0;
- for (s= stat, i= 0;
+ for (pop= stat, i= 0;
tables;
- s++, tables= tables->next_leaf, i++)
+ pop++, tables= tables->next_leaf, i++)
{
TABLE_LIST *embedding= tables->embedding;
- stat_vector[i]=s;
- s->keys.init();
- s->const_keys.init();
- s->checked_keys.init();
- s->needed_reg.init();
- table_vector[i]=s->table=table=tables->table;
+ stat_vector[i]= pop;
+ pop->keys.init();
+ pop->const_keys.init();
+ pop->checked_keys.init();
+ pop->needed_reg.init();
+ table_vector[i]=pop->table=table=tables->table;
table->pos_in_table_list= tables;
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);// record count
table->quick_keys.clear_all();
- table->reginfo.join_tab=s;
+ table->reginfo.join_tab= pop;
table->reginfo.not_exists_optimize=0;
bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys);
all_table_map|= table->map;
- s->join=join;
- s->info=0; // For describe
+ pop->join=join;
+ pop->info=0; // For describe
+ pop->algorithm= JA_NLJ;
- s->dependent= tables->dep_tables;
- s->key_dependent= 0;
+ pop->dependent= tables->dep_tables;
+ pop->key_dependent= 0;
if (tables->schema_table)
table->file->records= 2;
- s->on_expr_ref= &tables->on_expr;
- if (*s->on_expr_ref)
+ pop->on_expr_ref= &tables->on_expr;
+ if (*pop->on_expr_ref)
{
/* s is the only inner table of an outer join */
#ifdef WITH_PARTITION_STORAGE_ENGINE
@@ -2060,25 +2072,25 @@
if (!table->file->records && !embedding)
#endif
{ // Empty table
- s->dependent= 0; // Ignore LEFT JOIN depend.
- set_position(join,const_count++,s,(KEYUSE*) 0);
+ pop->dependent= 0; // Ignore LEFT JOIN depend.
+ set_position(join, const_count++, pop,(KEYUSE*) 0);
continue;
}
outer_join|= table->map;
- s->embedding_map= 0;
+ pop->embedding_map= 0;
for (;embedding; embedding= embedding->embedding)
- s->embedding_map|= embedding->nested_join->nj_map;
+ pop->embedding_map|= embedding->nested_join->nj_map;
continue;
}
if (embedding)
{
/* s belongs to a nested join, maybe to several embedded joins */
- s->embedding_map= 0;
+ pop->embedding_map= 0;
do
{
NESTED_JOIN *nested_join= embedding->nested_join;
- s->embedding_map|=nested_join->nj_map;
- s->dependent|= embedding->dep_tables;
+ pop->embedding_map|=nested_join->nj_map;
+ pop->dependent|= embedding->dep_tables;
embedding= embedding->embedding;
outer_join|= nested_join->used_tables;
}
@@ -2092,11 +2104,11 @@
#endif
if ((table->s->system || table->file->records <= 1 ||
no_partitions_used) &&
- !s->dependent &&
+ !pop->dependent &&
!(table->file->table_flags() & HA_NOT_EXACT_COUNT) &&
!table->fulltext_searched)
{
- set_position(join,const_count++,s,(KEYUSE*) 0);
+ set_position(join, const_count++, pop, (KEYUSE*) 0);
}
}
stat_vector[i]=0;
@@ -2112,27 +2124,27 @@
As we use bitmaps to represent the relation the complexity
of the algorithm is O((number of tables)^2).
*/
- for (i= 0, s= stat ; i < table_count ; i++, s++)
+ for (i= 0, pop= stat ; i < table_count ; i++, pop++)
{
for (uint j= 0 ; j < table_count ; j++)
{
table= stat[j].table;
- if (s->dependent & table->map)
- s->dependent |= table->reginfo.join_tab->dependent;
+ if (pop->dependent & table->map)
+ pop->dependent |= table->reginfo.join_tab->dependent;
}
- if (s->dependent)
- s->table->maybe_null= 1;
+ if (pop->dependent)
+ pop->table->maybe_null= 1;
}
/* Catch illegal cross references for outer joins */
- for (i= 0, s= stat ; i < table_count ; i++, s++)
+ for (i= 0, pop= stat ; i < table_count ; i++, pop++)
{
- if (s->dependent & s->table->map)
+ if (pop->dependent & pop->table->map)
{
join->tables=0; // Don't use join->table
my_message(ER_WRONG_OUTER_JOIN, ER(ER_WRONG_OUTER_JOIN), MYF(0));
DBUG_RETURN(1);
}
- s->key_dependent= s->dependent;
+ pop->key_dependent= pop->dependent;
}
}
@@ -2150,16 +2162,16 @@
p_pos++)
{
int tmp;
- s= p_pos->table;
- s->type=JT_SYSTEM;
- join->const_table_map|=s->table->map;
- if ((tmp=join_read_const_table(s, p_pos)))
+ pop= p_pos->table;
+ pop->type=JT_SYSTEM;
+ join->const_table_map|=pop->table->map;
+ if ((tmp=join_read_const_table(pop, p_pos)))
{
if (tmp > 0)
DBUG_RETURN(1); // Fatal error
}
else
- found_const_table_map|= s->table->map;
+ found_const_table_map|= pop->table->map;
}
/* loop until no more const tables are found */
@@ -2174,23 +2186,23 @@
set_position() will move all const_tables first in stat_vector
*/
- for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
+ for (JOIN_TAB **pos=stat_vector+const_count ; (pop= *pos) ; pos++)
{
- table=s->table;
- if (s->dependent) // If dependent on some table
+ table=pop->table;
+ if (pop->dependent) // If dependent on some table
{
// All dep. must be constants
- if (s->dependent & ~(found_const_table_map))
+ if (pop->dependent & ~(found_const_table_map))
continue;
if (table->file->records <= 1L &&
!(table->file->table_flags() & HA_NOT_EXACT_COUNT) &&
!table->pos_in_table_list->embedding)
{ // system table
int tmp= 0;
- s->type=JT_SYSTEM;
+ pop->type=JT_SYSTEM;
join->const_table_map|=table->map;
- set_position(join,const_count++,s,(KEYUSE*) 0);
- if ((tmp= join_read_const_table(s,join->positions+const_count-1)))
+ set_position(join, const_count++, pop, (KEYUSE*) 0);
+ if ((tmp= join_read_const_table(pop, join->positions+const_count-1)))
{
if (tmp > 0)
DBUG_RETURN(1); // Fatal error
@@ -2201,14 +2213,14 @@
}
}
/* check if table can be read by key or table only uses const refs */
- if ((keyuse=s->keyuse))
+ if ((keyuse=pop->keyuse))
{
- s->type= JT_REF;
+ pop->type= JT_REF;
while (keyuse->table == table)
{
start_keyuse=keyuse;
key=keyuse->key;
- s->keys.set_bit(key); // QQ: remove this ?
+ pop->keys.set_bit(key); // QQ: remove this ?
refs=0;
const_ref.clear_all();
@@ -2236,13 +2248,13 @@
{ // Found everything for ref.
int tmp;
ref_changed = 1;
- s->type= JT_CONST;
+ pop->type= JT_CONST;
join->const_table_map|=table->map;
- set_position(join,const_count++,s,start_keyuse);
- if (create_ref_for_key(join, s, start_keyuse,
+ set_position(join, const_count++, pop, start_keyuse);
+ if (create_ref_for_key(join, pop, start_keyuse,
found_const_table_map))
DBUG_RETURN(1);
- if ((tmp=join_read_const_table(s,
+ if ((tmp=join_read_const_table(pop,
join->positions+const_count-1)))
{
if (tmp > 0)
@@ -2262,51 +2274,51 @@
/* Calc how many (possible) matched records in each table */
- for (s=stat ; s < stat_end ; s++)
+ for (pop= stat ; pop < stat_end ; pop++)
{
- if (s->type == JT_SYSTEM || s->type == JT_CONST)
+ if (pop->type == JT_SYSTEM || pop->type == JT_CONST)
{
/* Only one matching row */
- s->found_records=s->records=s->read_time=1; s->worst_seeks=1.0;
+ pop->found_records=pop->records=pop->read_time=1; pop->worst_seeks=1.0;
continue;
}
/* Approximate found rows and time to read them */
- s->found_records=s->records=s->table->file->records;
- s->read_time=(ha_rows) s->table->file->scan_time();
+ pop->found_records=pop->records=pop->table->file->records;
+ pop->read_time=(ha_rows) pop->table->file->scan_time();
/*
Set a max range of how many seeks we can expect when using keys
This is can't be to high as otherwise we are likely to use
table scan.
*/
- s->worst_seeks= min((double) s->found_records / 10,
- (double) s->read_time*3);
- if (s->worst_seeks < 2.0) // Fix for small tables
- s->worst_seeks=2.0;
+ pop->worst_seeks= min((double) pop->found_records / 10,
+ (double) pop->read_time*3);
+ if (pop->worst_seeks < 2.0) // Fix for small tables
+ pop->worst_seeks=2.0;
/*
Add to stat->const_keys those indexes for which all group fields or
all select distinct fields participate in one index.
*/
- add_group_and_distinct_keys(join, s);
+ add_group_and_distinct_keys(join, pop);
- if (!s->const_keys.is_clear_all() &&
- !s->table->pos_in_table_list->embedding)
+ if (!pop->const_keys.is_clear_all() &&
+ !pop->table->pos_in_table_list->embedding)
{
ha_rows records;
SQL_SELECT *select;
- select= make_select(s->table, found_const_table_map,
+ select= make_select(pop->table, found_const_table_map,
found_const_table_map,
- *s->on_expr_ref ? *s->on_expr_ref : conds,
+ *pop->on_expr_ref ? *pop->on_expr_ref : conds,
1, &error);
if (!select)
DBUG_RETURN(1);
- records= get_quick_record_count(join->thd, select, s->table,
- &s->const_keys, join->row_limit);
- s->quick=select->quick;
- s->needed_reg=select->needed_reg;
+ records= get_quick_record_count(join->thd, select, pop->table,
+ &pop->const_keys, join->row_limit);
+ pop->quick=select->quick;
+ pop->needed_reg=select->needed_reg;
select->quick=0;
- if (records == 0 && s->table->reginfo.impossible_range)
+ if (records == 0 && pop->table->reginfo.impossible_range)
{
/*
Impossible WHERE or ON expression
@@ -2314,32 +2326,32 @@
In case of WHERE, don't set found_const_table_map to get the
caller to abort with a zero row result.
*/
- join->const_table_map|= s->table->map;
- set_position(join,const_count++,s,(KEYUSE*) 0);
- s->type= JT_CONST;
- if (*s->on_expr_ref)
+ join->const_table_map|= pop->table->map;
+ set_position(join, const_count++, pop, (KEYUSE*) 0);
+ pop->type= JT_CONST;
+ if (*pop->on_expr_ref)
{
/* Generate empty row */
- s->info= "Impossible ON condition";
- found_const_table_map|= s->table->map;
- s->type= JT_CONST;
- mark_as_null_row(s->table); // All fields are NULL
+ pop->info= "Impossible ON condition";
+ found_const_table_map|= pop->table->map;
+ pop->type= JT_CONST;
+ mark_as_null_row(pop->table); // All fields are NULL
}
}
if (records != HA_POS_ERROR)
{
- s->found_records=records;
- s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0);
+ pop->found_records=records;
+ pop->read_time= (ha_rows) (pop->quick ? pop->quick->read_time : 0.0);
}
delete select;
}
}
- join->join_tab=stat;
- join->map2table=stat_ref;
+ join->join_tab= stat;
+ join->map2table= stat_ref;
join->table= join->all_tables=table_vector;
- join->const_tables=const_count;
- join->found_const_table_map=found_const_table_map;
+ join->const_tables= const_count;
+ join->found_const_table_map= found_const_table_map;
/* Find an optimal join order of the non-constant tables. */
if (join->const_tables != join->tables)
@@ -4352,6 +4364,7 @@
DBUG_EXECUTE("opt", print_plan(join, current_read_time,
current_record_count, idx,
"full_plan"););
+ best_join_algorithm(join->positions, idx, 0);
}
restore_prev_nj_state(s);
}
@@ -4862,7 +4875,7 @@
THD *thd=join->thd;
DBUG_ENTER("get_best_combination");
- table_count=join->tables;
+ table_count= join->tables;
if (!(join->join_tab=join_tab=
(JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)*table_count)))
DBUG_RETURN(TRUE);
@@ -4879,6 +4892,8 @@
form->reginfo.join_tab=j;
if (!*j->on_expr_ref)
form->reginfo.not_exists_optimize=0; // Only with LEFT JOIN
+ if (tablenr == 0)
+ j->algorithm= JA_NONE;
DBUG_PRINT("info",("type: %d", j->type));
if (j->type == JT_CONST)
continue; // Handled in make_join_stat..
@@ -5107,7 +5122,7 @@
static bool
-make_simple_join(JOIN *join,TABLE *tmp_table)
+make_simple_join(JOIN *join, TABLE *tmp_table)
{
TABLE **tableptr;
JOIN_TAB *join_tab;
@@ -5136,6 +5151,7 @@
join_tab->select_cond=0;
join_tab->quick=0;
join_tab->type= JT_ALL; /* Map through all records */
+ join_tab->algorithm= JA_NONE; /* This is not a join */
join_tab->keys.init();
join_tab->keys.set_all(); /* test everything in quick */
join_tab->info=0;
@@ -13942,6 +13958,74 @@
}
}
+
+/****************************************************************************
+ Single-pass hash join (WL#2241)
+****************************************************************************/
+
+static bool
+choose_hash_join_when_possible(POSITION *join_plan, uint plan_length);
+
+
+/*
+ Assign optimal join algorithms to the joins in a complete plan.
+
+ SYNOPSIS
+
+
+ DESCRIPTION
+
+ RETURN
+*/
+
+static bool
+best_join_algorithm(POSITION *join_plan, uint plan_length, ulong join_mem)
+{
+ return choose_hash_join_when_possible(join_plan, plan_length);
+}
+
+
+/*
+ Choose pipeplined hash join as the join algorithm whenever it is
+ applicable.
+
+ SYNOPSIS
+ assign_hash_join_when_possible
+ join_plan
+ plan_lenght
+
+ DESCRIPTION
+ TODO
+
+ RETURN
+ TODO
+*/
+
+static bool
+choose_hash_join_when_possible(POSITION *join_plan, uint plan_length)
+{
+ POSITION *cur_pop_position= join_plan;
+ POSITION *last_pop_position= cur_pop_position + plan_length;
+ JOIN_TAB *cur_pop;
+
+ /*
+ For each Plan OPerator set the join algorithm to pipelined hash
+ join whenever hash join is applicable.
+ */
+ for (; cur_pop_position <= last_pop_position; cur_pop_position++)
+ {
+ cur_pop= cur_pop_position->table;
+ /* Check if cur_pop contains equi-join conditions. */
+ if (cur_pop->keyuse == NULL)
+ continue;
+
+ /* Set the join algorithm to pipelined hash join. */
+ cur_pop->algorithm= JA_HJP;
+ }
+ return TRUE;
+}
+
+
/****************************************************************************
EXPLAIN handling
@@ -14055,15 +14139,18 @@
char buff[512];
char buff1[512], buff2[512], buff3[512];
char keylen_str_buf[64];
+ char type_buff[32];
String extra(buff, sizeof(buff),cs);
char table_name_buffer[NAME_LEN];
String tmp1(buff1,sizeof(buff1),cs);
String tmp2(buff2,sizeof(buff2),cs);
String tmp3(buff3,sizeof(buff3),cs);
+ String type_str(type_buff,sizeof(type_buff),cs);
extra.length(0);
tmp1.length(0);
tmp2.length(0);
tmp3.length(0);
+ type_str.length(0);
quick_type= -1;
item_list.empty();
@@ -14119,9 +14206,14 @@
#endif
}
/* "type" column */
- item_list.push_back(new Item_string(join_type_str[tab->type],
- strlen(join_type_str[tab->type]),
- cs));
+ type_str.append(join_type_str[tab->type]);
+ if (i > 0)
+ {
+ type_str.append(STRING_WITH_LEN(", "));
+ type_str.append(join_algorithm_str[tab->algorithm]);
+ }
+ item_list.push_back(new Item_string(type_str.ptr(), type_str.length(),
+ cs));
/* Build "possible_keys" value and add it to item_list */
if (!tab->keys.is_clear_all())
{
--- 1.107/sql/sql_select.h 2006-04-02 18:49:30 +03:00
+++ 1.108/sql/sql_select.h 2006-04-04 17:39:59 +03:00
@@ -105,11 +105,25 @@
/*
-** The structs which holds the join connections and join states
+ Strucutres that represent Plan OPerations (POPs), partial and complete query
+ plans used during query optimization and execution.
*/
-enum join_type { JT_UNKNOWN,JT_SYSTEM,JT_CONST,JT_EQ_REF,JT_REF,JT_MAYBE_REF,
- JT_ALL, JT_RANGE, JT_NEXT, JT_FT, JT_REF_OR_NULL,
- JT_UNIQUE_SUBQUERY, JT_INDEX_SUBQUERY, JT_INDEX_MERGE};
+
+enum join_type
+{
+ JT_UNKNOWN,JT_SYSTEM,JT_CONST,JT_EQ_REF,JT_REF,JT_MAYBE_REF, JT_ALL,
+ JT_RANGE, JT_NEXT, JT_FT, JT_REF_OR_NULL, JT_UNIQUE_SUBQUERY,
+ JT_INDEX_SUBQUERY, JT_INDEX_MERGE
+};
+
+/* Types of join algorithms. */
+enum join_algorithm
+{
+ JA_NONE /* Not a join */,
+ JA_NLJ /* Nested Loops Join */,
+ JA_HJP /* Pipelined Hash Join */,
+ JA_HJB /* Blocking Hash Join */
+};
class JOIN;
@@ -125,6 +139,18 @@
typedef int (*Read_record_func)(struct st_join_table *tab);
Next_select_func setup_end_select_func(JOIN *join);
+/*
+ Query Plan OPeration (POP).
+
+ TODO:
+ Add detailed description once agreed on the terminology in WL#2241.
+
+ A POP specifies two things:
+ - the table access method to retrieve data from 'table',
+ - the join method to join 'table' with the (partial) result
+ from the previous POP in the join plan.
+*/
+
typedef struct st_join_table {
st_join_table() {} /* Remove gcc warning */
TABLE *table;
@@ -154,7 +180,18 @@
uint use_quick,index;
uint status; // Save status for cache
uint used_fields,used_fieldlength,used_blobs;
+ /*
+ Access method to data in 'table'. In some cases also describes a
+ variation of the join algorithm (e.g. index-nested loops join if
+ JT_EQ_REF).
+ */
enum join_type type;
+ /*
+ Join algorithm between this and the previous plan operator. Meaningful
+ only for join queries when this plan operator is at position > 0
+ in the join plan.
+ */
+ enum join_algorithm algorithm;
bool cached_eq_ref_table,eq_ref_table,not_used_in_distinct;
bool sorted;
TABLE_REF ref;
@@ -203,6 +240,10 @@
} ROLLUP;
+/*
+ Query execution plan (QEP).
+*/
+
class JOIN :public Sql_alloc
{
JOIN(const JOIN &rhs); /* not implemented */
@@ -435,6 +476,8 @@
} SELECT_CHECK;
extern const char *join_type_str[];
+extern const char *join_algorithm_str[];
+
void TEST_join(JOIN *join);
/* Extern functions in sql_select.cc */
--- 1.45/sql/sql_test.cc 2006-02-02 15:48:06 +02:00
+++ 1.46/sql/sql_test.cc 2006-04-04 17:39:59 +03:00
@@ -288,6 +288,17 @@
fputs(" BEST_REF: ", DBUG_FILE);
for (plan_nodes= join->best_ref ; *plan_nodes ; plan_nodes++)
{
+ /*
+ The join algorithm is stored with the right join operand, so we
+ skip the first table, and add the join algorithm just before each
+ right join table.
+ */
+ if (plan_nodes > join->best_ref)
+ {
+ fputs(join_algorithm_str[join_table->algorithm], DBUG_FILE);
+ fputc(' ', DBUG_FILE);
+ }
+
join_table= (*plan_nodes);
fputs(join_table->table->s->table_name.str, DBUG_FILE);
fprintf(DBUG_FILE, "(%lu,%lu,%lu)",
| Thread |
|---|
| • bk commit into 5.2 tree (timour:1.2155) | timour | 4 Apr |