From: Date: March 26 2005 3:12pm Subject: bk commit into 4.1 tree (sergefp:1.2148) BUG#8877 List-Archive: http://lists.mysql.com/internals/23386 X-Bug: 8877 Message-Id: <20050326141210.88BDE2BFDA4@newbox.mylan> Below is the list of changes that have just been committed into a local 4.1 repository of psergey. When psergey does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2148 05/03/26 17:12:05 sergefp@stripped +4 -0 Fix for BUG#8877 'ref'-type join on '=' reads through all NULL values: * make make_join_select/make_cond_for_table() to detect "t2.key = t1.field" conditions that are used for ref access and add "t1.field IS NOT NULL" to condition on table t1. In addition, some related code was restructured and comments added. (this cset combines together all previous csets related to this bug) sql/table.cc 1.127 05/03/26 17:12:02 sergefp@stripped +0 -7 Don't set HA_PART_KEY_SEG flag for key parts that may be NULL. The check in sql_select.cc that was dependent on this was modified accordingly in this cset. sql/sql_select.h 1.73 05/03/26 17:12:02 sergefp@stripped +4 -1 Added comments sql/sql_select.cc 1.390 05/03/26 17:12:02 sergefp@stripped +229 -25 Fix for BUG#8877 'ref'-type join on '=' reads through all NULL values: * make make_join_select/make_cond_for_table() to detect "t2.key = t1.field" conditions that are used for ref access and add "t1.field IS NOT NULL" to condition on table t1. In addition, some related code was restructured and comments added. sql/field.h 1.125 05/03/26 17:12:01 sergefp@stripped +5 -1 Added comment about Field::field_length # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: sergefp # Host: newbox.mylan # Root: /home/psergey/mysql-4.1-nulls-scan-t4 --- 1.124/sql/field.h 2005-02-28 12:59:41 +03:00 +++ 1.125/sql/field.h 2005-03-26 17:12:01 +03:00 @@ -88,7 +88,11 @@ }; utype unireg_check; - uint32 field_length; // Length of field + /* + Length of table field, or length ok key tuple segment if this instance of + Field_* object is used pointed by KEY_PART_INFO::field. + */ + uint32 field_length; uint16 flags; uchar null_bit; // Bit used to test null bit --- 1.389/sql/sql_select.cc 2005-03-22 03:36:14 +03:00 +++ 1.390/sql/sql_select.cc 2005-03-26 17:12:02 +03:00 @@ -58,7 +58,7 @@ KEY_PART_INFO *key_part, char *key_buff, uint maybe_null); static bool make_simple_join(JOIN *join,TABLE *tmp_table); -static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item); +static int make_join_select(JOIN *join,SQL_SELECT *select,COND *item); static void make_join_readinfo(JOIN *join,uint options); static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables); static void update_depend_map(JOIN *join); @@ -114,7 +114,8 @@ static int join_read_always_key_or_null(JOIN_TAB *tab); static int join_read_next_same_or_null(READ_RECORD *info); static COND *make_cond_for_table(COND *cond,table_map table, - table_map used_table); + table_map used_table, + Item_field **notnull_item); static Item* part_of_refkey(TABLE *form,Field *field); static uint find_shortest_key(TABLE *table, const key_map *usable_keys); static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order, @@ -599,7 +600,14 @@ DBUG_PRINT("error",("Error: make_select() failed")); DBUG_RETURN(1); } - if (make_join_select(this, select, conds)) + int res= make_join_select(this, select, conds); + if (res == -1) + { + error= -1; + DBUG_PRINT("error",("Error: OOM in make_join_select()")); + DBUG_RETURN(1); + } + else if (res) { zero_result_cause= "Impossible WHERE noticed after reading const tables"; @@ -1395,9 +1403,10 @@ table_map used_tables= (curr_join->const_table_map | curr_table->table->map); + Item_field *dummy=NULL; Item* sort_table_cond= make_cond_for_table(curr_join->tmp_having, used_tables, - used_tables); + used_tables, &dummy); if (sort_table_cond) { if (!curr_table->select) @@ -1421,9 +1430,10 @@ curr_table->select_cond->top_level_item(); DBUG_EXECUTE("where",print_where(curr_table->select->cond, "select and having");); + Item_field *dummy= NULL; curr_join->tmp_having= make_cond_for_table(curr_join->tmp_having, ~ (table_map) 0, - ~used_tables); + ~used_tables, &dummy); DBUG_EXECUTE("where",print_where(conds,"having after sort");); } } @@ -3410,13 +3420,36 @@ } -static bool +inline void add_cond_and_fix(Item **e1, Item *e2) +{ + if (*e1) + { + Item *res; + if ((res= new Item_cond_and(*e1, e2))) + { + *e1= res; + res->quick_fix_field(); + } + } + else + *e1= e2; +} + + +/* + RETURN + 0 - Ok + 1 - Error + -1 - Out of memory error. +*/ +static int make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) { DBUG_ENTER("make_join_select"); if (select) { table_map used_tables; + Item_field *not_null_item; if (join->tables > 1) cond->update_used_tables(); // Tablenr may have changed if (join->const_tables == join->tables && @@ -3424,8 +3457,8 @@ &join->thd->lex->unit) // not upper level SELECT join->const_table_map|=RAND_TABLE_BIT; { // Check const tables - COND *const_cond= - make_cond_for_table(cond,join->const_table_map,(table_map) 0); + COND *const_cond= make_cond_for_table(cond,join->const_table_map, + (table_map) 0, ¬_null_item); DBUG_EXECUTE("where",print_where(const_cond,"constants");); if (const_cond && !const_cond->val_int()) { @@ -3461,7 +3494,24 @@ join->best_positions[i].records_read= rows2double(tab->quick->records); } - COND *tmp=make_cond_for_table(cond,used_tables,current_map); + not_null_item= NULL; + COND *tmp= make_cond_for_table(cond, used_tables, current_map, + ¬_null_item); + if (not_null_item && not_null_item != (Item_field*)1) + { + JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab; + Item_func_isnotnull *null_rej; + if (!(null_rej= new Item_func_isnotnull(not_null_item))) + return -1; + + null_rej->quick_fix_field(); + DBUG_EXECUTE("where",print_where(null_rej,"not-null");); + + add_cond_and_fix(&referred_tab->select_cond, null_rej); + if (referred_tab->select) + add_cond_and_fix(&referred_tab->select->cond, null_rej); + } + if (!tmp && tab->quick) { // Outer join /* @@ -3476,7 +3526,7 @@ SQL_SELECT *sel=tab->select=(SQL_SELECT*) join->thd->memdup((gptr) select, sizeof(SQL_SELECT)); if (!sel) - DBUG_RETURN(1); // End of memory + DBUG_RETURN(-1); // End of memory tab->select_cond=sel->cond=tmp; sel->head=tab->table; if (tab->quick) @@ -3586,7 +3636,7 @@ if ((tmp=make_cond_for_table(cond, join->const_table_map | current_map, - current_map))) + current_map, ¬_null_item))) { DBUG_EXECUTE("where",print_where(tmp,"cache");); tab->cache.select=(SQL_SELECT*) @@ -6805,20 +6855,84 @@ in sorted order. *****************************************************************************/ -/* Return 1 if right_item is used removable reference key on left_item */ +/* + Check if validity of condition "left_item OP right_item" (OP is '=' or + '<=>') is guaranteed by use of ref/eq_ref method on left_item's underlying + table. + + SYNOPSIS + test_if_ref() + left_item + right_item + eq_func true - the predicate is '=' + false - the predicate is '<=>' + notnull_item OUT: + *notnull_item = pointer to item that must be NOT NULL + *notnull_item = 1 + otherwise *notnull_item is not modified. + + NOTES + The join structures are assumed to have been set accordingly. + + The criteria is as follows: + * condition has form "t1.field1 = t2.field" or "t1.field1 = const_expr" + * condition is the "same" as one used for ref/eq_ref scan. + * No value truncation/conversion is performed when obtaining key value + for ref scan. + * It's impossible for left_item to have NULL value when evaluated in + the context of the WHERE clause: + - left_item->field is defined as NOT NULL, and + - left_item underlying table is not inner wrt some outer join + (right item can have NULL value, there will be no match generated + for it) -static bool test_if_ref(Item_field *left_item,Item *right_item) + RETURN + 1 - The ref/eq_ref access method is used for left_item's underlying table + and the condition validity is guaranteed for all rows returned by the + access method. + 0 - Otherwise +*/ + +static bool test_if_ref(Item_field *left_item, Item *right_item, + bool eq_func, Item_field **notnull_item) { Field *field=left_item->field; - // No need to change const test. We also have to keep tests on LEFT JOIN + bool field_maybe_null; + /* + No need to remove the condition if left_item refers to const table. + We also have to keep test in WHERE on LEFT JOIN + */ if (!field->table->const_table && !field->table->maybe_null) { - Item *ref_item=part_of_refkey(field->table,field); + field_maybe_null= field->real_maybe_null(); + /* value of ref_item will be used as field value in ref/eq_ref access */ + Item *ref_item= part_of_refkey(field->table, field); if (ref_item && ref_item->eq(right_item,1)) { if (right_item->type() == Item::FIELD_ITEM) + { + if (field_maybe_null) + { + if (eq_func) + { + /* + The condition is "tbl.key_maybe_null = ref_item", so + "ref_item IS NOT NULL" can be added to it. + */ + *notnull_item= (Item_field*)ref_item; + } + else + { + /* The condition is "tbl.key_maybe_null <=> ref_item" */ + *notnull_item= (Item_field*)1; + } + return 0; + } + else return (field->eq_def(((Item_field *) right_item)->field)); - if (right_item->const_item() && !(right_item->is_null())) + } + if (!eq_func && !field_maybe_null && + right_item->const_item() && !right_item->is_null()) { /* We can remove binary fields and numerical fields except float, @@ -6839,13 +6953,48 @@ } +/* + Extract the condition that can be checked when current record of + used_table becomes available, assuming condition for previous tables (in + "tables" bitmap) has already been extracted/checked. + + SYNOPSIS + cond WHERE condition + tables Assume current records are available for these tables, + condition on these table has been checked. + used_table "current" table(s) todo: this can be non-singleton! + notnull_item Store there: + - Item_field that refers to previous table and cannot be + NULL (ok to add it to condition on that previous table) + - "1" if the above Item_field cannot be produced (special + value used internally in AND/OR recursion) + - Don't store anything if neither of the above was detected + NOTES + The function uses COND::marker to store info between consecutive calls. + The condition extraction does the following: + * Do AND/OR recursion on expression tree + * Extract Items to check based on Item::used_tables() + * Eliminate '=' predicates that are guaranteed to be true by used + ref/eq_ref access method (note that this doesn't apply to 'func'). + * For ref/eq_ref via "t2.key = t1.field" detect the case when + "t1.field IS NOT NULL" can be added to WHERE condition and set + *notnull_item = "t1.field" when called for table t2. + + RETURN + cond Condition that depends on (tables|used_table). + NULL If no condition could be extracted. +*/ + static COND * -make_cond_for_table(COND *cond, table_map tables, table_map used_table) +make_cond_for_table(COND *cond, table_map tables, table_map used_table, + Item_field **notnull_item) { if (used_table && !(cond->used_tables() & used_table)) return (COND*) 0; // Already checked if (cond->type() == Item::COND_ITEM) { + Item_field *referred= NULL; + Item_field *ref_combined= NULL; if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) { /* Create new top level AND item */ @@ -6856,20 +7005,36 @@ Item *item; while ((item=li++)) { - Item *fix=make_cond_for_table(item,tables,used_table); + Item *fix=make_cond_for_table(item,tables,used_table, &referred); if (fix) new_cond->argument_list()->push_back(fix); + /* + Rules for notnull_item construction with AND: + {some Item} takes over {no item} takes over NULL. + e.g. for "t2.key = t1.field AND t2.key <=> t1.field" we get + *notnull_item = t1.field as result. + */ + if (referred) + { + if (referred != (Item_field*)1) + ref_combined= referred; + else + if (!ref_combined) + ref_combined=referred; + } } switch (new_cond->argument_list()->elements) { case 0: return (COND*) 0; // Always true case 1: + *notnull_item= ref_combined; return new_cond->argument_list()->head(); default: /* Item_cond_and do not need fix_fields for execution, its parameters are fixed or do not need fix_fields, too */ + *notnull_item= ref_combined; new_cond->quick_fix_field(); new_cond->used_tables_cache= ((Item_cond_and*) cond)->used_tables_cache & @@ -6886,13 +7051,28 @@ Item *item; while ((item=li++)) { - Item *fix=make_cond_for_table(item,tables,0L); + Item *fix=make_cond_for_table(item, tables, 0L, &referred); if (!fix) return (COND*) 0; // Always true new_cond->argument_list()->push_back(fix); + /* + Rules for notnull_item construction with OR: + {no item} takes over {some Item} takes over NULL. + e.g. for "t2.key = t1.field OR t2.key <=> t1.field" we get + *notnull_item= "no item" in result. + */ + if (referred) + { + if (referred == (Item_field*)1) + ref_combined= referred; + else + if (!ref_combined) + ref_combined= referred; + } } + *notnull_item= ref_combined; /* - Item_cond_and do not need fix_fields for execution, its parameters + Item_cond_or do not need fix_fields for execution, its parameters are fixed or do not need fix_fields, too */ new_cond->quick_fix_field(); @@ -6913,23 +7093,43 @@ if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK) return cond; // Not boolean op - if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC) + Item_func::Functype func_type = ((Item_func*) cond)->functype(); + + /* + Do two things at once: + 1) Check if cond is "t2.key=t1.somefield" where t2.key is used by + ref/eq_ref and eliminate cond if ref/eq_ref access guarantees its + validity. + 2) For the above, check if from "t2.key=t1.somefield" and the use of ref + access method we can infer that "t1.somefield IS NOT NULL". + */ + if (func_type == Item_func::EQ_FUNC || func_type == Item_func::EQUAL_FUNC) { Item *left_item= ((Item_func*) cond)->arguments()[0]; Item *right_item= ((Item_func*) cond)->arguments()[1]; + bool is_eq_func= (func_type == Item_func::EQ_FUNC); + if (left_item->type() == Item::FIELD_ITEM && - test_if_ref((Item_field*) left_item,right_item)) + test_if_ref((Item_field*) left_item, right_item, + is_eq_func, notnull_item)) + { + if (is_eq_func) { cond->marker=3; // Checked when read return (COND*) 0; } + } if (right_item->type() == Item::FIELD_ITEM && - test_if_ref((Item_field*) right_item,left_item)) + test_if_ref((Item_field*) right_item, left_item, + is_eq_func, notnull_item)) + { + if (is_eq_func) { cond->marker=3; // Checked when read return (COND*) 0; } } + } cond->marker=2; return cond; } @@ -6949,8 +7149,10 @@ for (uint part=0 ; part < ref_parts ; part++,key_part++) if (field->eq(key_part->field) && !(key_part->key_part_flag & HA_PART_KEY_SEG)) + { return table->reginfo.join_tab->ref.items[part]; } + } return (Item*) 0; } @@ -7423,12 +7625,14 @@ #ifdef NOT_YET static bool fix_having(JOIN *join, Item **having) { + Item *dummy; (*having)->update_used_tables(); // Some tables may have been const JOIN_TAB *table=&join->join_tab[join->const_tables]; table_map used_tables= join->const_table_map | table->table->map; DBUG_EXECUTE("where",print_where(*having,"having");); - Item* sort_table_cond=make_cond_for_table(*having,used_tables,used_tables); + Item* sort_table_cond=make_cond_for_table(*having,used_tables,used_tables, + &dummy); if (sort_table_cond) { if (!table->select) @@ -7446,7 +7650,7 @@ table->select_cond->top_level_item(); DBUG_EXECUTE("where",print_where(table->select_cond, "select and having");); - *having=make_cond_for_table(*having,~ (table_map) 0,~used_tables); + *having=make_cond_for_table(*having,~ (table_map) 0,~used_tables, &dummy); DBUG_EXECUTE("where",print_where(*having,"having after make_cond");); } return 0; --- 1.72/sql/sql_select.h 2005-03-16 09:46:11 +03:00 +++ 1.73/sql/sql_select.h 2005-03-26 17:12:02 +03:00 @@ -100,7 +100,9 @@ key_map keys; /* all keys with can be used */ ha_rows records,found_records,read_time; table_map dependent,key_dependent; - uint use_quick,index; + /* 1 - use quick select, 2 - do "range checked for each record" */ + uint use_quick; + uint index; uint status; // Save status for cache uint used_fields,used_fieldlength,used_blobs; enum join_type type; @@ -115,6 +117,7 @@ typedef struct st_position /* Used in find_best */ { + /* # records to read for each combination of rows from prev. tables */ double records_read; JOIN_TAB *table; KEYUSE *key; --- 1.126/sql/table.cc 2005-03-04 00:51:18 +03:00 +++ 1.127/sql/table.cc 2005-03-26 17:12:02 +03:00 @@ -664,13 +664,6 @@ field->field_length=key_part->length; } } - /* - If the field can be NULL, don't optimize away the test - key_part_column = expression from the WHERE clause - as we need to test for NULL = NULL. - */ - if (field->real_maybe_null()) - key_part->key_part_flag|= HA_PART_KEY_SEG; } else { // Error: shorten key