Hi Gluh,
The patch in itself makes sense and is hereby approved.
However, I think that you should file a new bug for longer-term cleanup of this
feature. As far as I can see, the information requested is still not aggregated
in the most appropriate place.
The basic problem here is that used table information from different select_lex
objects are not compatible and should not be aggregated into the same field.
I think that a correct fix would look something like this:
- Aggregate used tables information for selected expressions into the
select_lex object where the
tables are referenced in the from clause.
- If there is no such select_lex object (for single-table insert, update and
delete), aggregate the
information on the LEX object instead.
- This information should be sufficient for proper optimization of a UNION
query, such as:
SELECT t11.a, t11.b FROM t11 JOIN t12 USING(c1)
UNION
SELECT t21.a, t22.b FROM t21 JOIN t22 USING(c2);
In the current code, the information aggregated into lex->used_tables is [0,1].
This is correct for the second query of the UNION, but incorrect for the
first query.
Hence, the desired optimization is not carried out for the first query.
- The proposed change also means that used tables information is propagated
correctly also for
subqueries, but notice that the general subquery optimizations are more
useful than this optimization
in this case.
- For semijoin transformation, it is not necessary to propagate used tables
information
from subqueries into the parent level.
Thanks,
Roy
On 21.06.11 12.03, Sergey Glukhov wrote:
> #At file:///home/gluh/MySQL/mysql-5.1/ based on
> revid:sergey.glukhov@stripped
>
> 3649 Sergey Glukhov 2011-06-21
> Bug#11766594 59736: SELECT DISTINCT.. INCORRECT RESULT WITH DETERMINISTIC
> FUNCTION IN WHERE C
> There is an optimization of DISTINCT in JOIN::optimize()
> which depends on THD::used_tables value. Each SELECT statement
> inside SP resets used_tables value(see mysql_select()) and it
> leads to wrong result. The fix is to replace THD::used_tables
> with LEX::used_tables.
> @ mysql-test/r/sp.result
> test case
> @ mysql-test/t/sp.test
> test case
> @ sql/sql_base.cc
> THD::used_tables is replaced with LEX::used_tables
> @ sql/sql_class.cc
> THD::used_tables is replaced with LEX::used_tables
> @ sql/sql_class.h
> THD::used_tables is replaced with LEX::used_tables
> @ sql/sql_insert.cc
> THD::used_tables is replaced with LEX::used_tables
> @ sql/sql_lex.cc
> THD::used_tables is replaced with LEX::used_tables
> @ sql/sql_lex.h
> THD::used_tables is replaced with LEX::used_tables
> @ sql/sql_prepare.cc
> THD::used_tables is replaced with LEX::used_tables
> @ sql/sql_select.cc
> THD::used_tables is replaced with LEX::used_tables
>
> modified:
> mysql-test/r/sp.result
> mysql-test/t/sp.test
> sql/sql_base.cc
> sql/sql_class.cc
> sql/sql_class.h
> sql/sql_insert.cc
> sql/sql_lex.cc
> sql/sql_lex.h
> sql/sql_prepare.cc
> sql/sql_select.cc
> === modified file 'mysql-test/r/sp.result'
> --- a/mysql-test/r/sp.result 2010-02-09 10:30:50 +0000
> +++ b/mysql-test/r/sp.result 2011-06-21 10:03:04 +0000
> @@ -7053,6 +7053,25 @@ init_connect
> SET @@GLOBAL.init_connect= @old_init_connect;
> DROP PROCEDURE p2;
> DROP PROCEDURE p5;
> +#
> +# Bug#11766594 59736: SELECT DISTINCT.. INCORRECT RESULT WITH DETERMINISTIC
> FUNCTION IN WHERE C
> +#
> +CREATE TABLE t1 (a INT, b INT, KEY(b));
> +CREATE TABLE t2 (c INT, d INT, KEY(c));
> +INSERT INTO t1 VALUES (1,1),(1,1),(1,2);
> +INSERT INTO t2 VALUES (1,1),(1,2);
> +CREATE FUNCTION f1() RETURNS INT DETERMINISTIC
> +BEGIN
> +DECLARE a int;
> +-- SQL statement inside
> +SELECT 1 INTO a;
> +RETURN a;
> +END $
> +SELECT COUNT(DISTINCT d) FROM t1, t2 WHERE a = c AND b = f1();
> +COUNT(DISTINCT d)
> +2
> +DROP FUNCTION f1;
> +DROP TABLE t1, t2;
> # ------------------------------------------------------------------
> # -- End of 5.1 tests
> # ------------------------------------------------------------------
>
> === modified file 'mysql-test/t/sp.test'
> --- a/mysql-test/t/sp.test 2009-12-23 13:44:03 +0000
> +++ b/mysql-test/t/sp.test 2011-06-21 10:03:04 +0000
> @@ -8350,6 +8350,33 @@ SET @@GLOBAL.init_connect= @old_init_con
> DROP PROCEDURE p2;
> DROP PROCEDURE p5;
>
> +--echo #
> +--echo # Bug#11766594 59736: SELECT DISTINCT.. INCORRECT RESULT WITH DETERMINISTIC
> FUNCTION IN WHERE C
> +--echo #
> +
> +CREATE TABLE t1 (a INT, b INT, KEY(b));
> +CREATE TABLE t2 (c INT, d INT, KEY(c));
> +INSERT INTO t1 VALUES (1,1),(1,1),(1,2);
> +INSERT INTO t2 VALUES (1,1),(1,2);
> +
> +DELIMITER $;
> +
> +CREATE FUNCTION f1() RETURNS INT DETERMINISTIC
> +BEGIN
> + DECLARE a int;
> + -- SQL statement inside
> + SELECT 1 INTO a;
> + RETURN a;
> +END $
> +
> +DELIMITER ;$
> +
> +SELECT COUNT(DISTINCT d) FROM t1, t2 WHERE a = c AND b = f1();
> +
> +DROP FUNCTION f1;
> +DROP TABLE t1, t2;
> +
> +
> --echo # ------------------------------------------------------------------
> --echo # -- End of 5.1 tests
> --echo # ------------------------------------------------------------------
>
> === modified file 'sql/sql_base.cc'
> --- a/sql/sql_base.cc 2011-05-16 20:04:01 +0000
> +++ b/sql/sql_base.cc 2011-06-21 10:03:04 +0000
> @@ -7576,7 +7576,7 @@ bool setup_fields(THD *thd, Item **ref_p
> if (item->with_sum_func&& item->type() !=
> Item::SUM_FUNC_ITEM&&
> sum_func_list)
> item->split_sum_func(thd, ref_pointer_array, *sum_func_list);
> - thd->used_tables|= item->used_tables();
> + thd->lex->used_tables|= item->used_tables();
> thd->lex->current_select->cur_pos_in_select_list++;
> }
> thd->lex->current_select->is_item_list_lookup=
> save_is_item_list_lookup;
> @@ -7923,7 +7923,7 @@ insert_fields(THD *thd, Name_resolution_
> views and natural joins this update is performed inside the loop below.
> */
> if (table)
> - thd->used_tables|= table->map;
> + thd->lex->used_tables|= table->map;
>
> /*
> Initialize a generic field iterator for the current table reference.
> @@ -8008,7 +8008,7 @@ insert_fields(THD *thd, Name_resolution_
> field_table= nj_col->table_ref->table;
> if (field_table)
> {
> - thd->used_tables|= field_table->map;
> + thd->lex->used_tables|= field_table->map;
> field_table->covering_keys.intersect(field->part_of_key);
> field_table->merge_keys.merge(field->part_of_key);
> field_table->used_fields++;
> @@ -8016,7 +8016,7 @@ insert_fields(THD *thd, Name_resolution_
> }
> }
> else
> - thd->used_tables|= item->used_tables();
> + thd->lex->used_tables|= item->used_tables();
> thd->lex->current_select->cur_pos_in_select_list++;
> }
> /*
>
> === modified file 'sql/sql_class.cc'
> --- a/sql/sql_class.cc 2011-05-21 08:43:54 +0000
> +++ b/sql/sql_class.cc 2011-06-21 10:03:04 +0000
> @@ -647,7 +647,6 @@ THD::THD()
> is_slave_error= thread_specific_used= FALSE;
> hash_clear(&handler_tables_hash);
> tmp_table=0;
> - used_tables=0;
> cuted_fields= sent_row_count= row_count= 0L;
> limit_found_rows= 0;
> row_count_func= -1;
>
> === modified file 'sql/sql_class.h'
> --- a/sql/sql_class.h 2011-05-18 14:40:01 +0000
> +++ b/sql/sql_class.h 2011-06-21 10:03:04 +0000
> @@ -1732,13 +1732,6 @@ public:
> */
> ha_rows examined_row_count;
>
> - /*
> - The set of those tables whose fields are referenced in all subqueries
> - of the query.
> - TODO: possibly this it is incorrect to have used tables in THD because
> - with more than one subquery, it is not clear what does the field mean.
> - */
> - table_map used_tables;
> USER_CONN *user_connect;
> CHARSET_INFO *db_charset;
> /*
>
> === modified file 'sql/sql_insert.cc'
> --- a/sql/sql_insert.cc 2011-05-16 20:04:01 +0000
> +++ b/sql/sql_insert.cc 2011-06-21 10:03:04 +0000
> @@ -629,7 +629,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *t
> lock_type= table_list->lock_type;
>
> thd_proc_info(thd, "init");
> - thd->used_tables=0;
> + thd->lex->used_tables=0;
> values= its++;
> value_count= values->elements;
>
> @@ -777,7 +777,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *t
> }
> else
> {
> - if (thd->used_tables) // Column used in values()
> + if (thd->lex->used_tables) // Column used in values()
> restore_record(table,s->default_values); // Get empty record
> else
> {
>
> === modified file 'sql/sql_lex.cc'
> --- a/sql/sql_lex.cc 2011-02-22 21:03:32 +0000
> +++ b/sql/sql_lex.cc 2011-06-21 10:03:04 +0000
> @@ -358,6 +358,7 @@ void lex_start(THD *thd)
> lex->server_options.port= -1;
>
> lex->is_lex_started= TRUE;
> + lex->used_tables= 0;
> DBUG_VOID_RETURN;
> }
>
>
> === modified file 'sql/sql_lex.h'
> --- a/sql/sql_lex.h 2011-02-22 21:03:32 +0000
> +++ b/sql/sql_lex.h 2011-06-21 10:03:04 +0000
> @@ -1828,6 +1828,14 @@ typedef struct st_lex : public Query_tab
> uint create_select_pos;
> bool create_select_in_comment;
>
> + /*
> + The set of those tables whose fields are referenced in all subqueries
> + of the query.
> + TODO: possibly this it is incorrect to have used tables in THD because
> + with more than one subquery, it is not clear what does the field mean.
> + */
> + table_map used_tables;
> +
> st_lex();
>
> virtual ~st_lex()
>
> === modified file 'sql/sql_prepare.cc'
> --- a/sql/sql_prepare.cc 2011-05-16 20:04:01 +0000
> +++ b/sql/sql_prepare.cc 2011-06-21 10:03:04 +0000
> @@ -1382,7 +1382,7 @@ static int mysql_test_select(Prepared_st
> if (open_normal_and_derived_tables(thd, tables, 0))
> goto error;
>
> - thd->used_tables= 0; // Updated by setup_fields
> + thd->lex->used_tables= 0; // Updated by setup_fields
>
> /*
> JOIN::prepare calls
> @@ -1551,7 +1551,7 @@ static bool select_like_stmt_test(Prepar
> if (specific_prepare&& (*specific_prepare)(thd))
> DBUG_RETURN(TRUE);
>
> - thd->used_tables= 0; // Updated by setup_fields
> + thd->lex->used_tables= 0; // Updated by setup_fields
>
> /* Calls JOIN::prepare */
> DBUG_RETURN(lex->unit.prepare(thd, 0, setup_tables_done_option));
>
> === modified file 'sql/sql_select.cc'
> --- a/sql/sql_select.cc 2011-04-22 07:20:55 +0000
> +++ b/sql/sql_select.cc 2011-06-21 10:03:04 +0000
> @@ -406,7 +406,7 @@ fix_inner_refs(THD *thd, List<Item> &all
>
> if (!ref->fixed&& ref->fix_fields(thd, 0))
> return TRUE;
> - thd->used_tables|= item->used_tables();
> + thd->lex->used_tables|= item->used_tables();
> }
> return false;
> }
> @@ -1632,7 +1632,7 @@ JOIN::optimize()
>
> if (exec_tmp_table1->distinct)
> {
> - table_map used_tables= thd->used_tables;
> + table_map used_tables= thd->lex->used_tables;
> JOIN_TAB *last_join_tab= join_tab+tables-1;
> do
> {
> @@ -2526,7 +2526,7 @@ mysql_select(THD *thd, Item ***rref_poin
> if (!(join= new JOIN(thd, fields, select_options, result)))
> DBUG_RETURN(TRUE);
> thd_proc_info(thd, "init");
> - thd->used_tables=0; // Updated by setup_fields
> + thd->lex->used_tables=0; // Updated by
> setup_fields
> err= join->prepare(rref_pointer_array, tables, wild_num,
> conds, og_num, order, group, having, proc_param,
> select_lex, unit);
> @@ -16949,7 +16949,7 @@ static void select_describe(JOIN *join,
> need_order=0;
> extra.append(STRING_WITH_LEN("; Using filesort"));
> }
> - if (distinct& test_all_bits(used_tables,thd->used_tables))
> + if (distinct& test_all_bits(used_tables,thd->lex->used_tables))
> extra.append(STRING_WITH_LEN("; Distinct"));
>
> for (uint part= 0; part< tab->ref.key_parts; part++)
>
>
>
>