Hi Martin,
I general the fix looks good, but I have few notes. Please, see them inline.
BTW, have you checked this fix with valgrind to make absolutely sure that
nothing leaks?
Regards, Evgen.
On 05/19/10 18:18, Martin Hansson wrote:
> #At file:///data0/martin/bzr/bug41660/5.1bt/ based on
> revid:mattias.jonsson@stripped
>
> 3378 Martin Hansson 2010-05-19
> Bug#41660: Sort-index_merge for non-first join table may
> require O(#scans) memory
>
> When an index merge operation was restarted, it would
> re-allocate the Unique object controlling the duplicate row
> ID elimination. Fixed by making the Unique object a member
> of QUICK_INDEX_MERGE_SELECT and thus reusing it throughout
> the lifetime of this object.
>
> modified:
> mysql-test/r/error_simulation.result
> mysql-test/t/error_simulation.test
> sql/opt_range.cc
> sql/opt_range.h
> sql/sql_class.h
> === modified file 'mysql-test/r/error_simulation.result'
> --- a/mysql-test/r/error_simulation.result 2010-04-25 11:06:40 +0000
> +++ b/mysql-test/r/error_simulation.result 2010-05-19 14:18:09 +0000
> @@ -39,5 +39,40 @@ a
> 2
> DROP TABLE t1;
> #
> +# Bug#41660: Sort-index_merge for non-first join table may require
> +# O(#scans) memory
> +#
> +CREATE TABLE t1 (a INT);
> +INSERT INTO t1 VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9);
> +CREATE TABLE t2 (a INT, b INT, filler CHAR(100), KEY(a), KEY(b));
> +INSERT INTO t2 SELECT 1000, 1000, 'filler' FROM t1 A, t1 B, t1 C;
> +INSERT INTO t2 VALUES (1, 1, 'data');
> +# the example query uses LEFT JOIN only for the sake of being able to
> +# demonstrate the issue with a very small dataset. (left outer join
> +# disables the use of join buffering, so we get the second table
> +# re-scanned for every record in the outer table. if we used inner join,
> +# we would need to have thousands of records and/or more columns in both
> +# tables so that the join buffer is filled and re-scans are triggered).
> +SET SESSION debug = '+d,only_one_Unique_may_be_created';
> +EXPLAIN
> +SELECT * FROM t1 LEFT JOIN t2 ON ( t2.a< 10 OR t2.b< 10 );
> +id select_type table type possible_keys key key_len ref rows Extra
> +x x x x x x x x x
> +x x x x x x x x x Using sort_union(a,b); Using where
> +SELECT * FROM t1 LEFT JOIN t2 ON ( t2.a< 10 OR t2.b< 10 );
> +a a b filler
> +0 1 1 data
> +1 1 1 data
> +2 1 1 data
> +3 1 1 data
> +4 1 1 data
> +5 1 1 data
> +6 1 1 data
> +7 1 1 data
> +8 1 1 data
> +9 1 1 data
> +SET SESSION debug = DEFAULT;
> +DROP TABLE t1, t2;
> +#
> # End of 5.1 tests
> #
>
> === modified file 'mysql-test/t/error_simulation.test'
> --- a/mysql-test/t/error_simulation.test 2010-04-25 11:06:40 +0000
> +++ b/mysql-test/t/error_simulation.test 2010-05-19 14:18:09 +0000
> @@ -45,6 +45,35 @@ SHOW CREATE TABLE t1;
> SELECT * FROM t1;
> DROP TABLE t1;
>
> +-- echo #
> +-- echo # Bug#41660: Sort-index_merge for non-first join table may require
> +-- echo # O(#scans) memory
> +-- echo #
> +
> +CREATE TABLE t1 (a INT);
> +INSERT INTO t1 VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9);
> +
> +CREATE TABLE t2 (a INT, b INT, filler CHAR(100), KEY(a), KEY(b));
> +INSERT INTO t2 SELECT 1000, 1000, 'filler' FROM t1 A, t1 B, t1 C;
> +INSERT INTO t2 VALUES (1, 1, 'data');
> +
> +--echo # the example query uses LEFT JOIN only for the sake of being able to
> +--echo # demonstrate the issue with a very small dataset. (left outer join
> +--echo # disables the use of join buffering, so we get the second table
> +--echo # re-scanned for every record in the outer table. if we used inner join,
> +--echo # we would need to have thousands of records and/or more columns in both
> +--echo # tables so that the join buffer is filled and re-scans are triggered).
> +
> +SET SESSION debug = '+d,only_one_Unique_may_be_created';
> +
> +--replace_column 1 x 2 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x
> +EXPLAIN
> +SELECT * FROM t1 LEFT JOIN t2 ON ( t2.a< 10 OR t2.b< 10 );
> +SELECT * FROM t1 LEFT JOIN t2 ON ( t2.a< 10 OR t2.b< 10 );
> +
> +SET SESSION debug = DEFAULT;
> +
> +DROP TABLE t1, t2;
>
> --echo #
> --echo # End of 5.1 tests
>
> === modified file 'sql/opt_range.cc'
> --- a/sql/opt_range.cc 2010-05-10 07:23:23 +0000
> +++ b/sql/opt_range.cc 2010-05-19 14:18:09 +0000
> @@ -1194,7 +1194,7 @@ QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT(
>
> QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
> TABLE *table)
> - :pk_quick_select(NULL), thd(thd_param)
> + :unique(NULL), pk_quick_select(NULL), thd(thd_param)
> {
> DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
> index= MAX_KEY;
> @@ -1236,6 +1236,7 @@ QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_M
> List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
> QUICK_RANGE_SELECT* quick;
> DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");
> + delete unique;
> quick_it.rewind();
> while ((quick= quick_it++))
> quick->file= NULL;
> @@ -8153,7 +8154,6 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_
> List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
> QUICK_RANGE_SELECT* cur_quick;
> int result;
> - Unique *unique;
> handler *file= head->file;
> DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::read_keys_and_merge");
>
> @@ -8172,9 +8172,19 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_
> if (cur_quick->init() || cur_quick->reset())
> DBUG_RETURN(1);
>
> - unique= new Unique(refpos_order_cmp, (void *)file,
> - file->ref_length,
> - thd->variables.sortbuff_size);
> + if (unique == NULL)
> + {
> + DBUG_EXECUTE_IF("crashme", abort(); );
"crashme" doesn't sound like really unique. Probably it's worth renaming it to
"crashme41660"?
> + DBUG_EXECUTE_IF("only_one_Unique_may_be_created", DBUG_SET("+d,crashme"); );
> +
> + unique= new Unique(refpos_order_cmp, (void *)file,
> + file->ref_length,
> + thd->variables.sortbuff_size);
> + }
My guess that here should be
else
unique->reset();
> +
> + DBUG_ASSERT(file->ref_length == unique->get_size());
> + DBUG_ASSERT(thd->variables.sortbuff_size ==
> unique->get_max_in_memory_size());
> +
> if (!unique)
> DBUG_RETURN(1);
> for (;;)
> @@ -8189,10 +8199,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_
> if (cur_quick->file->inited != handler::NONE)
> cur_quick->file->ha_index_end();
> if (cur_quick->init() || cur_quick->reset())
> - {
> - delete unique;
> DBUG_RETURN(1);
> - }
> }
>
> if (result)
> @@ -8200,17 +8207,13 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_
> if (result != HA_ERR_END_OF_FILE)
> {
> cur_quick->range_end();
> - delete unique;
> DBUG_RETURN(result);
> }
> break;
> }
>
> if (thd->killed)
> - {
> - delete unique;
> DBUG_RETURN(1);
> - }
>
> /* skip row if it will be retrieved by clustered PK scan */
> if (pk_quick_select&& pk_quick_select->row_in_ranges())
> @@ -8219,10 +8222,7 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_
> cur_quick->file->position(cur_quick->record);
> result= unique->unique_add((char*)cur_quick->file->ref);
> if (result)
> - {
> - delete unique;
> DBUG_RETURN(1);
> - }
> }
>
> /*
> @@ -8231,7 +8231,6 @@ int QUICK_INDEX_MERGE_SELECT::read_keys_
> sequence.
> */
> result= unique->get(head);
> - delete unique;
> doing_pk_scan= FALSE;
> /* index_merge currently doesn't support "using index" at all */
> head->set_keyread(FALSE);
> @@ -10276,7 +10275,7 @@ QUICK_GROUP_MIN_MAX_SELECT(TABLE *table,
> uint use_index, double read_cost_arg,
> ha_rows records_arg, uint key_infix_len_arg,
> uchar *key_infix_arg, MEM_ROOT *parent_alloc)
> - :join(join_arg), index_info(index_info_arg),
> + :file(table->file), join(join_arg), index_info(index_info_arg),
> group_prefix_len(group_prefix_len_arg),
> group_key_parts(group_key_parts_arg), have_min(have_min_arg),
> have_max(have_max_arg), seen_first_key(FALSE),
> @@ -10285,7 +10284,6 @@ QUICK_GROUP_MIN_MAX_SELECT(TABLE *table,
> max_functions_it(NULL)
> {
> head= table;
> - file= head->file;
> index= use_index;
> record= head->record[0];
> tmp_record= head->record[1];
>
> === modified file 'sql/opt_range.h'
> --- a/sql/opt_range.h 2010-05-10 07:23:23 +0000
> +++ b/sql/opt_range.h 2010-05-19 14:18:09 +0000
> @@ -500,6 +500,7 @@ public:
>
> class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I
> {
> + Unique *unique;
> public:
> QUICK_INDEX_MERGE_SELECT(THD *thd, TABLE *table);
> ~QUICK_INDEX_MERGE_SELECT();
> @@ -684,7 +685,7 @@ private:
> class QUICK_GROUP_MIN_MAX_SELECT : public QUICK_SELECT_I
> {
> private:
> - handler *file; /* The handler used to get data. */
> + handler * const file; /* The handler used to get data. */
> JOIN *join; /* Descriptor of the current query */
> KEY *index_info; /* The index chosen for data access */
> uchar *record; /* Buffer where the next record is returned. */
>
> === modified file 'sql/sql_class.h'
> --- a/sql/sql_class.h 2010-04-14 09:53:59 +0000
> +++ b/sql/sql_class.h 2010-05-19 14:18:09 +0000
> @@ -2923,6 +2923,9 @@ public:
> void reset();
> bool walk(tree_walk_action action, void *walk_action_arg);
>
> + uint get_size() { return size; }
> + ulonglong get_max_in_memory_size() { return max_in_memory_size; }
> +
> friend int unique_write_to_file(uchar* key, element_count count, Unique
> *unique);
> friend int unique_write_to_ptrs(uchar* key, element_count count, Unique
> *unique);
> };
>
>
>
>
>