Martin,
After our discussion, it seems that the 5.0 patch will become
also complex, so I suggest to push this change in 5.0 as well.
However, I will ask you below to add more extensive tests.
<cut>
> ChangeSet@stripped, 2007-12-12 12:29:46+01:00, mhansson@stripped +4 -0
> Bug#32798: DISTINCT in GROUP_CONCAT clause fails when ordering by a column
> with null values
>
> For queries containing GROUP_CONCAT(DISTINCT fields ORDER BY fields), there
> was a limitation that the DISTINCT fields had to be the same as ORDER BY
> fields, owing to the fact that one single sorted tree was used for keeping
> track of tuples, ordering and uniqueness. Fixed in 5.1 and later by
Remove the comment referring to 5.1.
> introducing a second structure to handle uniqueness so that the original
> structure has only to order the result.
>
> mysql-test/r/func_gconcat.result@stripped, 2007-12-12 12:29:44+01:00,
> mhansson@stripped +29 -1
> Bug#32798:
> - Wrong test result turned correct after fix.
> - Correct test result
>
> mysql-test/t/func_gconcat.test@stripped, 2007-12-12 12:29:44+01:00,
> mhansson@stripped +29 -0
> Bug#32798: Test case
>
> sql/item_sum.cc@stripped, 2007-12-12 12:29:44+01:00, mhansson@stripped +72 -70
> Bug#32798: Implementation of fix. Dead code removal.
>
> - removed comment describing this bug
> - removed function group_concat_key_cmp_with_distinct
> - removed function group_concat_key_cmp_with_distinct_and_order
> - Added a Unique object to maintain uniqueness of values.
> - Added a comparison method to compare tuples based only on selected fields.
>
> sql/item_sum.h@stripped, 2007-12-12 12:29:44+01:00, mhansson@stripped +16 -8
> Bug#32798: Declarations and comments.
>
> diff -Nrup a/mysql-test/r/func_gconcat.result b/mysql-test/r/func_gconcat.result
> --- a/mysql-test/r/func_gconcat.result 2007-07-20 01:04:53 +02:00
> +++ b/mysql-test/r/func_gconcat.result 2007-12-12 12:29:44 +01:00
> @@ -271,7 +271,7 @@ group_concat(distinct s1 order by s2)
> c,b,a
> select group_concat(distinct s1 order by s2) from t1;
> group_concat(distinct s1 order by s2)
> -c,b,a,c
> +c,b,a
> drop table t1;
> create table t1 (a int, c int);
> insert into t1 values (1, 2), (2, 3), (2, 4), (3, 5);
> @@ -825,4 +825,32 @@ id group_concat(b.name)
> 1 óra,óra
> 2 óra,óra
> drop table t1;
> +CREATE TABLE t1 (a INT, b INT);
> +INSERT INTO t1 VALUES (1, 1), (2, 2), (2, 3);
> +SELECT GROUP_CONCAT(DISTINCT a ORDER BY b) FROM t1;
> +GROUP_CONCAT(DISTINCT a ORDER BY b)
> +1,2
> +SELECT GROUP_CONCAT(DISTINCT a ORDER BY b DESC) FROM t1;
> +GROUP_CONCAT(DISTINCT a ORDER BY b DESC)
> +2,1
> +SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
> +GROUP_CONCAT(DISTINCT a)
> +1,2
> +CREATE TABLE t3 (a INT, b INT, c INT, d INT);
> +INSERT INTO t3 VALUES (1,1, 1,1), (1,1, 2,2), (1,2, 2,1), (2,1, 1,2);
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, d) FROM t3;
> +GROUP_CONCAT(DISTINCT a, b ORDER BY c, d)
> +11,21,12
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY d, c) FROM t3;
> +GROUP_CONCAT(DISTINCT a, b ORDER BY d, c)
> +11,12,21
> +CREATE TABLE t4 (a INT, b INT, c INT);
> +INSERT INTO t4 VALUES (1, 1, 1), (2, 1, 2), (3, 2, 1);
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY b, c) FROM t4;
> +GROUP_CONCAT(DISTINCT a, b ORDER BY b, c)
> +11,21,32
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, b) FROM t4;
> +GROUP_CONCAT(DISTINCT a, b ORDER BY c, b)
> +11,32,21
> +DROP TABLE t1, t3, t4;
> End of 5.0 tests
> diff -Nrup a/mysql-test/t/func_gconcat.test b/mysql-test/t/func_gconcat.test
> --- a/mysql-test/t/func_gconcat.test 2007-07-20 01:04:53 +02:00
> +++ b/mysql-test/t/func_gconcat.test 2007-12-12 12:29:44 +01:00
> @@ -562,4 +562,33 @@ insert into t1 (id, name) values (2, "ór
> select b.id, group_concat(b.name) from t1 a, t1 b group by b.id;
> drop table t1;
>
> +#
> +# Bug#32798: DISTINCT in GROUP_CONCAT clause fails when ordering by a column
> +# with null values
> +#
> +CREATE TABLE t1 (a INT, b INT);
> +
> +INSERT INTO t1 VALUES (1, 1), (2, 2), (2, 3);
> +
> +SELECT GROUP_CONCAT(DISTINCT a ORDER BY b) FROM t1;
> +SELECT GROUP_CONCAT(DISTINCT a ORDER BY b DESC) FROM t1;
> +SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
> +
> +CREATE TABLE t3 (a INT, b INT, c INT, d INT);
> +
> +# There is one duplicate in the expression list: 1,10
> +# There is one duplicate in ORDER BY list, but that shouldnt matter: 1,10
> +INSERT INTO t3 VALUES (1,1, 1,1), (1,1, 2,2), (1,2, 2,1), (2,1, 1,2);
> +
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, d) FROM t3;
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY d, c) FROM t3;
> +
> +CREATE TABLE t4 (a INT, b INT, c INT);
> +
> +INSERT INTO t4 VALUES (1, 1, 1), (2, 1, 2), (3, 2, 1);
> +
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY b, c) FROM t4;
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, b) FROM t4;
> +DROP TABLE t1, t3, t4;
> +
Please add the following test cases:
- expressions both in DISTINCT and ORDER BY, and only in one of them,
- the prefix tests from your 5.0 patch,
- tests where neither list is a prefix, but they overlap
> --echo End of 5.0 tests
> diff -Nrup a/sql/item_sum.cc b/sql/item_sum.cc
> --- a/sql/item_sum.cc 2007-10-14 10:30:25 +02:00
> +++ b/sql/item_sum.cc 2007-12-12 12:29:44 +01:00
> @@ -2849,45 +2849,45 @@ String *Item_sum_udf_str::val_str(String
> concat of values from "group by" operation
>
> BUGS
> - DISTINCT and ORDER BY only works if ORDER BY uses all fields and only fields
> - in expression list
> Blobs doesn't work with DISTINCT or ORDER BY
> *****************************************************************************/
>
> -/*
> - function of sort for syntax:
> - GROUP_CONCAT(DISTINCT expr,...)
> +
> +/**
This comment is not fully in Doxygen style:
> + Compares the values for fields in expr list of GROUP_CONCAT.
> +
Add @note (or @notes) here (check).
> + GROUP_CONCAT([DISTINCT] expr [,expr ...]
> + [ORDER BY {unsigned_integer | col_name | expr}
> + [ASC | DESC] [,col_name ...]]
> + [SEPARATOR str_val])
> +
> + @return
Add @retval before each return value.
> + - -1 : key1 < key2
> + - 0 : key1 = key2
> + - 1 : key1 > key2
> */
>
> -int group_concat_key_cmp_with_distinct(void* arg, uchar* key1,
> - uchar* key2)
> +int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void*
> key2)
> {
> - Item_func_group_concat* grp_item= (Item_func_group_concat*)arg;
> - TABLE *table= grp_item->table;
> - Item **field_item, **end;
> + Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
> + TABLE *table= item_func->table;
>
> - for (field_item= grp_item->args, end= field_item +
> grp_item->arg_count_field;
> - field_item < end;
> - field_item++)
> + for (uint i= 0; i < item_func->arg_count_field; i++)
> {
> + Item *item= item_func->args[i];
> + if (item->const_item())
> + continue;
> /*
> We have to use get_tmp_table_field() instead of
> real_item()->get_tmp_table_field() because we want the field in
> the temporary table, not the original field
> */
> - Field *field= (*field_item)->get_tmp_table_field();
> - /*
> - If field_item is a const item then either get_tmp_table_field returns 0
> - or it is an item over a const table.
> - */
> - if (field && !(*field_item)->const_item())
> - {
> - int res;
> - uint offset= (field->offset(field->table->record[0]) -
> - table->s->null_bytes);
> - if ((res= field->cmp(key1 + offset, key2 + offset)))
> - return res;
> - }
> + Field *field= item->get_tmp_table_field();
> + DBUG_ASSERT(field);
No need for this obvious assert - the next line will crash anyway if
field == NULL.
> + int res;
> + uint offset=
> field->offset(field->table->record[0])-table->s->null_bytes;
> + if((res= field->cmp((uchar*)key1 + offset, (uchar*)key2 + offset)))
> + return res;
> }
> return 0;
> }
> @@ -2938,25 +2938,6 @@ int group_concat_key_cmp_with_order(void
>
>
> /*
> - function of sort for syntax:
> - GROUP_CONCAT(DISTINCT expr,... ORDER BY col,... )
> -
> - BUG:
> - This doesn't work in the case when the order by contains data that
> - is not part of the field list because tree-insert will not notice
> - the duplicated values when inserting things sorted by ORDER BY
> -*/
> -
> -int group_concat_key_cmp_with_distinct_and_order(void* arg,uchar* key1,
> - uchar* key2)
> -{
> - if (!group_concat_key_cmp_with_distinct(arg,key1,key2))
> - return 0;
> - return(group_concat_key_cmp_with_order(arg,key1,key2));
> -}
> -
> -
> -/*
> Append data from current leaf to item->result
> */
>
> @@ -3041,7 +3022,7 @@ Item_func_group_concat(Name_resolution_c
> bool distinct_arg, List<Item> *select_list,
> SQL_LIST *order_list, String *separator_arg)
> :tmp_table_param(0), warning(0),
> - separator(separator_arg), tree(0), table(0),
> + separator(separator_arg), tree(0), unique_filter(NULL), table(0),
> order(0), context(context_arg),
> arg_count_order(order_list ? order_list->elements : 0),
> arg_count_field(select_list->elements),
> @@ -3096,6 +3077,7 @@ Item_func_group_concat::Item_func_group_
> warning(item->warning),
> separator(item->separator),
> tree(item->tree),
> + unique_filter(item->unique_filter),
> table(item->table),
> order(item->order),
> context(item->context),
> @@ -3146,6 +3128,11 @@ void Item_func_group_concat::cleanup()
> delete_tree(tree);
> tree= 0;
> }
> + if (unique_filter)
> + {
> + delete unique_filter;
> + unique_filter= NULL;
> + }
> if (warning)
> {
> char warn_buff[MYSQL_ERRMSG_SIZE];
> @@ -3175,6 +3162,8 @@ void Item_func_group_concat::clear()
> no_appended= TRUE;
> if (tree)
> reset_tree(tree);
> + if (distinct)
> + unique_filter->reset();
> /* No need to reset the table as we never call write_row */
> }
>
> @@ -3198,9 +3187,20 @@ bool Item_func_group_concat::add()
> }
>
> null_value= FALSE;
> + bool row_eligible= TRUE;
> +
> + if (distinct)
> + {
> + /* Filter out duplicate rows. */
> + DBUG_ASSERT(unique_filter);
No need for this obvious assert - the next line will crash anyway if
unique_filter == NULL.
> + uint count= unique_filter->elements_in_tree();
> + unique_filter->unique_add(table->record[0] + table->s->null_bytes);
> + if (count == unique_filter->elements_in_tree())
> + row_eligible= FALSE;
> + }
>
> TREE_ELEMENT *el= 0; // Only for safety
> - if (tree)
> + if (row_eligible && tree)
> el= tree_insert(tree, table->record[0] + table->s->null_bytes, 0,
> tree->custom_arg);
> /*
> @@ -3208,7 +3208,7 @@ bool Item_func_group_concat::add()
> we can dump the row here in case of GROUP_CONCAT(DISTINCT...)
> instead of doing tree traverse later.
> */
> - if (!warning_for_row &&
> + if (row_eligible && !warning_for_row &&
> (!tree || (el->count == 1 && distinct &&
> !arg_count_order)))
> dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this);
>
> @@ -3355,38 +3355,33 @@ bool Item_func_group_concat::setup(THD *
> table->file->extra(HA_EXTRA_NO_ROWS);
> table->no_rows= 1;
>
> + /*
> + Need sorting or uniqueness: init tree and choose a function to sort.
> + Don't reserve space for NULLs: if any of gconcat arguments is NULL,
> + the row is not added to the result.
> + */
> + uint tree_key_length= table->s->reclength - table->s->null_bytes;
>
> - if (distinct || arg_count_order)
> + if (arg_count_order)
> {
> - /*
> - Need sorting: init tree and choose a function to sort.
> - Don't reserve space for NULLs: if any of gconcat arguments is NULL,
> - the row is not added to the result.
> - */
> - uint tree_key_length= table->s->reclength - table->s->null_bytes;
> -
> tree= &tree_base;
> - if (arg_count_order)
> - {
> - if (distinct)
> - compare_key= (qsort_cmp2) group_concat_key_cmp_with_distinct_and_order;
> - else
> - compare_key= (qsort_cmp2) group_concat_key_cmp_with_order;
> - }
> - else
> - {
> - compare_key= (qsort_cmp2) group_concat_key_cmp_with_distinct;
> - }
> + compare_key= (qsort_cmp2) group_concat_key_cmp_with_order;
> /*
> - Create a tree for sorting. The tree is used to sort and to remove
> - duplicate values (according to the syntax of this function). If there
> - is no DISTINCT or ORDER BY clauses, we don't create this tree.
> + Create a tree for sorting. The tree is used to sort (according to the
> + syntax of this function). If there is no ORDER BY clause, we don't
> + create this tree.
> */
> init_tree(tree, (uint) min(thd->variables.max_heap_table_size,
> thd->variables.sortbuff_size/16), 0,
> tree_key_length, compare_key, 0, NULL, (void*) this);
> }
>
> + if (distinct)
> + unique_filter= new Unique(group_concat_key_cmp_with_distinct,
> + (void*)this,
> + tree_key_length,
> + thd->variables.max_heap_table_size);
> +
> DBUG_RETURN(FALSE);
> }
>
> @@ -3455,4 +3450,11 @@ void Item_func_group_concat::print(Strin
> str->append(STRING_WITH_LEN(" separator \'"));
> str->append(*separator);
> str->append(STRING_WITH_LEN("\')"));
> +}
> +
> +
> +Item_func_group_concat::~Item_func_group_concat()
> +{
> + if (unique_filter)
> + delete unique_filter;
I think "delete NULL;" is safe, so no need for IF.
> }
> diff -Nrup a/sql/item_sum.h b/sql/item_sum.h
> --- a/sql/item_sum.h 2007-07-01 05:25:43 +02:00
> +++ b/sql/item_sum.h 2007-12-12 12:29:44 +01:00
> @@ -1173,11 +1173,22 @@ class Item_func_group_concat : public It
> String *separator;
> TREE tree_base;
> TREE *tree;
> +
> + /**
> + If DISTINCT is used with this GROUP_CONCAT, this member is used to filter
> + out duplicates.
> + @see Item_func_group_concat::setup
> + @see Item_func_group_concat::add
> + @see Item_func_group_concat::clear
> + */
> + Unique *unique_filter;
> TABLE *table;
> ORDER **order;
> Name_resolution_context *context;
> - uint arg_count_order; // total count of ORDER BY items
> - uint arg_count_field; // count of arguments
> + /** The number of ORDER BY items. */
> + uint arg_count_order;
> + /** The number of selected items, aka the expr list. */
> + uint arg_count_field;
> uint count_cut_values;
> bool distinct;
> bool warning_for_row;
> @@ -1190,13 +1201,10 @@ class Item_func_group_concat : public It
> */
> Item_func_group_concat *original;
>
> - friend int group_concat_key_cmp_with_distinct(void* arg, uchar* key1,
> - uchar* key2);
Leave the signature of this function, why change?
> + friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
> + const void* key2);
> friend int group_concat_key_cmp_with_order(void* arg, uchar* key1,
> uchar* key2);
> - friend int group_concat_key_cmp_with_distinct_and_order(void* arg,
> - uchar* key1,
> - uchar* key2);
> friend int dump_leaf_key(uchar* key,
> element_count count __attribute__((unused)),
> Item_func_group_concat *group_concat_item);
> @@ -1207,7 +1215,7 @@ public:
> SQL_LIST *is_order, String *is_separator);
>
> Item_func_group_concat(THD *thd, Item_func_group_concat *item);
> - ~Item_func_group_concat() {}
> + ~Item_func_group_concat();
> void cleanup();
>
> enum Sumfunctype sum_func () const {return GROUP_CONCAT_FUNC;}
>