MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Timour Katchaounov Date:December 13 2007 10:48am
Subject:Re: bk commit into 5.1 tree (mhansson:1.2587) BUG#32798
View as plain text  
Martin,

After our discussion, it seems that the 5.0 patch will become
also complex, so I suggest to push this change in 5.0 as well.
However, I will ask you below to add more extensive tests.

<cut>
> ChangeSet@stripped, 2007-12-12 12:29:46+01:00, mhansson@stripped +4 -0
>   Bug#32798: DISTINCT in GROUP_CONCAT clause fails when ordering by a column 
>   with null values
>   
>   For queries containing GROUP_CONCAT(DISTINCT fields ORDER BY fields), there 
>   was a limitation that the DISTINCT fields had to be the same as ORDER BY 
>   fields, owing to the fact that one single sorted tree was used for keeping 
>   track of tuples, ordering and uniqueness. Fixed in 5.1 and later by 

Remove the comment referring to 5.1.

>   introducing a second structure to handle uniqueness so that the original 
>   structure has only to order the result.
> 
>   mysql-test/r/func_gconcat.result@stripped, 2007-12-12 12:29:44+01:00,
> mhansson@stripped +29 -1
>     Bug#32798:
>     - Wrong test result turned correct after fix.
>     - Correct test result
> 
>   mysql-test/t/func_gconcat.test@stripped, 2007-12-12 12:29:44+01:00,
> mhansson@stripped +29 -0
>     Bug#32798: Test case
> 
>   sql/item_sum.cc@stripped, 2007-12-12 12:29:44+01:00, mhansson@stripped +72 -70
>     Bug#32798: Implementation of fix. Dead code removal.
>     
>     - removed comment describing this bug
>     - removed function group_concat_key_cmp_with_distinct
>     - removed function group_concat_key_cmp_with_distinct_and_order
>     - Added a Unique object to maintain uniqueness of values.
>     - Added a comparison method to compare tuples based only on selected fields.
> 
>   sql/item_sum.h@stripped, 2007-12-12 12:29:44+01:00, mhansson@stripped +16 -8
>     Bug#32798: Declarations and comments.
> 
> diff -Nrup a/mysql-test/r/func_gconcat.result b/mysql-test/r/func_gconcat.result
> --- a/mysql-test/r/func_gconcat.result	2007-07-20 01:04:53 +02:00
> +++ b/mysql-test/r/func_gconcat.result	2007-12-12 12:29:44 +01:00
> @@ -271,7 +271,7 @@ group_concat(distinct s1 order by s2)
>  c,b,a
>  select group_concat(distinct s1 order by s2) from t1;
>  group_concat(distinct s1 order by s2)
> -c,b,a,c
> +c,b,a
>  drop table t1;
>  create table t1 (a int, c int);
>  insert into t1 values (1, 2), (2, 3), (2, 4), (3, 5);
> @@ -825,4 +825,32 @@ id	group_concat(b.name)
>  1	óra,óra
>  2	óra,óra
>  drop table t1;
> +CREATE TABLE t1 (a INT, b INT);
> +INSERT INTO t1 VALUES (1, 1), (2, 2), (2, 3);
> +SELECT GROUP_CONCAT(DISTINCT a ORDER BY b) FROM t1;
> +GROUP_CONCAT(DISTINCT a ORDER BY b)
> +1,2
> +SELECT GROUP_CONCAT(DISTINCT a ORDER BY b DESC) FROM t1;
> +GROUP_CONCAT(DISTINCT a ORDER BY b DESC)
> +2,1
> +SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
> +GROUP_CONCAT(DISTINCT a)
> +1,2
> +CREATE TABLE t3 (a INT, b INT, c INT, d INT);
> +INSERT INTO t3 VALUES (1,1, 1,1), (1,1, 2,2), (1,2, 2,1), (2,1, 1,2);
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, d) FROM t3;
> +GROUP_CONCAT(DISTINCT a, b ORDER BY c, d)
> +11,21,12
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY d, c) FROM t3;
> +GROUP_CONCAT(DISTINCT a, b ORDER BY d, c)
> +11,12,21
> +CREATE TABLE t4 (a INT, b INT, c INT);
> +INSERT INTO t4 VALUES (1, 1, 1), (2, 1, 2), (3, 2, 1);
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY b, c) FROM t4;
> +GROUP_CONCAT(DISTINCT a, b ORDER BY b, c)
> +11,21,32
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, b) FROM t4;
> +GROUP_CONCAT(DISTINCT a, b ORDER BY c, b)
> +11,32,21
> +DROP TABLE t1, t3, t4;
>  End of 5.0 tests
> diff -Nrup a/mysql-test/t/func_gconcat.test b/mysql-test/t/func_gconcat.test
> --- a/mysql-test/t/func_gconcat.test	2007-07-20 01:04:53 +02:00
> +++ b/mysql-test/t/func_gconcat.test	2007-12-12 12:29:44 +01:00
> @@ -562,4 +562,33 @@ insert into t1 (id, name) values (2, "ór
>  select b.id, group_concat(b.name) from t1 a, t1 b group by b.id;
>  drop table t1;
>  
> +#
> +# Bug#32798: DISTINCT in GROUP_CONCAT clause fails when ordering by a column
> +# with null values
> +#
> +CREATE TABLE t1 (a INT, b INT);
> +
> +INSERT INTO t1 VALUES (1, 1), (2, 2), (2, 3);
> +
> +SELECT GROUP_CONCAT(DISTINCT a ORDER BY b) FROM t1;
> +SELECT GROUP_CONCAT(DISTINCT a ORDER BY b DESC) FROM t1;
> +SELECT GROUP_CONCAT(DISTINCT a) FROM t1;
> +
> +CREATE TABLE t3 (a INT, b INT, c INT, d INT);
> +
> +# There is one duplicate in the expression list: 1,10
> +# There is one duplicate in ORDER BY list, but that shouldnt matter: 1,10
> +INSERT INTO t3 VALUES (1,1, 1,1), (1,1, 2,2), (1,2, 2,1), (2,1, 1,2);
> +
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, d) FROM t3;
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY d, c) FROM t3;
> +
> +CREATE TABLE t4 (a INT, b INT, c INT);
> +
> +INSERT INTO t4 VALUES (1, 1, 1), (2, 1, 2), (3, 2, 1);
> +
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY b, c) FROM t4;
> +SELECT GROUP_CONCAT(DISTINCT a, b ORDER BY c, b) FROM t4;
> +DROP TABLE t1, t3, t4;
> +

Please add the following test cases:
- expressions both in DISTINCT and ORDER BY, and only in one of them,
- the prefix tests from your 5.0 patch,
- tests where neither list is a prefix, but they overlap

>  --echo End of 5.0 tests
> diff -Nrup a/sql/item_sum.cc b/sql/item_sum.cc
> --- a/sql/item_sum.cc	2007-10-14 10:30:25 +02:00
> +++ b/sql/item_sum.cc	2007-12-12 12:29:44 +01:00
> @@ -2849,45 +2849,45 @@ String *Item_sum_udf_str::val_str(String
>   concat of values from "group by" operation
>  
>   BUGS
> -   DISTINCT and ORDER BY only works if ORDER BY uses all fields and only fields
> -   in expression list
>     Blobs doesn't work with DISTINCT or ORDER BY
>  *****************************************************************************/
>  
> -/*
> -  function of sort for syntax:
> -  GROUP_CONCAT(DISTINCT expr,...)
> +
> +/** 

This comment is not fully in Doxygen style:

> +  Compares the values for fields in expr list of GROUP_CONCAT. 
> +       

Add @note (or @notes) here (check).

> +     GROUP_CONCAT([DISTINCT] expr [,expr ...]
> +              [ORDER BY {unsigned_integer | col_name | expr}
> +                  [ASC | DESC] [,col_name ...]]
> +              [SEPARATOR str_val])
> + 
> +  @return

Add @retval before each return value.

> +  - -1 : key1 < key2 
> +  -  0 : key1 = key2
> +  -  1 : key1 > key2 
>  */
>  
> -int group_concat_key_cmp_with_distinct(void* arg, uchar* key1,
> -				       uchar* key2)
> +int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void*
> key2)
>  {
> -  Item_func_group_concat* grp_item= (Item_func_group_concat*)arg;
> -  TABLE *table= grp_item->table;
> -  Item **field_item, **end;
> +  Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
> +  TABLE *table= item_func->table;
>  
> -  for (field_item= grp_item->args, end= field_item +
> grp_item->arg_count_field;
> -       field_item < end;
> -       field_item++)
> +  for (uint i= 0; i < item_func->arg_count_field; i++)
>    {
> +    Item *item= item_func->args[i];
> +    if (item->const_item())
> +      continue;
>      /*
>        We have to use get_tmp_table_field() instead of
>        real_item()->get_tmp_table_field() because we want the field in
>        the temporary table, not the original field
>      */
> -    Field *field= (*field_item)->get_tmp_table_field();
> -    /* 
> -      If field_item is a const item then either get_tmp_table_field returns 0
> -      or it is an item over a const table. 
> -    */
> -    if (field && !(*field_item)->const_item())
> -    {
> -      int res;
> -      uint offset= (field->offset(field->table->record[0]) - 
> -                    table->s->null_bytes);
> -      if ((res= field->cmp(key1 + offset, key2 + offset)))
> -	return res;
> -    }
> +    Field *field= item->get_tmp_table_field();
> +    DBUG_ASSERT(field);

No need for this obvious assert - the next line will crash anyway if
field == NULL.

> +    int res;
> +    uint offset=
> field->offset(field->table->record[0])-table->s->null_bytes;
> +    if((res= field->cmp((uchar*)key1 + offset, (uchar*)key2 + offset)))
> +      return res;
>    }
>    return 0;
>  }
> @@ -2938,25 +2938,6 @@ int group_concat_key_cmp_with_order(void
>  
>  
>  /*
> -  function of sort for syntax:
> -  GROUP_CONCAT(DISTINCT expr,... ORDER BY col,... )
> -
> -  BUG:
> -    This doesn't work in the case when the order by contains data that
> -    is not part of the field list because tree-insert will not notice
> -    the duplicated values when inserting things sorted by ORDER BY
> -*/
> -
> -int group_concat_key_cmp_with_distinct_and_order(void* arg,uchar* key1,
> -						 uchar* key2)
> -{
> -  if (!group_concat_key_cmp_with_distinct(arg,key1,key2))
> -    return 0;
> -  return(group_concat_key_cmp_with_order(arg,key1,key2));
> -}
> -
> -
> -/*
>    Append data from current leaf to item->result
>  */
>  
> @@ -3041,7 +3022,7 @@ Item_func_group_concat(Name_resolution_c
>                         bool distinct_arg, List<Item> *select_list,
>                         SQL_LIST *order_list, String *separator_arg)
>    :tmp_table_param(0), warning(0),
> -   separator(separator_arg), tree(0), table(0),
> +   separator(separator_arg), tree(0), unique_filter(NULL), table(0),
>     order(0), context(context_arg),
>     arg_count_order(order_list ? order_list->elements : 0),
>     arg_count_field(select_list->elements),
> @@ -3096,6 +3077,7 @@ Item_func_group_concat::Item_func_group_
>    warning(item->warning),
>    separator(item->separator),
>    tree(item->tree),
> +  unique_filter(item->unique_filter),
>    table(item->table),
>    order(item->order),
>    context(item->context),
> @@ -3146,6 +3128,11 @@ void Item_func_group_concat::cleanup()
>          delete_tree(tree);
>          tree= 0;
>        }
> +      if (unique_filter)
> +      {
> +        delete unique_filter;
> +        unique_filter= NULL;
> +      }
>        if (warning)
>        {
>          char warn_buff[MYSQL_ERRMSG_SIZE];
> @@ -3175,6 +3162,8 @@ void Item_func_group_concat::clear()
>    no_appended= TRUE;
>    if (tree)
>      reset_tree(tree);
> +  if (distinct)
> +    unique_filter->reset();
>    /* No need to reset the table as we never call write_row */
>  }
>  
> @@ -3198,9 +3187,20 @@ bool Item_func_group_concat::add()
>    }
>  
>    null_value= FALSE;
> +  bool row_eligible= TRUE;
> +
> +  if (distinct) 
> +  {
> +    /* Filter out duplicate rows. */
> +    DBUG_ASSERT(unique_filter);

No need for this obvious assert - the next line will crash anyway if
unique_filter == NULL.

> +    uint count= unique_filter->elements_in_tree();
> +    unique_filter->unique_add(table->record[0] + table->s->null_bytes);
> +    if (count == unique_filter->elements_in_tree())
> +      row_eligible= FALSE;
> +  }
>  
>    TREE_ELEMENT *el= 0;                          // Only for safety
> -  if (tree)
> +  if (row_eligible && tree)
>      el= tree_insert(tree, table->record[0] + table->s->null_bytes, 0,
>                      tree->custom_arg);
>    /*
> @@ -3208,7 +3208,7 @@ bool Item_func_group_concat::add()
>      we can dump the row here in case of GROUP_CONCAT(DISTINCT...)
>      instead of doing tree traverse later.
>    */
> -  if (!warning_for_row &&
> +  if (row_eligible && !warning_for_row &&
>        (!tree || (el->count == 1 && distinct &&
> !arg_count_order)))
>      dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this);
>  
> @@ -3355,38 +3355,33 @@ bool Item_func_group_concat::setup(THD *
>    table->file->extra(HA_EXTRA_NO_ROWS);
>    table->no_rows= 1;
>  
> +  /*
> +     Need sorting or uniqueness: init tree and choose a function to sort.
> +     Don't reserve space for NULLs: if any of gconcat arguments is NULL,
> +     the row is not added to the result.
> +  */
> +  uint tree_key_length= table->s->reclength - table->s->null_bytes;
>  
> -  if (distinct || arg_count_order)
> +  if (arg_count_order)
>    {
> -    /*
> -      Need sorting: init tree and choose a function to sort.
> -      Don't reserve space for NULLs: if any of gconcat arguments is NULL,
> -      the row is not added to the result.
> -    */
> -    uint tree_key_length= table->s->reclength - table->s->null_bytes;
> -
>      tree= &tree_base;
> -    if (arg_count_order)
> -    {
> -      if (distinct)
> -        compare_key= (qsort_cmp2) group_concat_key_cmp_with_distinct_and_order;
> -      else
> -        compare_key= (qsort_cmp2) group_concat_key_cmp_with_order;
> -    }
> -    else
> -    {
> -      compare_key= (qsort_cmp2) group_concat_key_cmp_with_distinct;
> -    }
> +    compare_key= (qsort_cmp2) group_concat_key_cmp_with_order;
>      /*
> -      Create a tree for sorting. The tree is used to sort and to remove
> -      duplicate values (according to the syntax of this function). If there
> -      is no DISTINCT or ORDER BY clauses, we don't create this tree.
> +      Create a tree for sorting. The tree is used to sort (according to the
> +      syntax of this function). If there is no ORDER BY clause, we don't
> +      create this tree.
>      */
>      init_tree(tree, (uint) min(thd->variables.max_heap_table_size,
>                                 thd->variables.sortbuff_size/16), 0,
>                tree_key_length, compare_key, 0, NULL, (void*) this);
>    }
>  
> +  if (distinct)
> +    unique_filter= new Unique(group_concat_key_cmp_with_distinct,
> +                              (void*)this,
> +                              tree_key_length,
> +                              thd->variables.max_heap_table_size);
> +  
>    DBUG_RETURN(FALSE);
>  }
>  
> @@ -3455,4 +3450,11 @@ void Item_func_group_concat::print(Strin
>    str->append(STRING_WITH_LEN(" separator \'"));
>    str->append(*separator);
>    str->append(STRING_WITH_LEN("\')"));
> +}
> +
> +
> +Item_func_group_concat::~Item_func_group_concat()
> +{
> +  if (unique_filter) 
> +    delete unique_filter;    

I think "delete NULL;" is safe, so no need for IF.

>  }
> diff -Nrup a/sql/item_sum.h b/sql/item_sum.h
> --- a/sql/item_sum.h	2007-07-01 05:25:43 +02:00
> +++ b/sql/item_sum.h	2007-12-12 12:29:44 +01:00
> @@ -1173,11 +1173,22 @@ class Item_func_group_concat : public It
>    String *separator;
>    TREE tree_base;
>    TREE *tree;
> +
> +  /**
> +     If DISTINCT is used with this GROUP_CONCAT, this member is used to filter
> +     out duplicates. 
> +     @see Item_func_group_concat::setup
> +     @see Item_func_group_concat::add
> +     @see Item_func_group_concat::clear
> +   */
> +  Unique *unique_filter;
>    TABLE *table;
>    ORDER **order;
>    Name_resolution_context *context;
> -  uint arg_count_order;               // total count of ORDER BY items
> -  uint arg_count_field;               // count of arguments
> +  /** The number of ORDER BY items. */
> +  uint arg_count_order;
> +  /** The number of selected items, aka the expr list. */
> +  uint arg_count_field;
>    uint count_cut_values;
>    bool distinct;
>    bool warning_for_row;
> @@ -1190,13 +1201,10 @@ class Item_func_group_concat : public It
>    */
>    Item_func_group_concat *original;
>  
> -  friend int group_concat_key_cmp_with_distinct(void* arg, uchar* key1,
> -					      uchar* key2);

Leave the signature of this function, why change?

> +  friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
> +                                                const void* key2);
>    friend int group_concat_key_cmp_with_order(void* arg, uchar* key1,
>  					     uchar* key2);
> -  friend int group_concat_key_cmp_with_distinct_and_order(void* arg,
> -							uchar* key1,
> -							uchar* key2);
>    friend int dump_leaf_key(uchar* key,
>                             element_count count __attribute__((unused)),
>  			   Item_func_group_concat *group_concat_item);
> @@ -1207,7 +1215,7 @@ public:
>                           SQL_LIST *is_order, String *is_separator);
>  
>    Item_func_group_concat(THD *thd, Item_func_group_concat *item);
> -  ~Item_func_group_concat() {}
> +  ~Item_func_group_concat();
>    void cleanup();
>  
>    enum Sumfunctype sum_func () const {return GROUP_CONCAT_FUNC;}
> 

Thread
bk commit into 5.1 tree (mhansson:1.2587) BUG#32798mhansson12 Dec
  • Re: bk commit into 5.1 tree (mhansson:1.2587) BUG#32798Timour Katchaounov13 Dec