List:Commits« Previous MessageNext Message »
From:Roy Lyseng Date:December 7 2010 10:33am
Subject:Re: bzr commit into mysql-5.5-bugteam branch (guilhem:3125) Bug#57932
View as plain text  
Hi Guilhem,

thanks for working on this, and for considering so many different solutions.

I like this solution best, as it simplifies the logic in Item_sum_xxx::add() by 
"hiding" the complexity into virtual function implementations. It means that if 
we were to ever implement unit testing for the Item tree, the task would be 
simplified because we could rely on the underlying classes being tested, instead 
of testing all the complex logic in the add() function itself. There is still 
the test on type in add(), but that test is necessary as long as the item tree 
is not type-aware.

I will approve this patch, but please consider a few suggestions below.

On 02.12.10 14.38, Guilhem Bichot wrote:
> #At file:///home/mysql_src/bzrrepos_new/5.5-bugteam/ based on
> revid:guilhem@stripped
>
>   3125 Guilhem Bichot	2010-12-02
>        Fix for Bug#57932 "query with avg returns incorrect results":
>        when there was one NULL value, AVG(DISTINCT) could forget about other values.
>        Fourth possible patch suggested by Joro.
>       @ mysql-test/r/func_group.result
>          before the code fix, both SELECTs would return NULL
>       @ sql/item_sum.cc
>          This time the fix is to let the aggregator tell whether the value
>          it just saw was NULL. The aggregator knows where to get the info
>          thanks to virtual functions. Item_sum_sum::add() now asks
>          the aggregator. Item_sum_avg() also asks the aggregator
>          and then knows it shouldn't increment "count".
>
>      modified:
>        mysql-test/r/func_group.result
>        mysql-test/t/func_group.test
>        sql/item_sum.cc
>        sql/item_sum.h
> === modified file 'mysql-test/r/func_group.result'
> --- a/mysql-test/r/func_group.result	2010-09-10 18:48:13 +0000
> +++ b/mysql-test/r/func_group.result	2010-12-02 13:38:12 +0000
> @@ -1746,3 +1746,18 @@ MAX(c1)	MIN(c1)
>   -00:00:01	-00:00:01
>   DROP TABLE t1;
>   # End of the bug#56120
> +#
> +# Bug#57932 "query with AVG(DISTINCT) returns NULL if last
> +# aggregated value was NULL"
> +#
> +CREATE TABLE t1 (col_int_nokey int(11));
> +INSERT INTO t1 VALUES (7),(8),(NULL);
> +SELECT AVG(DISTINCT col_int_nokey) FROM t1;
> +AVG(DISTINCT col_int_nokey)
> +7.5000
> +SELECT AVG(DISTINCT outr.col_int_nokey) FROM t1 AS outr LEFT JOIN t1 AS outr2 ON
> +outr.col_int_nokey = outr2.col_int_nokey;
> +AVG(DISTINCT outr.col_int_nokey)
> +7.5000
> +DROP TABLE t1;
> +# End of the bug#56120
>
> === modified file 'mysql-test/t/func_group.test'
> --- a/mysql-test/t/func_group.test	2010-09-10 18:48:13 +0000
> +++ b/mysql-test/t/func_group.test	2010-12-02 13:38:12 +0000
> @@ -1118,3 +1118,14 @@ SELECT MAX(c1),MIN(c1) FROM t1;
>   DROP TABLE t1;
>   --echo # End of the bug#56120
>
> +--echo #
> +--echo # Bug#57932 "query with AVG(DISTINCT) returns NULL if last
> +--echo # aggregated value was NULL"
> +--echo #
> +CREATE TABLE t1 (col_int_nokey int(11));
> +INSERT INTO t1 VALUES (7),(8),(NULL);
> +SELECT AVG(DISTINCT col_int_nokey) FROM t1;
> +SELECT AVG(DISTINCT outr.col_int_nokey) FROM t1 AS outr LEFT JOIN t1 AS outr2 ON
> +outr.col_int_nokey = outr2.col_int_nokey;
> +DROP TABLE t1;
> +--echo # End of the bug#56120

Bug number is wrong in the end comment.
> === modified file 'sql/item_sum.cc'
> --- a/sql/item_sum.cc	2010-09-02 13:57:59 +0000
> +++ b/sql/item_sum.cc	2010-12-02 13:38:12 +0000
> @@ -1325,27 +1325,11 @@ void Item_sum_sum::fix_length_and_dec()
>   bool Item_sum_sum::add()
>   {
>     DBUG_ENTER("Item_sum_sum::add");
> -  bool arg_is_null;
>     if (hybrid_type == DECIMAL_RESULT)
>     {
> -    my_decimal value, *val;
> -    if (aggr->use_distinct_values)
> -    {
> -      /*
> -        We are aggregating distinct rows. Get the value from the distinct
> -        table pointer
> -      */
> -      Aggregator_distinct *daggr= (Aggregator_distinct *)aggr;
> -      val= daggr->table->field[0]->val_decimal (&value);
> -      arg_is_null= daggr->table->field[0]->is_null();
> -    }
> -    else
> -    {
> -      /* non-distinct aggregation */
> -      val= args[0]->val_decimal(&value);
> -      arg_is_null= args[0]->null_value;
> -    }
> -    if (!arg_is_null)
> +    my_decimal value;
> +    const my_decimal *val= aggr->arg_val_decimal(&value);
> +    if (!aggr->arg_is_null())
>       {
>         my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff^1),
>                        val, dec_buffs + curr_dec_buff);
> @@ -1355,25 +1339,8 @@ bool Item_sum_sum::add()
>     }
>     else
>     {
> -    double val;
> -    if (aggr->use_distinct_values)
> -    {
> -      /*
> -        We are aggregating distinct rows. Get the value from the distinct
> -        table pointer
> -      */
> -      Aggregator_distinct *daggr= (Aggregator_distinct *)aggr;
> -      val= daggr->table->field[0]->val_real ();
> -      arg_is_null= daggr->table->field[0]->is_null();
> -    }
> -    else
> -    {
> -      /* non-distinct aggregation */
> -      val= args[0]->val_real();
> -      arg_is_null= args[0]->null_value;
> -    }
> -    sum+= val;
> -    if (!arg_is_null)
> +    sum+= aggr->arg_val_real();
> +    if (!aggr->arg_is_null())
>         null_value= 0;
>     }
>     DBUG_RETURN(0);
> @@ -1471,6 +1438,45 @@ Aggregator_distinct::~Aggregator_distinc
>   }
>
>
> +my_decimal *Aggregator_simple::arg_val_decimal(my_decimal *value)
> +{
> +  return item_sum->args[0]->val_decimal(value);
> +}
> +
> +
> +double Aggregator_simple::arg_val_real()
> +{
> +  return item_sum->args[0]->val_real();
> +}
> +
> +
> +bool Aggregator_simple::arg_is_null()
> +{
> +  return item_sum->args[0]->null_value;
> +}
> +
> +
> +my_decimal *Aggregator_distinct::arg_val_decimal(my_decimal * value)
> +{
> +  return use_distinct_values ? table->field[0]->val_decimal(value) :
> +    item_sum->args[0]->val_decimal(value);
> +}

I am not sure about this, but it seems that the only reason for 
use_distinct_values to exist is to be able to pick the correct subclass type in 
the old Item_sum_sum::add() implementation. By relying on virtual functions 
instead, I wonder if the member can be deleted alltogether, and the branch that 
uses item_sum->args[0]->val_decimal can be deleted. The same would also apply to 
arg_val_real() and to arg_is_null().


> +
> +
> +double Aggregator_distinct::arg_val_real()
> +{
> +  return use_distinct_values ? table->field[0]->val_real() :
> +    item_sum->args[0]->val_real();
> +}
> +
> +
> +bool Aggregator_distinct::arg_is_null()
> +{
> +  return use_distinct_values ? table->field[0]->is_null() :
> +    item_sum->args[0]->null_value;
> +}
> +
> +
>   Item *Item_sum_count::copy_or_same(THD* thd)
>   {
>     return new (thd->mem_root) Item_sum_count(thd, this);
> @@ -1574,11 +1580,10 @@ void Item_sum_avg::clear()
>
>   bool Item_sum_avg::add()
>   {
> -  if (Item_sum_sum::add())
> -    return TRUE;
> -  if (!args[0]->null_value)
> +  const bool rc= Item_sum_sum::add();
> +  if (!aggr->arg_is_null())
>       count++;
> -  return FALSE;
> +  return rc;
>   }

This can be slightly simpler with "rc" removed:

bool Item_sum_avg::add()
  {
   if (Item_sum_sum::add()
     return true;
   if (!aggr->arg_is_null())
      count++;
   return false;
  }

If you are worried about the cost of aggr->arg_is_null(), you may reimplement 
Item_sum_avg::add() by copying the code for Item_sum_sum::add() and adding code 
that increments count. It is less code to duplicate now than it used to be, with 
two if tests eliminated.
>
>   double Item_sum_avg::val_real()
>
> === modified file 'sql/item_sum.h'
> --- a/sql/item_sum.h	2010-08-30 07:36:04 +0000
> +++ b/sql/item_sum.h	2010-12-02 13:38:12 +0000
> @@ -101,6 +101,15 @@ public:
>     */
>     virtual void endup() = 0;
>
> +  /** Decimal value of being-aggregated argument */
> +  virtual my_decimal *arg_val_decimal(my_decimal * value) = 0;
> +  /** Floating point value of being-aggregated argument */
> +  virtual double arg_val_real() = 0;
> +  /**
> +     NULLness of being-aggregated argument; can be called only after
> +     arg_val_decimal() or arg_val_real().
> +  */
> +  virtual bool arg_is_null() = 0;
>   };
>
>
> @@ -304,6 +313,7 @@ class st_select_lex;
>   class Item_sum :public Item_result_field
>   {
>     friend class Aggregator_distinct;
> +  friend class Aggregator_simple;
>
>   protected:
>     /**
> @@ -600,6 +610,9 @@ public:
>     void clear();
>     bool add();
>     void endup();
> +  my_decimal *arg_val_decimal(my_decimal * value);
> +  double arg_val_real();
> +  bool arg_is_null();

My personal preference is that you add "virtual" in front of the function headers.
>
>     bool unique_walk_function(void *element);
>     static int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
> @@ -623,6 +636,9 @@ public:
>     void clear() { item_sum->clear(); }
>     bool add() { return item_sum->add(); }
>     void endup() {};
> +  my_decimal *arg_val_decimal(my_decimal * value);
> +  double arg_val_real();
> +  bool arg_is_null();
>   };
Thanks,
Roy
Thread
bzr commit into mysql-5.5-bugteam branch (guilhem:3125) Bug#57932Guilhem Bichot2 Dec
Re: bzr commit into mysql-5.5-bugteam branch (guilhem:3125) Bug#57932Roy Lyseng7 Dec