#At file:///home/mysql_src/bzrrepos_new/5.5-bugteam/ based on revid:guilhem@stripped
3125 Guilhem Bichot 2010-12-01
Fix for Bug#57932 "query with avg returns incorrect results":
when there was one NULL value, AVG(DISTINCT) could forget about other values.
First possible patch.
@ mysql-test/r/func_group.result
before the code fix, both SELECTs would return NULL
@ sql/item_sum.cc
Assume we are executing "SELECT AVG([DISTINCT] some_field) FROM some_table".
and some_field is the single field of some_table for simplicity.
Each time a row is processed (evaluate_join_record()->
end_send_group()->update_sum_func()) an aggregator is notified,
which itself notifies an Item_sum_avg.
Without DISTINCT, this Item_sum_avg immediately increments its
internal "sum of values" and "count of values" (the latter being
Item_sum_avg::count). The count is incremented only if the row's value
is not NULL (in Item_sum_avg::add()), per AVG() semantices. This row's value
is available in args[0] of Item_sum_avg ("args[0]" stands for
"the first argument of the item": it's an Item_field which automatically
receives the row's value when a row is read from the table).
bool Item_sum_avg::add()
{
if (Item_sum_sum::add()) << calculates the sum (ignores NULL)
return TRUE;
if (!args[0]->null_value)<<if added value is not NULL
count++; <<increment "count"
return FALSE;
}
and everything works.
With DISTINCT, when a row is processed by evaluate_join_record(),
Item_sum_avg does no immediate computation, rather stores
the row's value in a tree (to throw the value away if it is a duplicate
of previous value, otherwise to remember all
distinct values). It's only when it's time to send the average to the
user (at end of the query:
sub_select(end_of_records=true)->end_send_group()->
select_send->send_data()->Protocol::send_result_set_row()->
Item::send()->Item_sum_avg->val_str()), that we iterate over the tree,
compute the sum and count: for this, for each element of the tree,
Item_sum_avg::add() is called and has the same two steps as before:
* Item_sum_sum::add() updates the sum (finding the tree element's value
correctly, and determining correctly its NULLness - look for "arg_is_null"
in that function)
* the "if (!args[0]->null_value)" test right after, breaks: it uses args[0], which
isn't the tree's element but rather the value for the last row processed
by evaluate_join_record(). So if that last row was NULL, "count" stays 0
for each row, and AVG() then returns NULL (count==0 => NULL, per AVG() semantics).
The fix is to use the correct NULLness in the if(), which is as determined by
Item_sum_sum::add().
We do this by moving Item_sum_avg::count to Item_sum_sum, and letting
Item_sum_sum::add() use its local "arg_is_null" to correctly increment "count".
modified:
mysql-test/r/func_group.result
mysql-test/t/func_group.test
sql/item_sum.cc
sql/item_sum.h
=== modified file 'mysql-test/r/func_group.result'
--- a/mysql-test/r/func_group.result 2010-09-10 18:48:13 +0000
+++ b/mysql-test/r/func_group.result 2010-12-01 10:35:31 +0000
@@ -1746,3 +1746,18 @@ MAX(c1) MIN(c1)
-00:00:01 -00:00:01
DROP TABLE t1;
# End of the bug#56120
+#
+# Bug#57932 "query with AVG(DISTINCT) returns NULL if last
+# aggregated value was NULL"
+#
+CREATE TABLE t1 (col_int_nokey int(11));
+INSERT INTO t1 VALUES (7),(8),(NULL);
+SELECT AVG(DISTINCT col_int_nokey) FROM t1;
+AVG(DISTINCT col_int_nokey)
+7.5000
+SELECT AVG(DISTINCT outr.col_int_nokey) FROM t1 AS outr LEFT JOIN t1 AS outr2 ON
+outr.col_int_nokey = outr2.col_int_nokey;
+AVG(DISTINCT outr.col_int_nokey)
+7.5000
+DROP TABLE t1;
+# End of the bug#56120
=== modified file 'mysql-test/t/func_group.test'
--- a/mysql-test/t/func_group.test 2010-09-10 18:48:13 +0000
+++ b/mysql-test/t/func_group.test 2010-12-01 10:35:31 +0000
@@ -1118,3 +1118,14 @@ SELECT MAX(c1),MIN(c1) FROM t1;
DROP TABLE t1;
--echo # End of the bug#56120
+--echo #
+--echo # Bug#57932 "query with AVG(DISTINCT) returns NULL if last
+--echo # aggregated value was NULL"
+--echo #
+CREATE TABLE t1 (col_int_nokey int(11));
+INSERT INTO t1 VALUES (7),(8),(NULL);
+SELECT AVG(DISTINCT col_int_nokey) FROM t1;
+SELECT AVG(DISTINCT outr.col_int_nokey) FROM t1 AS outr LEFT JOIN t1 AS outr2 ON
+outr.col_int_nokey = outr2.col_int_nokey;
+DROP TABLE t1;
+--echo # End of the bug#56120
=== modified file 'sql/item_sum.cc'
--- a/sql/item_sum.cc 2010-09-02 13:57:59 +0000
+++ b/sql/item_sum.cc 2010-12-01 10:35:31 +0000
@@ -1249,7 +1249,7 @@ Field *Item_sum_hybrid::create_tmp_field
check if the following assignments are really needed
*/
Item_sum_sum::Item_sum_sum(THD *thd, Item_sum_sum *item)
- :Item_sum_num(thd, item), hybrid_type(item->hybrid_type),
+ :Item_sum_num(thd, item), hybrid_type(item->hybrid_type), count(item->count),
curr_dec_buff(item->curr_dec_buff)
{
/* TODO: check if the following assignments are really needed */
@@ -1279,6 +1279,7 @@ void Item_sum_sum::clear()
}
else
sum= 0.0;
+ count= 0;
DBUG_VOID_RETURN;
}
@@ -1351,6 +1352,7 @@ bool Item_sum_sum::add()
val, dec_buffs + curr_dec_buff);
curr_dec_buff^= 1;
null_value= 0;
+ count++;
}
}
else
@@ -1374,7 +1376,10 @@ bool Item_sum_sum::add()
}
sum+= val;
if (!arg_is_null)
+ {
null_value= 0;
+ count++;
+ }
}
DBUG_RETURN(0);
}
@@ -1565,22 +1570,6 @@ Field *Item_sum_avg::create_tmp_field(bo
}
-void Item_sum_avg::clear()
-{
- Item_sum_sum::clear();
- count=0;
-}
-
-
-bool Item_sum_avg::add()
-{
- if (Item_sum_sum::add())
- return TRUE;
- if (!args[0]->null_value)
- count++;
- return FALSE;
-}
-
double Item_sum_avg::val_real()
{
DBUG_ASSERT(fixed == 1);
=== modified file 'sql/item_sum.h'
--- a/sql/item_sum.h 2010-08-30 07:36:04 +0000
+++ b/sql/item_sum.h 2010-12-01 10:35:31 +0000
@@ -677,12 +677,14 @@ class Item_sum_sum :public Item_sum_num
protected:
Item_result hybrid_type;
double sum;
+ ulonglong count; ///< count of non-NULL [optionally DISTINCT] values
my_decimal dec_buffs[2];
uint curr_dec_buff;
void fix_length_and_dec();
public:
- Item_sum_sum(Item *item_par, bool distinct) :Item_sum_num(item_par)
+ Item_sum_sum(Item *item_par, bool distinct) :Item_sum_num(item_par),
+ count(0)
{
set_distinct(distinct);
}
@@ -706,6 +708,11 @@ public:
return has_with_distinct() ? "sum(distinct " : "sum(";
}
Item *copy_or_same(THD* thd);
+ void cleanup()
+ {
+ count= 0;
+ Item_sum_num::cleanup();
+ }
};
@@ -793,24 +800,19 @@ public:
class Item_sum_avg :public Item_sum_sum
{
public:
- ulonglong count;
uint prec_increment;
uint f_precision, f_scale, dec_bin_size;
- Item_sum_avg(Item *item_par, bool distinct)
- :Item_sum_sum(item_par, distinct), count(0)
+ Item_sum_avg(Item *item_par, bool distinct) : Item_sum_sum(item_par, distinct)
{}
Item_sum_avg(THD *thd, Item_sum_avg *item)
- :Item_sum_sum(thd, item), count(item->count),
- prec_increment(item->prec_increment) {}
+ : Item_sum_sum(thd, item), prec_increment(item->prec_increment) {}
void fix_length_and_dec();
enum Sumfunctype sum_func () const
{
return has_with_distinct() ? AVG_DISTINCT_FUNC : AVG_FUNC;
}
- void clear();
- bool add();
double val_real();
// In SPs we might force the "wrong" type with select into a declare variable
longlong val_int() { return (longlong) rint(val_real()); }
@@ -827,11 +829,6 @@ public:
}
Item *copy_or_same(THD* thd);
Field *create_tmp_field(bool group, TABLE *table, uint convert_blob_length);
- void cleanup()
- {
- count= 0;
- Item_sum_sum::cleanup();
- }
};
class Item_sum_variance;
Attachment: [text/bzr-bundle] bzr/guilhem@mysql.com-20101201103531-wvfg60qf52sauln4.bundle
| Thread |
|---|
| • bzr commit into mysql-5.5-bugteam branch (guilhem:3125) Bug#57932 | Guilhem Bichot | 1 Dec |