Below is the list of changes that have just been committed into a local
6.0 repository of kgeorge. When kgeorge does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-12-06 15:46:49+02:00, gkodinov@stripped +12 -0
WL#3220: Loose index scan for aggregate functions
Currently MySQL executes COUNT(DISTINCT expr1, ..., exprN) by executing
a table or index scan that fills a temporary data structure (table or
tree) with all distinct values, and then returning the count of the
distinct values in that temporary data structure.
In cases when there are applicable indexes it is possible to execute
such queries much faster via the "loose index scan" technique employed
for execution of GROUP BY and DISTINCT queries, as implemented by
WL#1724.
Covered COUNT(DISTINCT), AVG(DISTINCT) and SUM(DISTINCT).
mysql-test/r/bench_count_distinct.result@stripped, 2007-12-06 15:46:45+02:00,
gkodinov@stripped +1 -1
WL#3220: Loose index scan for aggregate functions
- testsuite updated
mysql-test/r/group_min_max.result@stripped, 2007-12-06 15:46:45+02:00, gkodinov@stripped
+47 -2
WL#3220: Loose index scan for aggregate functions
- testsuite updated
- added test cases of the new functionality
mysql-test/t/group_min_max.test@stripped, 2007-12-06 15:46:45+02:00, gkodinov@stripped +34
-0
WL#3220: Loose index scan for aggregate functions
- testsuite updated
- added test cases of the new functionality
sql/field.h@stripped, 2007-12-06 15:46:45+02:00, gkodinov@stripped +6 -3
WL3220: extended the fix for bug 30219
sql/item_sum.cc@stripped, 2007-12-06 15:46:45+02:00, gkodinov@stripped +651 -594
WL#3220: Loose index scan for aggregate functions
- implementation and usage of the new Aggregator classes
- united the Item_sum_distinct and Item_sum_count_distinct
- caching of values inside Item_sum for repetative val_xxx()
calls.
sql/item_sum.h@stripped, 2007-12-06 15:46:45+02:00, gkodinov@stripped +288 -192
WL#3220: Loose index scan for aggregate functions
- implementation and usage of the new Aggregator classes
- united the Item_sum_distinct and Item_sum_count_distinct
- caching of values inside Item_sum for repetative val_xxx()
calls.
sql/opt_range.cc@stripped, 2007-12-06 15:46:45+02:00, gkodinov@stripped +73 -53
WL#3220: Loose index scan for aggregate functions
- proper selection of loose index scan for aggregate distinct
functions
sql/opt_range.h@stripped, 2007-12-06 15:46:45+02:00, gkodinov@stripped +4 -1
WL#3220: Loose index scan for aggregate functions
- proper selection of loose index scan for aggregate distinct
functions
sql/opt_sum.cc@stripped, 2007-12-06 15:46:46+02:00, gkodinov@stripped +8 -2
L#3220: Loose index scan for aggregate functions
- call new Aggregator classes
sql/sql_select.cc@stripped, 2007-12-06 15:46:46+02:00, gkodinov@stripped +112 -4
WL#3220: Loose index scan for aggregate functions
- use the new Aggregator classes
sql/sql_select.h@stripped, 2007-12-06 15:46:46+02:00, gkodinov@stripped +7 -0
WL#3220: Loose index scan for aggregate functions
- proper selection of loose index scan for aggregate distinct
functions
sql/sql_yacc.yy@stripped, 2007-12-06 15:46:46+02:00, gkodinov@stripped +3 -3
WL#3220: Loose index scan for aggregate functions
- United the Item_sum_distinct and Item_sum_count_distinct.
- distinct is not a separate class anymore.
diff -Nrup a/mysql-test/r/bench_count_distinct.result
b/mysql-test/r/bench_count_distinct.result
--- a/mysql-test/r/bench_count_distinct.result 2006-07-28 20:26:55 +03:00
+++ b/mysql-test/r/bench_count_distinct.result 2007-12-06 15:46:45 +02:00
@@ -5,7 +5,7 @@ count(distinct n)
100
explain extended select count(distinct n) from t1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 index NULL n 4 NULL 200 100.00 Using index
+1 SIMPLE t1 range NULL n 4 NULL 10 100.00 Using index for group-by
Warnings:
Note 1003 select count(distinct `test`.`t1`.`n`) AS `count(distinct n)` from `test`.`t1`
drop table t1;
diff -Nrup a/mysql-test/r/group_min_max.result b/mysql-test/r/group_min_max.result
--- a/mysql-test/r/group_min_max.result 2007-11-26 16:21:52 +02:00
+++ b/mysql-test/r/group_min_max.result 2007-12-06 15:46:45 +02:00
@@ -1800,13 +1800,13 @@ b
a
explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 index NULL idx_t1_2 147 NULL 128 Using where; Using index
+1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by
explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and
(c = 'i121');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL idx_t1_1 163 NULL 128 Using where; Using index
explain extended select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 >
'a') and (b = 'c');
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 index idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_2 147 NULL 128 75.00 Using where;
Using index
+1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 14 100.00 Using where;
Using index for group-by
Warnings:
Note 1003 select count(distinct `test`.`t1`.`a1`,`test`.`t1`.`a2`,`test`.`t1`.`b`) AS
`count(distinct a1,a2,b)` from `test`.`t1` where ((`test`.`t1`.`b` = _latin1'c') and
(`test`.`t1`.`a1` > _latin1'a') and (`test`.`t1`.`a2` > _latin1'a'))
explain select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
@@ -2371,4 +2371,49 @@ a MIN(b) MAX(b) AVG(b)
3 1 3 2.0000
2 1 3 2.0000
1 1 3 2.0000
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b));
+INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1);
+INSERT INTO t1 SELECT a, b + 4, 1 FROM t1;
+INSERT INTO t1 SELECT a + 1, b, 1 FROM t1;
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 5 NULL 9 Using index for group-by
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 10 NULL 9 Using index for group-by
+SELECT COUNT(DISTINCT a,b) FROM t1;
+COUNT(DISTINCT a,b)
+16
+EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range NULL a 10 NULL 9 Using index for group-by
+SELECT COUNT(DISTINCT b,a) FROM t1;
+COUNT(DISTINCT b,a)
+16
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 16 Using index
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 16 Using index
+EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 16 Using index
+EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 16 Using index
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 having COUNT(DISTINCT b) < 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index NULL a 10 NULL 16 Using index
+EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1_1 index NULL a 10 NULL 16 Using index; Using temporary; Using filesort
+1 SIMPLE t1_2 index NULL a 10 NULL 16 Using index; Using join buffer
+SELECT COUNT(DISTINCT a), 12 FROM t1;
+COUNT(DISTINCT a) 12
+2 12
DROP TABLE t1;
diff -Nrup a/mysql-test/t/group_min_max.test b/mysql-test/t/group_min_max.test
--- a/mysql-test/t/group_min_max.test 2007-11-26 13:33:34 +02:00
+++ b/mysql-test/t/group_min_max.test 2007-12-06 15:46:45 +02:00
@@ -942,3 +942,37 @@ SELECT a, MIN(b), MAX(b), AVG(b) FROM t1
SELECT a, MIN(b), MAX(b), AVG(b) FROM t1 GROUP BY a ORDER BY a DESC;
DROP TABLE t1;
+
+#
+# WL#3220 (Loose index scan for COUNT DISTINCT)
+#
+
+CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b));
+INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1);
+INSERT INTO t1 SELECT a, b + 4, 1 FROM t1;
+INSERT INTO t1 SELECT a + 1, b, 1 FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1;
+SELECT COUNT(DISTINCT a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1;
+SELECT COUNT(DISTINCT a,b) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1;
+SELECT COUNT(DISTINCT b,a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+
+EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 having COUNT(DISTINCT b) < 10;
+
+EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+
+SELECT COUNT(DISTINCT a), 12 FROM t1;
+
+DROP TABLE t1;
diff -Nrup a/sql/field.h b/sql/field.h
--- a/sql/field.h 2007-11-19 10:45:26 +02:00
+++ b/sql/field.h 2007-12-06 15:46:45 +02:00
@@ -1684,9 +1684,12 @@ public:
virtual bool str_needs_quotes() { return TRUE; }
my_decimal *val_decimal(my_decimal *);
int cmp(const uchar *a, const uchar *b)
- {
- DBUG_ASSERT(ptr == a);
- return Field_bit::key_cmp(b, bytes_in_rec+test(bit_len));
+ {
+ DBUG_ASSERT(ptr == a || ptr == b);
+ if (ptr == a)
+ return Field_bit::key_cmp(b, bytes_in_rec+test(bit_len));
+ else
+ return Field_bit::key_cmp(a, bytes_in_rec+test(bit_len)) * -1;
}
int cmp_binary_offset(uint row_offset)
{ return cmp_offset(row_offset); }
diff -Nrup a/sql/item_sum.cc b/sql/item_sum.cc
--- a/sql/item_sum.cc 2007-10-29 12:54:19 +02:00
+++ b/sql/item_sum.cc 2007-12-06 15:46:45 +02:00
@@ -61,6 +61,7 @@ bool Item_sum::init_sum_func_check(THD *
/* Save a pointer to object to be used in items for nested set functions */
thd->lex->in_sum_func= this;
nest_level= thd->lex->current_select->nest_level;
+ nest_level_tables_count= thd->lex->current_select->join->tables;
ref_by= 0;
aggr_level= -1;
aggr_sel= NULL;
@@ -307,6 +308,7 @@ Item_sum::Item_sum(List<Item> &list) :ar
}
}
mark_as_sum_func();
+ init_aggregator();
list.empty(); // Fields are used
}
@@ -320,7 +322,8 @@ Item_sum::Item_sum(THD *thd, Item_sum *i
aggr_sel(item->aggr_sel),
nest_level(item->nest_level), aggr_level(item->aggr_level),
quick_group(item->quick_group), used_tables_cache(item->used_tables_cache),
- forced_const(item->forced_const)
+ forced_const(item->forced_const),
+ nest_level_tables_count(item->nest_level_tables_count)
{
if (arg_count <= 2)
args=tmp_args;
@@ -328,6 +331,10 @@ Item_sum::Item_sum(THD *thd, Item_sum *i
if (!(args= (Item**) thd->alloc(sizeof(Item*)*arg_count)))
return;
memcpy(args, item->args, sizeof(Item*)*arg_count);
+ init_aggregator();
+ with_distinct= item->with_distinct;
+ if (item->aggr)
+ set_aggregator(item->aggr->Aggrtype());
}
@@ -472,6 +479,558 @@ void Item_sum::update_used_tables ()
}
+int Item_sum::set_aggregator(Aggregator::Aggregator_type aggregator)
+{
+ switch (aggregator)
+ {
+ case Aggregator::DISTINCT_AGGREGATOR:
+ aggr= new Aggregator_distinct(this);
+ break;
+
+ case Aggregator::SIMPLE_AGGREGATOR:
+ aggr= new Aggregator_simple(this);
+ break;
+ };
+ return aggr ? FALSE : TRUE;
+}
+
+
+void Item_sum::cleanup()
+{
+ if (aggr)
+ {
+ delete aggr;
+ aggr= NULL;
+ }
+ Item_result_field::cleanup();
+ forced_const= FALSE;
+}
+
+
+/*
+ compare an entire key as array of binary bytes
+
+ SYNOPSIS
+ simple_raw_key_cmp()
+ arg Points to uint (Aggregator_distinct::tree_key_length)
+ key1 left key image
+ key2 right key image
+
+ DESCRIPTION
+ Used by the Unique class to compare keys. Will do binary comparisons.
+
+ RETURN VALUE
+ < 0 if key1 < key2
+ = 0 if key1 = key2
+ > 0 if key1 > key2
+*/
+static int simple_raw_key_cmp(void* arg, const void* key1, const void* key2)
+{
+ return memcmp(key1, key2, *(uint *) arg);
+}
+
+
+/*
+ compare keys consisting of single field that cannot be compared as binary
+
+ SYNOPSIS
+ simple_str_key_cmp()
+ arg Pointer to the relevant Field class instance
+ key1 left key image
+ key2 right key image
+
+ DESCRIPTION
+ Used by the Unique class to compare keys. Will do correct comparisons
+ for all field types.
+
+ RETURN VALUE
+ < 0 if key1 < key2
+ = 0 if key1 = key2
+ > 0 if key1 > key2
+*/
+static int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2)
+{
+ Field *f= (Field*) arg;
+ return f->cmp(key1, key2);
+}
+
+
+/*
+ correctly compare composite keys
+
+ SYNOPSIS
+ composite_key_cmp()
+ arg Pointer to the relevant Aggregator_distinct instance
+ key1 left key image
+ key2 right key image
+
+ DESCRIPTION
+ Used by the Unique class to compare keys. Will do correct comparisons
+ for composite keys with various field types.
+
+ RETURN VALUE
+ < 0 if key1 < key2
+ = 0 if key1 = key2
+ > 0 if key1 > key2
+*/
+int Aggregator_distinct::composite_key_cmp(void* arg, uchar* key1, uchar* key2)
+{
+ Aggregator_distinct *aggr= (Aggregator_distinct *) arg;
+ Field **field = aggr->table->field;
+ Field **field_end= field + aggr->table->s->fields;
+ uint32 *lengths=aggr->field_lengths;
+ for (; field < field_end; ++field)
+ {
+ Field* f = *field;
+ int len = *lengths++;
+ int res = f->cmp(key1, key2);
+ if (res)
+ return res;
+ key1 += len;
+ key2 += len;
+ }
+ return 0;
+}
+
+static enum enum_field_types
+calc_tmp_field_type(enum enum_field_types table_field_type,
+ Item_result result_type)
+{
+ /* Adjust tmp table type according to the chosen aggregation type */
+ switch (result_type) {
+ case STRING_RESULT:
+ case REAL_RESULT:
+ if (table_field_type != MYSQL_TYPE_FLOAT)
+ table_field_type= MYSQL_TYPE_DOUBLE;
+ break;
+ case INT_RESULT:
+ table_field_type= MYSQL_TYPE_LONGLONG;
+ /* fallthrough */
+ case DECIMAL_RESULT:
+ if (table_field_type != MYSQL_TYPE_LONGLONG)
+ table_field_type= MYSQL_TYPE_NEWDECIMAL;
+ break;
+ case ROW_RESULT:
+ default:
+ DBUG_ASSERT(0);
+ }
+ return table_field_type;
+}
+
+
+/*
+ called before feeding the first row. Used to allocate/setup
+ the internal structures used for aggregation.
+
+ SYNOPSIS
+ setup()
+ thd Thread descriptor
+
+ DESCRIPTION
+ Prepares Aggregator_distinct to process the incoming stream.
+ Creates the temporary table and the Unique class if needed.
+ Called by Item_sum::aggregator_setup()
+
+ RETURN VALUE
+ FALSE success
+ TRUE failure
+*/
+bool Aggregator_distinct::setup(THD *thd)
+{
+ endup_done= FALSE;
+ /*
+ Setup can be called twice for ROLLUP items. This is a bug.
+ Please add DBUG_ASSERT(tree == 0) here when it's fixed.
+ */
+ if (tree || table || tmp_table_param)
+ return FALSE;
+
+ if (item_sum->setup(thd))
+ return TRUE;
+ if (item_sum->sum_func() == Item_sum::COUNT_FUNC ||
+ item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
+ {
+ List<Item> list;
+ SELECT_LEX *select_lex= thd->lex->current_select;
+
+ if (!(tmp_table_param= new TMP_TABLE_PARAM))
+ return TRUE;
+
+ /* Create a table with an unique key over all parameters */
+ for (uint i=0; i < item_sum->arg_count ; i++)
+ {
+ Item *item=item_sum->args[i];
+ if (list.push_back(item))
+ return TRUE; // End of memory
+ if (item->const_item() && item->is_null())
+ always_null=1;
+ }
+ if (always_null)
+ return FALSE;
+ count_field_types(select_lex,tmp_table_param,list,0);
+ tmp_table_param->force_copy_fields= item_sum->force_copy_fields;
+ DBUG_ASSERT(table == 0);
+ /*
+ Make create_tmp_table() convert BIT columns to BIGINT.
+ This is needed because BIT fields store parts of their data in
+ table's
+ null bits, and we don't have methods to compare two table
+ records, which
+ is needed by Unique which is used when HEAP table is
+ used.
+ */
+ {
+ List_iterator_fast<Item> li(list);
+ Item *item;
+ while ((item= li++))
+ {
+ if (item->type() == Item::FIELD_ITEM &&
+ ((Item_field*)item)->field->type() == FIELD_TYPE_BIT)
+ item->marker=4;
+ }
+ }
+ if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
+ 0,
+ (select_lex->options | thd->options),
+ HA_POS_ERROR, (char*)"")))
+ return TRUE;
+ table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
+ table->no_rows=1;
+
+ if (table->s->db_type() == heap_hton)
+ {
+ /*
+ No blobs, otherwise it would have been MyISAM: set up a compare
+ function and its arguments to use with Unique.
+ */
+ qsort_cmp2 compare_key;
+ void* cmp_arg;
+ Field **field= table->field;
+ Field **field_end= field + table->s->fields;
+ bool all_binary= TRUE;
+
+ for (tree_key_length= 0; field < field_end; ++field)
+ {
+ Field *f= *field;
+ enum enum_field_types type= f->type();
+ tree_key_length+= f->pack_length();
+ if ((type == MYSQL_TYPE_VARCHAR) ||
+ !f->binary() && (type == MYSQL_TYPE_STRING ||
+ type == MYSQL_TYPE_VAR_STRING))
+ {
+ all_binary= FALSE;
+ break;
+ }
+ }
+ if (all_binary)
+ {
+ cmp_arg= (void*) &tree_key_length;
+ compare_key= (qsort_cmp2) simple_raw_key_cmp;
+ }
+ else
+ {
+ if (table->s->fields == 1)
+ {
+ /*
+ If we have only one field, which is the most common use of
+ count(distinct), it is much faster to use a simpler key
+ compare method that can take advantage of not having to worry
+ about other fields.
+ */
+ compare_key= (qsort_cmp2) simple_str_key_cmp;
+ cmp_arg= (void*) table->field[0];
+ /* tree_key_length has been set already */
+ }
+ else
+ {
+ uint32 *length;
+ compare_key= (qsort_cmp2) composite_key_cmp;
+ cmp_arg= (void*) this;
+ field_lengths= (uint32*) thd->alloc(table->s->fields *
sizeof(uint32));
+ for (tree_key_length= 0, length= field_lengths, field= table->field;
+ field < field_end; ++field, ++length)
+ {
+ *length= (*field)->pack_length();
+ tree_key_length+= *length;
+ }
+ }
+ }
+ DBUG_ASSERT(tree == 0);
+ tree= new Unique(compare_key, cmp_arg, tree_key_length,
+ thd->variables.max_heap_table_size);
+ /*
+ The only time tree_key_length could be 0 is if someone does
+ count(distinct) on a char(0) field - stupid thing to do,
+ but this has to be handled - otherwise someone can crash
+ the server with a DoS attack
+ */
+ if (! tree)
+ return TRUE;
+ }
+ return FALSE;
+ }
+ else
+ {
+ List<Create_field> field_list;
+ Create_field field_def; /* field definition */
+ DBUG_ENTER("Item_sum_distinct::setup");
+ DBUG_ASSERT(tree == 0);
+
+ /*
+ Virtual table and the tree are created anew on each re-execution of
+ PS/SP. Hence all further allocations are performed in the runtime
+ mem_root.
+ */
+ if (field_list.push_back(&field_def))
+ return TRUE;
+
+ item_sum->null_value= item_sum->maybe_null= 1;
+ item_sum->quick_group= 0;
+
+ DBUG_ASSERT(item_sum->args[0]->fixed);
+
+ if (item_sum->args[0]->const_item())
+ {
+ (void) item_sum->args[0]->val_int();
+ if (item_sum->args[0]->null_value)
+ always_null=1;
+ }
+
+ if (always_null)
+ return FALSE;
+
+ enum enum_field_types field_type;
+
+ field_type= calc_tmp_field_type(item_sum->args[0]->field_type(),
+ item_sum->args[0]->result_type());
+ field_def.init_for_tmp_table(field_type,
+ item_sum->args[0]->max_length,
+ item_sum->args[0]->decimals,
+ item_sum->args[0]->maybe_null,
+ item_sum->args[0]->unsigned_flag);
+
+ if (! (table= create_virtual_tmp_table(thd, field_list)))
+ return TRUE;
+
+ /* XXX: check that the case of CHAR(0) works OK */
+ tree_key_length= table->s->reclength - table->s->null_bytes;
+
+ /*
+ Unique handles all unique elements in a tree until they can't fit
+ in. Then the tree is dumped to the temporary file. We can use
+ simple_raw_key_cmp because the table contains numbers only; decimals
+ are converted to binary representation as well.
+ */
+ tree= new Unique(simple_raw_key_cmp, &tree_key_length, tree_key_length,
+ thd->variables.max_heap_table_size);
+
+ DBUG_RETURN(tree == 0);
+ }
+}
+
+
+/*
+ Invalidate calculated value and clear the distinct rows
+
+ SYNOPSIS
+ clear()
+
+ DESCRIPTION
+ Frees space used by the internal data structures.
+ Removes the accumulated distinct rows. Invalidates the calculated result.
+*/
+void Aggregator_distinct::clear()
+{
+ endup_done= FALSE;
+ item_sum->clear();
+ if (tree)
+ tree->reset();
+ /* tree and table can be both null only if always_null */
+ if (item_sum->sum_func() == Item_sum::COUNT_FUNC ||
+ item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
+ {
+ if (!tree && table)
+ {
+ table->file->extra(HA_EXTRA_NO_CACHE);
+ table->file->delete_all_rows();
+ table->file->extra(HA_EXTRA_WRITE_CACHE);
+ }
+ }
+ else
+ {
+ item_sum->null_value= 1;
+ }
+}
+
+
+/*
+ Process incoming row. Add it to Unique/temp hash table if it's unique.
+ Skip the row if not unique.
+
+ SYNOPSIS
+ setup()
+
+ DESCRIPTION
+ Prepares Aggregator_distinct to process the incoming stream.
+ Creates the temporary table and the Unique class if needed.
+ Called by Item_sum::aggregator_add().
+ To actually get the result value in item_sum's buffers
+ Aggregator_distinct::endup() must be called.
+
+ RETURN VALUE
+ FALSE success
+ TRUE failure
+*/
+bool Aggregator_distinct::add()
+{
+ if (always_null)
+ return 0;
+
+ if (item_sum->sum_func() == Item_sum::COUNT_FUNC ||
+ item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
+ {
+ int error;
+ copy_fields(tmp_table_param);
+ copy_funcs(tmp_table_param->items_to_copy);
+
+ for (Field **field=table->field ; *field ; field++)
+ if ((*field)->is_real_null(0))
+ return 0; // Don't count NULL
+
+ if (tree)
+ {
+ /*
+ The first few bytes of record (at least one) are just markers
+ for deleted and NULLs. We want to skip them since they will
+ bloat the tree without providing any valuable info. Besides,
+ key_length used to initialize the tree didn't include space for them.
+ */
+ return tree->unique_add(table->record[0] + table->s->null_bytes);
+ }
+ if ((error= table->file->ha_write_row(table->record[0])) &&
+ table->file->is_fatal_error(error, HA_CHECK_DUP))
+ return TRUE;
+ return FALSE;
+ }
+ else
+ {
+ item_sum->args[0]->save_in_field(table->field[0], FALSE);
+ if (table->field[0]->is_null())
+ return 0;
+ DBUG_ASSERT(tree);
+ item_sum->null_value= 0;
+ /*
+ '0' values are also stored in the tree. This doesn't matter
+ for SUM(DISTINCT), but is important for AVG(DISTINCT)
+ */
+ return tree->unique_add(table->field[0]->ptr);
+ }
+}
+
+
+/*
+ Aggregate a distinct row from the distinct hash table
+
+ SYNOPSIS
+ setup()
+ element pointer to the row data.
+ num_of_dups number of rows in the input data with the same key
+ item pointer to to relevant Aggregator_distinct instance
+
+ DESCRIPTION
+ This is a wrapper static function that passes control to
+ Aggregator_distinct::unique_walk_function().
+ Called for each row into the hash table 'Aggregator_distinct::table'.
+ Calls Aggregator_distinct::unique_walk_function() member function
+ to include the current distinct row into the calculation of the
+ aggregate value.
+ This function is used for AVG/SUM(DISTINCT). For COUNT(DISTINCT)
+ it's called only when there are no blob arguments and the data don't
+ fit into memory (so Unique makes persisted trees on disk).
+
+ RETURN VALUE
+ FALSE success
+ TRUE failure
+*/
+static int item_sum_distinct_walk(void *element, element_count num_of_dups,
+ void *item)
+{
+ Aggregator_distinct *daggr= (Aggregator_distinct *)item;
+ DBUG_ASSERT(daggr->Aggrtype() == Aggregator::DISTINCT_AGGREGATOR);
+ return daggr->unique_walk_function(element);
+}
+
+
+/*
+ Calculate the aggregate function value
+
+ SYNOPSIS
+ endup()
+
+ DESCRIPTION
+ Since Distinct_aggregator::add() just collects the distinct rows,
+ we must go over the distinct rows and feed them to the aggregation
+ function before returning its value.
+ This is what endup () does. It also sets the result validity flag
+ endup_done to TRUE so it will not recalculate the aggregate value
+ again if the Item_sum hasn't been reset.
+
+ RETURN VALUE
+*/
+void Aggregator_distinct::endup()
+{
+ /* prevent consecutive recalculations */
+ if (endup_done)
+ return;
+
+ /* we are going to calculate the aggregate value afresh */
+ item_sum->clear();
+
+ /* The result will definitely be null : no more calculations needed */
+ if (always_null)
+ return;
+
+ if (item_sum->sum_func() == Item_sum::COUNT_FUNC ||
+ item_sum->sum_func() == Item_sum::COUNT_DISTINCT_FUNC)
+ {
+ DBUG_ASSERT(item_sum->fixed == 1);
+ Item_sum_count *sum= (Item_sum_count *)item_sum;
+ if (tree && tree->elements == 0)
+ {
+ /* everything fits in memory */
+ sum->count= (longlong) tree->elements_in_tree();
+ endup_done= TRUE;
+ }
+ if (!tree)
+ {
+ /* there were blobs */
+ table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
+ sum->count= table->file->stats.records;
+ endup_done= TRUE;
+ }
+ }
+ else
+ {
+ /*
+ We don't have a tree only if 'setup()' hasn't been called;
+ this is the case of sql_select.cc:return_zero_rows.
+ */
+ if (tree)
+ table->field[0]->set_notnull();
+ }
+
+ if (tree && !endup_done)
+ {
+ /* go over the tree of distinct keys and calculate the aggregate value */
+ use_distinct_values= TRUE;
+ tree->walk(item_sum_distinct_walk, (void*) this);
+ use_distinct_values= FALSE;
+ }
+ /* prevent consecutive recalculations */
+ endup_done= TRUE;
+}
+
+
String *
Item_sum_num::val_str(String *str)
{
@@ -731,10 +1290,27 @@ void Item_sum_sum::fix_length_and_dec()
bool Item_sum_sum::add()
{
DBUG_ENTER("Item_sum_sum::add");
+ bool arg_is_null;
if (hybrid_type == DECIMAL_RESULT)
{
- my_decimal value, *val= args[0]->val_decimal(&value);
- if (!args[0]->null_value)
+ my_decimal value, *val;
+ if (aggr->use_distinct_values)
+ {
+ /*
+ We are aggregating distinct rows. Get the value from the distinct
+ table pointer
+ */
+ Aggregator_distinct *daggr= (Aggregator_distinct *)aggr;
+ val= daggr->table->field[0]->val_decimal (&value);
+ arg_is_null= daggr->table->field[0]->is_null();
+ }
+ else
+ {
+ /* non-distinct aggregation */
+ val= args[0]->val_decimal(&value);
+ arg_is_null= args[0]->null_value;
+ }
+ if (!arg_is_null)
{
my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff^1),
val, dec_buffs + curr_dec_buff);
@@ -744,8 +1320,25 @@ bool Item_sum_sum::add()
}
else
{
- sum+= args[0]->val_real();
- if (!args[0]->null_value)
+ double val;
+ if (aggr->use_distinct_values)
+ {
+ /*
+ We are aggregating distinct rows. Get the value from the distinct
+ table pointer
+ */
+ Aggregator_distinct *daggr= (Aggregator_distinct *)aggr;
+ val= daggr->table->field[0]->val_real ();
+ arg_is_null= daggr->table->field[0]->is_null();
+ }
+ else
+ {
+ /* non-distinct aggregation */
+ val= args[0]->val_real();
+ arg_is_null= args[0]->null_value;
+ }
+ sum+= val;
+ if (!arg_is_null)
null_value= 0;
}
DBUG_RETURN(0);
@@ -755,6 +1348,8 @@ bool Item_sum_sum::add()
longlong Item_sum_sum::val_int()
{
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
{
longlong result;
@@ -769,6 +1364,8 @@ longlong Item_sum_sum::val_int()
double Item_sum_sum::val_real()
{
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
my_decimal2double(E_DEC_FATAL_ERROR, dec_buffs + curr_dec_buff, &sum);
return sum;
@@ -777,6 +1374,8 @@ double Item_sum_sum::val_real()
String *Item_sum_sum::val_str(String *str)
{
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
return val_string_from_decimal(str);
return val_string_from_real(str);
@@ -785,6 +1384,8 @@ String *Item_sum_sum::val_str(String *st
my_decimal *Item_sum_sum::val_decimal(my_decimal *val)
{
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
return (dec_buffs + curr_dec_buff);
return val_decimal_from_real(val);
@@ -792,300 +1393,50 @@ my_decimal *Item_sum_sum::val_decimal(my
/***************************************************************************/
-C_MODE_START
-
-/* Declarations for auxilary C-callbacks */
-
-static int simple_raw_key_cmp(void* arg, const void* key1, const void* key2)
-{
- return memcmp(key1, key2, *(uint *) arg);
-}
-
-
-static int item_sum_distinct_walk(void *element, element_count num_of_dups,
- void *item)
-{
- return ((Item_sum_distinct*) (item))->unique_walk_function(element);
-}
-
-C_MODE_END
-
-/* Item_sum_distinct */
-
-Item_sum_distinct::Item_sum_distinct(Item *item_arg)
- :Item_sum_num(item_arg), tree(0)
-{
- /*
- quick_group is an optimizer hint, which means that GROUP BY can be
- handled with help of index on grouped columns.
- By setting quick_group to zero we force creation of temporary table
- to perform GROUP BY.
- */
- quick_group= 0;
-}
-
-
-Item_sum_distinct::Item_sum_distinct(THD *thd, Item_sum_distinct *original)
- :Item_sum_num(thd, original), val(original->val), tree(0),
- table_field_type(original->table_field_type)
-{
- quick_group= 0;
-}
-
-
/*
- Behaves like an Integer except to fix_length_and_dec().
- Additionally div() converts val with this traits to a val with true
- decimal traits along with conversion of integer value to decimal value.
- This is to speedup SUM/AVG(DISTINCT) evaluation for 8-32 bit integer
- values.
-*/
-struct Hybrid_type_traits_fast_decimal: public
- Hybrid_type_traits_integer
-{
- virtual Item_result type() const { return DECIMAL_RESULT; }
- virtual void fix_length_and_dec(Item *item, Item *arg) const
- { Hybrid_type_traits_decimal::instance()->fix_length_and_dec(item, arg); }
-
- virtual void div(Hybrid_type *val, ulonglong u) const
- {
- int2my_decimal(E_DEC_FATAL_ERROR, val->integer, 0, val->dec_buf);
- val->used_dec_buf_no= 0;
- val->traits= Hybrid_type_traits_decimal::instance();
- val->traits->div(val, u);
- }
- static const Hybrid_type_traits_fast_decimal *instance();
- Hybrid_type_traits_fast_decimal() {};
-};
-
-static const Hybrid_type_traits_fast_decimal fast_decimal_traits_instance;
+ Aggregate a distinct row from the distinct hash table
+
+ SYNOPSIS
+ unique_walk_function()
+ element pointer to the row data.
-const Hybrid_type_traits_fast_decimal
- *Hybrid_type_traits_fast_decimal::instance()
-{
- return &fast_decimal_traits_instance;
-}
+ DESCRIPTION
+ Called for each row into the hash table 'Aggregator_distinct::table'.
+ Includes the current distinct row into the calculation of the
+ aggregate value. Uses the Field classes to get the value from the row.
+ This function is used for AVG/SUM(DISTINCT). For COUNT(DISTINCT)
+ it's called only when there are no blob arguments and the data don't
+ fit into memory (so Unique makes persisted trees on disk).
+
+ RETURN VALUE
+ FALSE success
+ TRUE failure
+*/
-void Item_sum_distinct::fix_length_and_dec()
+bool Aggregator_distinct::unique_walk_function(void *element)
{
- DBUG_ASSERT(args[0]->fixed);
-
- table_field_type= args[0]->field_type();
-
- /* Adjust tmp table type according to the chosen aggregation type */
- switch (args[0]->result_type()) {
- case STRING_RESULT:
- case REAL_RESULT:
- val.traits= Hybrid_type_traits::instance();
- if (table_field_type != MYSQL_TYPE_FLOAT)
- table_field_type= MYSQL_TYPE_DOUBLE;
- break;
- case INT_RESULT:
- /*
- Preserving int8, int16, int32 field types gives ~10% performance boost
- as the size of result tree becomes significantly smaller.
- Another speed up we gain by using longlong for intermediate
- calculations. The range of int64 is enough to hold sum 2^32 distinct
- integers each <= 2^32.
- */
- if (table_field_type == MYSQL_TYPE_INT24 ||
- table_field_type >= MYSQL_TYPE_TINY &&
- table_field_type <= MYSQL_TYPE_LONG)
- {
- val.traits= Hybrid_type_traits_fast_decimal::instance();
- break;
- }
- table_field_type= MYSQL_TYPE_LONGLONG;
- /* fallthrough */
- case DECIMAL_RESULT:
- val.traits= Hybrid_type_traits_decimal::instance();
- if (table_field_type != MYSQL_TYPE_LONGLONG)
- table_field_type= MYSQL_TYPE_NEWDECIMAL;
- break;
- case ROW_RESULT:
- default:
- DBUG_ASSERT(0);
- }
- val.traits->fix_length_and_dec(this, args[0]);
+ memcpy(table->field[0]->ptr, element, tree_key_length);
+ item_sum->add();
+ return 0;
}
-bool Item_sum_distinct::setup(THD *thd)
+Aggregator_distinct::~Aggregator_distinct()
{
- List<Create_field> field_list;
- Create_field field_def; /* field definition */
- DBUG_ENTER("Item_sum_distinct::setup");
- /* It's legal to call setup() more than once when in a subquery */
if (tree)
- DBUG_RETURN(FALSE);
-
- /*
- Virtual table and the tree are created anew on each re-execution of
- PS/SP. Hence all further allocations are performed in the runtime
- mem_root.
- */
- if (field_list.push_back(&field_def))
- DBUG_RETURN(TRUE);
-
- null_value= maybe_null= 1;
- quick_group= 0;
-
- DBUG_ASSERT(args[0]->fixed);
-
- field_def.init_for_tmp_table(table_field_type, args[0]->max_length,
- args[0]->decimals, args[0]->maybe_null,
- args[0]->unsigned_flag);
-
- if (! (table= create_virtual_tmp_table(thd, field_list)))
- DBUG_RETURN(TRUE);
-
- /* XXX: check that the case of CHAR(0) works OK */
- tree_key_length= table->s->reclength - table->s->null_bytes;
-
- /*
- Unique handles all unique elements in a tree until they can't fit
- in. Then the tree is dumped to the temporary file. We can use
- simple_raw_key_cmp because the table contains numbers only; decimals
- are converted to binary representation as well.
- */
- tree= new Unique(simple_raw_key_cmp, &tree_key_length, tree_key_length,
- thd->variables.max_heap_table_size);
-
- is_evaluated= FALSE;
- DBUG_RETURN(tree == 0);
-}
-
-
-bool Item_sum_distinct::add()
-{
- args[0]->save_in_field(table->field[0], FALSE);
- is_evaluated= FALSE;
- if (!table->field[0]->is_null())
{
- DBUG_ASSERT(tree);
- null_value= 0;
- /*
- '0' values are also stored in the tree. This doesn't matter
- for SUM(DISTINCT), but is important for AVG(DISTINCT)
- */
- return tree->unique_add(table->field[0]->ptr);
+ delete tree;
+ tree= NULL;
}
- return 0;
-}
-
-
-bool Item_sum_distinct::unique_walk_function(void *element)
-{
- memcpy(table->field[0]->ptr, element, tree_key_length);
- ++count;
- val.traits->add(&val, table->field[0]);
- return 0;
-}
-
-
-void Item_sum_distinct::clear()
-{
- DBUG_ENTER("Item_sum_distinct::clear");
- DBUG_ASSERT(tree != 0); /* we always have a tree */
- null_value= 1;
- tree->reset();
- is_evaluated= FALSE;
- DBUG_VOID_RETURN;
-}
-
-void Item_sum_distinct::cleanup()
-{
- Item_sum_num::cleanup();
- delete tree;
- tree= 0;
- table= 0;
- is_evaluated= FALSE;
-}
-
-Item_sum_distinct::~Item_sum_distinct()
-{
- delete tree;
- /* no need to free the table */
-}
-
-
-void Item_sum_distinct::calculate_val_and_count()
-{
- if (!is_evaluated)
+ if (table)
{
- count= 0;
- val.traits->set_zero(&val);
- /*
- We don't have a tree only if 'setup()' hasn't been called;
- this is the case of sql_select.cc:return_zero_rows.
- */
- if (tree)
- {
- table->field[0]->set_notnull();
- tree->walk(item_sum_distinct_walk, (void*) this);
- }
- is_evaluated= TRUE;
+ free_tmp_table(table->in_use, table);
+ table=NULL;
}
-}
-
-
-double Item_sum_distinct::val_real()
-{
- calculate_val_and_count();
- return val.traits->val_real(&val);
-}
-
-
-my_decimal *Item_sum_distinct::val_decimal(my_decimal *to)
-{
- calculate_val_and_count();
- if (null_value)
- return 0;
- return val.traits->val_decimal(&val, to);
-}
-
-
-longlong Item_sum_distinct::val_int()
-{
- calculate_val_and_count();
- return val.traits->val_int(&val, unsigned_flag);
-}
-
-
-String *Item_sum_distinct::val_str(String *str)
-{
- calculate_val_and_count();
- if (null_value)
- return 0;
- return val.traits->val_str(&val, str, decimals);
-}
-
-/* end of Item_sum_distinct */
-
-/* Item_sum_avg_distinct */
-
-void
-Item_sum_avg_distinct::fix_length_and_dec()
-{
- Item_sum_distinct::fix_length_and_dec();
- prec_increment= current_thd->variables.div_precincrement;
- /*
- AVG() will divide val by count. We need to reserve digits
- after decimal point as the result can be fractional.
- */
- decimals= min(decimals + prec_increment, NOT_FIXED_DEC);
-}
-
-
-void
-Item_sum_avg_distinct::calculate_val_and_count()
-{
- if (!is_evaluated)
+ if (tmp_table_param)
{
- Item_sum_distinct::calculate_val_and_count();
- if (count)
- val.traits->div(&val, count);
- is_evaluated= TRUE;
+ delete tmp_table_param;
+ tmp_table_param= NULL;
}
}
@@ -1112,6 +1463,8 @@ bool Item_sum_count::add()
longlong Item_sum_count::val_int()
{
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
return (longlong) count;
}
@@ -1201,6 +1554,8 @@ bool Item_sum_avg::add()
double Item_sum_avg::val_real()
{
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
if (!count)
{
null_value=1;
@@ -1215,6 +1570,8 @@ my_decimal *Item_sum_avg::val_decimal(my
my_decimal sum_buff, cnt;
const my_decimal *sum_dec;
DBUG_ASSERT(fixed == 1);
+ if (aggr)
+ aggr->endup();
if (!count)
{
null_value=1;
@@ -1229,6 +1586,8 @@ my_decimal *Item_sum_avg::val_decimal(my
String *Item_sum_avg::val_str(String *str)
{
+ if (aggr)
+ aggr->endup();
if (hybrid_type == DECIMAL_RESULT)
return val_string_from_decimal(str);
return val_string_from_real(str);
@@ -1923,6 +2282,7 @@ void Item_sum_hybrid::reset_field()
void Item_sum_sum::reset_field()
{
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
if (hybrid_type == DECIMAL_RESULT)
{
my_decimal value, *arg_val= args[0]->val_decimal(&value);
@@ -1947,6 +2307,7 @@ void Item_sum_count::reset_field()
{
uchar *res=result_field->ptr;
longlong nr=0;
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
if (!args[0]->maybe_null || !args[0]->is_null())
nr=1;
@@ -1957,6 +2318,7 @@ void Item_sum_count::reset_field()
void Item_sum_avg::reset_field()
{
uchar *res=result_field->ptr;
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
if (hybrid_type == DECIMAL_RESULT)
{
longlong tmp;
@@ -2010,6 +2372,7 @@ void Item_sum_bit::update_field()
void Item_sum_sum::update_field()
{
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
if (hybrid_type == DECIMAL_RESULT)
{
my_decimal value, *arg_val= args[0]->val_decimal(&value);
@@ -2062,6 +2425,9 @@ void Item_sum_avg::update_field()
{
longlong field_count;
uchar *res=result_field->ptr;
+
+ DBUG_ASSERT (aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR);
+
if (hybrid_type == DECIMAL_RESULT)
{
my_decimal value, *arg_val= args[0]->val_decimal(&value);
@@ -2356,315 +2722,6 @@ double Item_variance_field::val_real()
return 0.0;
return variance_fp_recurrence_result(recurrence_s, count, sample);
-}
-
-
-/****************************************************************************
-** COUNT(DISTINCT ...)
-****************************************************************************/
-
-int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2)
-{
- Field *f= (Field*) arg;
- return f->cmp(key1, key2);
-}
-
-/*
- Did not make this one static - at least gcc gets confused when
- I try to declare a static function as a friend. If you can figure
- out the syntax to make a static function a friend, make this one
- static
-*/
-
-int composite_key_cmp(void* arg, uchar* key1, uchar* key2)
-{
- Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
- Field **field = item->table->field;
- Field **field_end= field + item->table->s->fields;
- uint32 *lengths=item->field_lengths;
- for (; field < field_end; ++field)
- {
- Field* f = *field;
- int len = *lengths++;
- int res = f->cmp(key1, key2);
- if (res)
- return res;
- key1 += len;
- key2 += len;
- }
- return 0;
-}
-
-
-C_MODE_START
-
-static int count_distinct_walk(void *elem, element_count count, void *arg)
-{
- (*((ulonglong*)arg))++;
- return 0;
-}
-
-C_MODE_END
-
-
-void Item_sum_count_distinct::cleanup()
-{
- DBUG_ENTER("Item_sum_count_distinct::cleanup");
- Item_sum_int::cleanup();
-
- /* Free objects only if we own them. */
- if (!original)
- {
- /*
- We need to delete the table and the tree in cleanup() as
- they were allocated in the runtime memroot. Using the runtime
- memroot reduces memory footprint for PS/SP and simplifies setup().
- */
- delete tree;
- tree= 0;
- is_evaluated= FALSE;
- if (table)
- {
- free_tmp_table(table->in_use, table);
- table= 0;
- }
- delete tmp_table_param;
- tmp_table_param= 0;
- }
- always_null= FALSE;
- DBUG_VOID_RETURN;
-}
-
-
-/* This is used by rollup to create a separate usable copy of the function */
-
-void Item_sum_count_distinct::make_unique()
-{
- table=0;
- original= 0;
- force_copy_fields= 1;
- tree= 0;
- is_evaluated= FALSE;
- tmp_table_param= 0;
- always_null= FALSE;
-}
-
-
-Item_sum_count_distinct::~Item_sum_count_distinct()
-{
- cleanup();
-}
-
-
-bool Item_sum_count_distinct::setup(THD *thd)
-{
- List<Item> list;
- SELECT_LEX *select_lex= thd->lex->current_select;
-
- /*
- Setup can be called twice for ROLLUP items. This is a bug.
- Please add DBUG_ASSERT(tree == 0) here when it's fixed.
- It's legal to call setup() more than once when in a subquery
- */
- if (tree || table || tmp_table_param)
- return FALSE;
-
- if (!(tmp_table_param= new TMP_TABLE_PARAM))
- return TRUE;
-
- /* Create a table with an unique key over all parameters */
- for (uint i=0; i < arg_count ; i++)
- {
- Item *item=args[i];
- if (list.push_back(item))
- return TRUE; // End of memory
- if (item->const_item() && item->is_null())
- always_null= 1;
- }
- if (always_null)
- return FALSE;
- count_field_types(select_lex, tmp_table_param, list, 0);
- tmp_table_param->force_copy_fields= force_copy_fields;
- DBUG_ASSERT(table == 0);
- /*
- Make create_tmp_table() convert BIT columns to BIGINT.
- This is needed because BIT fields store parts of their data in table's
- null bits, and we don't have methods to compare two table records, which
- is needed by Unique which is used when HEAP table is used.
- */
- {
- List_iterator_fast<Item> li(list);
- Item *item;
- while ((item= li++))
- {
- if (item->type() == Item::FIELD_ITEM &&
- ((Item_field*)item)->field->type() == FIELD_TYPE_BIT)
- item->marker=4;
- }
- }
-
- if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
- 0,
- (select_lex->options | thd->options),
- HA_POS_ERROR, (char*)"")))
- return TRUE;
- table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
- table->no_rows=1;
-
- if (table->s->db_type() == heap_hton)
- {
- /*
- No blobs, otherwise it would have been MyISAM: set up a compare
- function and its arguments to use with Unique.
- */
- qsort_cmp2 compare_key;
- void* cmp_arg;
- Field **field= table->field;
- Field **field_end= field + table->s->fields;
- bool all_binary= TRUE;
-
- for (tree_key_length= 0; field < field_end; ++field)
- {
- Field *f= *field;
- enum enum_field_types f_type= f->type();
- tree_key_length+= f->pack_length();
- if ((f_type == MYSQL_TYPE_VARCHAR) ||
- !f->binary() && (f_type == MYSQL_TYPE_STRING ||
- f_type == MYSQL_TYPE_VAR_STRING))
- {
- all_binary= FALSE;
- break;
- }
- }
- if (all_binary)
- {
- cmp_arg= (void*) &tree_key_length;
- compare_key= (qsort_cmp2) simple_raw_key_cmp;
- }
- else
- {
- if (table->s->fields == 1)
- {
- /*
- If we have only one field, which is the most common use of
- count(distinct), it is much faster to use a simpler key
- compare method that can take advantage of not having to worry
- about other fields.
- */
- compare_key= (qsort_cmp2) simple_str_key_cmp;
- cmp_arg= (void*) table->field[0];
- /* tree_key_length has been set already */
- }
- else
- {
- uint32 *length;
- compare_key= (qsort_cmp2) composite_key_cmp;
- cmp_arg= (void*) this;
- field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32));
- for (tree_key_length= 0, length= field_lengths, field= table->field;
- field < field_end; ++field, ++length)
- {
- *length= (*field)->pack_length();
- tree_key_length+= *length;
- }
- }
- }
- DBUG_ASSERT(tree == 0);
- tree= new Unique(compare_key, cmp_arg, tree_key_length,
- thd->variables.max_heap_table_size);
- /*
- The only time tree_key_length could be 0 is if someone does
- count(distinct) on a char(0) field - stupid thing to do,
- but this has to be handled - otherwise someone can crash
- the server with a DoS attack
- */
- is_evaluated= FALSE;
- if (! tree)
- return TRUE;
- }
- return FALSE;
-}
-
-
-Item *Item_sum_count_distinct::copy_or_same(THD* thd)
-{
- return new (thd->mem_root) Item_sum_count_distinct(thd, this);
-}
-
-
-void Item_sum_count_distinct::clear()
-{
- /* tree and table can be both null only if always_null */
- is_evaluated= FALSE;
- if (tree)
- {
- tree->reset();
- }
- else if (table)
- {
- table->file->extra(HA_EXTRA_NO_CACHE);
- table->file->delete_all_rows();
- table->file->extra(HA_EXTRA_WRITE_CACHE);
- }
-}
-
-bool Item_sum_count_distinct::add()
-{
- int error;
- if (always_null)
- return 0;
- copy_fields(tmp_table_param);
- copy_funcs(tmp_table_param->items_to_copy);
-
- for (Field **field=table->field ; *field ; field++)
- if ((*field)->is_real_null(0))
- return 0; // Don't count NULL
-
- is_evaluated= FALSE;
- if (tree)
- {
- /*
- The first few bytes of record (at least one) are just markers
- for deleted and NULLs. We want to skip them since they will
- bloat the tree without providing any valuable info. Besides,
- key_length used to initialize the tree didn't include space for them.
- */
- return tree->unique_add(table->record[0] + table->s->null_bytes);
- }
- if ((error= table->file->ha_write_row(table->record[0])) &&
- table->file->is_fatal_error(error, HA_CHECK_DUP))
- return TRUE;
- return FALSE;
-}
-
-
-longlong Item_sum_count_distinct::val_int()
-{
- int error;
- DBUG_ASSERT(fixed == 1);
- if (!table) // Empty query
- return LL(0);
- if (tree)
- {
- if (is_evaluated)
- return count;
-
- if (tree->elements == 0)
- return (longlong) tree->elements_in_tree(); // everything fits in memory
- count= 0;
- tree->walk(count_distinct_walk, (void*) &count);
- is_evaluated= TRUE;
- return (longlong) count;
- }
-
- error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
-
- if(error)
- {
- table->file->print_error(error, MYF(0));
- }
-
- return table->file->stats.records;
}
diff -Nrup a/sql/item_sum.h b/sql/item_sum.h
--- a/sql/item_sum.h 2007-07-01 06:25:43 +03:00
+++ b/sql/item_sum.h 2007-12-06 15:46:45 +02:00
@@ -23,6 +23,76 @@
#include <my_tree.h>
/*
+ The abstract base class for the Aggregator_* classes.
+ It implements the data collection functions (setup/add/clear)
+ as either pass-through to the real functionality or
+ as collectors into an Unique (for distinct) structure.
+
+ Note that update_field/reset_field are not in that
+ class, because they're simply not called when
+ GROUP BY/DISTINCT can be handled with help of index on grouped
+ fields (quick_group = 0);
+ */
+class Item_sum;
+class Aggregator_distinct;
+class Aggregator_simple;
+class Aggregator
+{
+ friend class Item_sum;
+ friend class Item_sum_sum;
+ friend class Item_sum_count;
+ friend class Item_sum_avg;
+
+ /*
+ all members are protected as this class is not usable
+ outside of an Item_sum descendant
+ */
+protected:
+ /*
+ When feeding back the data in endup() from Unique/temp table back to
+ Item_sum::add() methods we must read the data from Unique (and not
+ recalculate the functions that are given as arguments to the aggregate
+ function.
+ This flag is to tell the add() methods to take the data from the Unique
+ instead by calling the relevant val_..() method
+ */
+ bool use_distinct_values;
+
+ /* the aggregate function class to act on */
+ Item_sum *item_sum;
+public:
+ Aggregator (Item_sum *arg): item_sum(arg), use_distinct_values(FALSE) {}
+ virtual ~Aggregator () {} /* Keep gcc happy */
+
+ enum Aggregator_type { SIMPLE_AGGREGATOR, DISTINCT_AGGREGATOR };
+ virtual Aggregator_type Aggrtype() = 0;
+ /*
+ called before feeding the first row. Used to allocate/setup
+ the internal structures used for aggregation (like Unique
+ for distinct for example).
+ */
+ virtual bool setup(THD *) = 0;
+
+ /*
+ called to clean up the internal structures and reset them to their
+ initial state
+ */
+ virtual void clear() = 0;
+
+ /*
+ called when a value is to be processed through the aggregation
+ */
+ virtual bool add() = 0;
+
+ /*
+ called when there are no more data and the final value is to
+ be retrieved
+ */
+ virtual void endup() = 0;
+
+};
+
+/*
Class Item_sum is the base class used for special expressions that SQL calls
'set functions'. These expressions are formed with the help of aggregate
functions such as SUM, MAX, GROUP_CONCAT etc.
@@ -222,6 +292,27 @@ class st_select_lex;
class Item_sum :public Item_result_field
{
public:
+ /*
+ Aggregator class instance. Not set initially. Allocated only after
+ it is determined if the incoming data are already distinct.
+ */
+ Aggregator *aggr;
+
+ /*
+ Used in making ROLLUP. Set for the ROLLUP copies of the original
+ Item_sum and passed to create_tmp_field() to cause it to work
+ over the temp table buffer that is referenced by
+ Item_result_field::result_field.
+ */
+ bool force_copy_fields;
+
+ /*
+ Indicates how the aggregate function was specified by the parser :
+ 1 if it was written as AGGREGATE(DISTINCT),
+ 0 if it was AGGREGATE()
+ */
+ bool with_distinct;
+
enum Sumfunctype
{ COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC,
AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC,
@@ -243,6 +334,7 @@ public:
protected:
table_map used_tables_cache;
bool forced_const;
+ uchar nest_level_tables_count;
public:
@@ -250,48 +342,28 @@ public:
Item_sum() :arg_count(0), quick_group(1), forced_const(FALSE)
{
mark_as_sum_func();
+ init_aggregator();
}
Item_sum(Item *a) :args(tmp_args), arg_count(1), quick_group(1),
forced_const(FALSE)
{
args[0]=a;
mark_as_sum_func();
+ init_aggregator();
}
Item_sum( Item *a, Item *b ) :args(tmp_args), arg_count(2), quick_group(1),
forced_const(FALSE)
{
args[0]=a; args[1]=b;
mark_as_sum_func();
+ init_aggregator();
}
Item_sum(List<Item> &list);
//Copy constructor, need to perform subselects with temporary tables
Item_sum(THD *thd, Item_sum *item);
enum Type type() const { return SUM_FUNC_ITEM; }
virtual enum Sumfunctype sum_func () const=0;
-
- /*
- This method is similar to add(), but it is called when the current
- aggregation group changes. Thus it performs a combination of
- clear() and add().
- */
- inline bool reset() { clear(); return add(); };
-
- /*
- Prepare this item for evaluation of an aggregate value. This is
- called by reset() when a group changes, or, for correlated
- subqueries, between subquery executions. E.g. for COUNT(), this
- method should set count= 0;
- */
- virtual void clear()= 0;
-
- /*
- This method is called for the next row in the same group. Its
- purpose is to aggregate the new value to the previous values in
- the group (i.e. since clear() was called last time). For example,
- for COUNT(), do count++.
- */
- virtual bool add()=0;
-
+ inline bool reset() { aggregator_clear(); return aggregator_add(); };
/*
Called when new group is started and results are being saved in
a temporary table. Similar to reset(), but must also store value in
@@ -330,11 +402,6 @@ public:
{ return new Item_field(field); }
table_map used_tables() const { return used_tables_cache; }
void update_used_tables ();
- void cleanup()
- {
- Item::cleanup();
- forced_const= FALSE;
- }
bool is_null() { return null_value; }
void make_const ()
{
@@ -354,10 +421,15 @@ public:
may be initialized to 0 by clear() and to NULL by
no_rows_in_result().
*/
- void no_rows_in_result() { clear(); }
-
- virtual bool setup(THD *thd) {return 0;}
- virtual void make_unique() {}
+ void no_rows_in_result()
+ {
+ if (!aggr)
+ set_aggregator(with_distinct ?
+ Aggregator::DISTINCT_AGGREGATOR :
+ Aggregator::SIMPLE_AGGREGATOR);
+ reset();
+ }
+ virtual void make_unique() { force_copy_fields= TRUE; }
Item *get_tmp_table_item(THD *thd);
virtual Field *create_tmp_field(bool group, TABLE *table,
uint convert_blob_length);
@@ -367,11 +439,153 @@ public:
bool register_sum_func(THD *thd, Item **ref);
st_select_lex *depended_from()
{ return (nest_level == aggr_level ? 0 : aggr_sel); }
+
+ /* Initialization of distinct related members */
+ void init_aggregator()
+ {
+ aggr= NULL;
+ with_distinct= FALSE;
+ force_copy_fields= FALSE;
+ }
+ /*
+ calls Aggregator code : to be called where add/setup/clear
+ are currently called
+ */
+ inline bool aggregator_setup(THD *thd) { return aggr->setup(thd); };
+ inline void aggregator_clear() { aggr->clear(); }
+ inline bool aggregator_add() { return aggr->add(); };
+ /* stores the declared DISTINCT flag (from the parser) */
+ void set_distinct(bool distinct)
+ {
+ with_distinct= distinct;
+ quick_group= with_distinct ? 0 : 1;
+ }
+ /*
+ called when the final determination is done about the aggregation
+ type and the object is about to be used
+ */
+ int set_aggregator(Aggregator::Aggregator_type aggregator);
+ virtual void clear()= 0;
+ virtual bool add()=0;
+ virtual bool setup(THD *thd) {return 0;}
+
+ void cleanup ();
+};
+
+
+/*
+ The distinct aggregator. Implements AGGFN (DISTINCT ..)
+ Collects all the data into an Unique (similarly to what Item_sum_distinct
+ does currently) and then (if applicable) iterates over the list of
+ unique values and pumps them back into its object
+*/
+class Unique;
+
+class Aggregator_distinct : public Aggregator
+{
+ friend class Item_sum_sum;
+ friend class Item_sum_count;
+ friend class Item_sum_avg;
+protected:
+
+ /*
+ flag to prevent consecutive runs of endup(). Normally in endup there are
+ expensive calculations (like walking the distinct tree for example)
+ which we must do only once if there are no data changes.
+ We can re-use the data for the second and subsequent val_xxx() calls.
+ endup_done set to TRUE also means that the calculated values for
+ the aggregate functions are correct and don't need recalculation.
+ */
+ bool endup_done;
+
+ /*
+ Used depending on the type of the aggregate function :
+ - For COUNT(DISTINCT) this is a real temp table. It is used as a hash table
+ if there are blob columns. Otherwise (no blob fields) this member is used
+ as mediator to the Field classes to make the row data.
+ - For AVG/SUM(DISTINCT) this is a virtual temp table that use used as a
+ mediator to the Field classes to make the row data.
+ */
+ TABLE *table;
+
+ /*
+ An array of field lengths on row allocated and used only for
+ COUNT(DISTINCT) with multiple columns and no blobs. Used in
+ Aggregator_distinct::composite_key_cmp (called from Unique to compare
+ nodes
+ */
+ uint32 *field_lengths;
+
+ /*
+ used in conjunction with 'table' to support the access to Field classes
+ for COUNT(DISTINCT). Needed by copy_fields()/copy_funcs().
+ */
+ TMP_TABLE_PARAM *tmp_table_param;
+
+ /*
+ If there are no blobs in the COUNT(DISTINCT) arguments, we can use a tree,
+ which is faster than heap table. In that case, we still use the table
+ to help get things set up, but we insert nothing in it.
+ For AVG/SUM(DISTINCT) we always use this tree (as it takes a single
+ argument) to get the distinct rows.
+ */
+ Unique *tree;
+
+ /*
+ The length of the temp table row. Must be a member of the class as it
+ gets passed down to simple_raw_key_cmp () as a compare function argument
+ to Unique. simple_raw_key_cmp () is used as a fast comparinson function
+ when the entire row can be binary compared.
+ */
+ uint tree_key_length;
+
+ /*
+ Set to 1 if the result is known to be always NULL.
+ If set disactivates creation and usage of 'tree' and 'table' as well
+ as the calculation of the value on the first val_xxx().
+ */
+ bool always_null;
+
+public:
+ Aggregator_distinct (Item_sum *sum) :
+ Aggregator(sum), table(NULL), tree(NULL), tmp_table_param(NULL),
+ always_null(FALSE) {}
+ virtual ~Aggregator_distinct ();
+ Aggregator_type Aggrtype() { return DISTINCT_AGGREGATOR; }
+
+ bool setup(THD *);
+ void clear();
+ bool add();
+ void endup();
+
+ bool unique_walk_function(void *element);
+ static int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
+};
+
+
+/*
+ The pass-through aggregator. Implements AGGFN (DISTINCT ..) by
+ knowing it gets distinct data on input. So it just pumps them back
+ to the Item_sum descendant class.
+*/
+class Aggregator_simple : public Aggregator
+{
+public:
+
+ Aggregator_simple (Item_sum *sum) :
+ Aggregator(sum) {}
+ Aggregator_type Aggrtype() { return Aggregator::SIMPLE_AGGREGATOR; }
+
+ bool setup(THD * thd) { return item_sum->setup(thd); }
+ void clear() { item_sum->clear(); }
+ bool add() { return item_sum->add(); }
+ void endup() {};
};
class Item_sum_num :public Item_sum
{
+ friend class Aggregator_distinct;
protected:
/*
val_xxx() functions may be called several times during the execution of a
@@ -426,9 +640,15 @@ protected:
void fix_length_and_dec();
public:
- Item_sum_sum(Item *item_par) :Item_sum_num(item_par) {}
+ Item_sum_sum(Item *item_par, bool distinct= FALSE) :Item_sum_num(item_par)
+ {
+ set_distinct(distinct);
+ }
Item_sum_sum(THD *thd, Item_sum_sum *item);
- enum Sumfunctype sum_func () const {return SUM_FUNC;}
+ enum Sumfunctype sum_func () const
+ {
+ return with_distinct ? SUM_DISTINCT_FUNC : SUM_FUNC;
+ }
void clear();
bool add();
double val_real();
@@ -439,109 +659,41 @@ public:
void reset_field();
void update_field();
void no_rows_in_result() {}
- const char *func_name() const { return "sum("; }
+ const char *func_name() const
+ {
+ return with_distinct ? "sum(distinct " : "sum(";
+ }
Item *copy_or_same(THD* thd);
};
-
-/* Common class for SUM(DISTINCT), AVG(DISTINCT) */
-
-class Unique;
-
-class Item_sum_distinct :public Item_sum_num
+class Item_sum_count :public Item_sum_int
{
-protected:
- /* storage for the summation result */
- ulonglong count;
- Hybrid_type val;
- /* storage for unique elements */
- Unique *tree;
- TABLE *table;
- enum enum_field_types table_field_type;
- uint tree_key_length;
-protected:
- Item_sum_distinct(THD *thd, Item_sum_distinct *item);
-public:
- Item_sum_distinct(Item *item_par);
- ~Item_sum_distinct();
+ longlong count;
+
+ friend class Aggregator_distinct;
- bool setup(THD *thd);
void clear();
- void cleanup();
bool add();
- double val_real();
- my_decimal *val_decimal(my_decimal *);
- longlong val_int();
- String *val_str(String *str);
-
- /* XXX: does it need make_unique? */
-
- enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
- void reset_field() {} // not used
- void update_field() {} // not used
- virtual void no_rows_in_result() {}
- void fix_length_and_dec();
- enum Item_result result_type () const { return val.traits->type(); }
- virtual void calculate_val_and_count();
- virtual bool unique_walk_function(void *elem);
-};
-
-
-/*
- Item_sum_sum_distinct - implementation of SUM(DISTINCT expr).
- See also: MySQL manual, chapter 'Adding New Functions To MySQL'
- and comments in item_sum.cc.
-*/
-
-class Item_sum_sum_distinct :public Item_sum_distinct
-{
-private:
- Item_sum_sum_distinct(THD *thd, Item_sum_sum_distinct *item)
- :Item_sum_distinct(thd, item) {}
-public:
- Item_sum_sum_distinct(Item *item_arg) :Item_sum_distinct(item_arg) {}
-
- enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
- const char *func_name() const { return "sum(distinct "; }
- Item *copy_or_same(THD* thd) { return new Item_sum_sum_distinct(thd, this); }
-};
-
-
-/* Item_sum_avg_distinct - SELECT AVG(DISTINCT expr) FROM ... */
-
-class Item_sum_avg_distinct: public Item_sum_distinct
-{
-private:
- Item_sum_avg_distinct(THD *thd, Item_sum_avg_distinct *original)
- :Item_sum_distinct(thd, original) {}
-public:
- uint prec_increment;
- Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
-
- void fix_length_and_dec();
- virtual void calculate_val_and_count();
- enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
- const char *func_name() const { return "avg(distinct "; }
- Item *copy_or_same(THD* thd) { return new Item_sum_avg_distinct(thd, this); }
-};
-
-
-class Item_sum_count :public Item_sum_int
-{
- longlong count;
+ void cleanup();
public:
Item_sum_count(Item *item_par)
:Item_sum_int(item_par),count(0)
{}
+ Item_sum_count(List<Item> &list)
+ :Item_sum_int(list),count(0)
+ {
+ set_distinct(TRUE);
+ }
Item_sum_count(THD *thd, Item_sum_count *item)
:Item_sum_int(thd, item), count(item->count)
{}
- enum Sumfunctype sum_func () const { return COUNT_FUNC; }
- void clear();
+ enum Sumfunctype sum_func () const
+ {
+ return with_distinct ? COUNT_DISTINCT_FUNC : COUNT_FUNC;
+ }
void no_rows_in_result() { count=0; }
- bool add();
void make_const(longlong count_arg)
{
count=count_arg;
@@ -549,76 +701,12 @@ class Item_sum_count :public Item_sum_in
}
longlong val_int();
void reset_field();
- void cleanup();
void update_field();
- const char *func_name() const { return "count("; }
- Item *copy_or_same(THD* thd);
-};
-
-
-class TMP_TABLE_PARAM;
-
-class Item_sum_count_distinct :public Item_sum_int
-{
- TABLE *table;
- uint32 *field_lengths;
- TMP_TABLE_PARAM *tmp_table_param;
- bool force_copy_fields;
- /*
- If there are no blobs, we can use a tree, which
- is faster than heap table. In that case, we still use the table
- to help get things set up, but we insert nothing in it
- */
- Unique *tree;
- /*
- Storage for the value of count between calls to val_int() so val_int()
- will not recalculate on each call. Validitiy of the value is stored in
- is_evaluated.
- */
- longlong count;
- /*
- Following is 0 normal object and pointer to original one for copy
- (to correctly free resources)
- */
- Item_sum_count_distinct *original;
- uint tree_key_length;
-
-
- bool always_null; // Set to 1 if the result is always NULL
-
-
- friend int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
- friend int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2);
-
-public:
- Item_sum_count_distinct(List<Item> &list)
- :Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
- force_copy_fields(0), tree(0), count(0),
- original(0), always_null(FALSE)
- { quick_group= 0; }
- Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
- :Item_sum_int(thd, item), table(item->table),
- field_lengths(item->field_lengths),
- tmp_table_param(item->tmp_table_param),
- force_copy_fields(0), tree(item->tree), count(item->count),
- original(item), tree_key_length(item->tree_key_length),
- always_null(item->always_null)
- {}
- ~Item_sum_count_distinct();
-
- void cleanup();
-
- enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
- void clear();
- bool add();
- longlong val_int();
- void reset_field() { return ;} // Never called
- void update_field() { return ; } // Never called
- const char *func_name() const { return "count(distinct "; }
- bool setup(THD *thd);
- void make_unique();
+ const char *func_name() const
+ {
+ return with_distinct ? "count(distinct " : "count(";
+ }
Item *copy_or_same(THD* thd);
- void no_rows_in_result() {}
};
@@ -657,13 +745,18 @@ public:
uint prec_increment;
uint f_precision, f_scale, dec_bin_size;
- Item_sum_avg(Item *item_par) :Item_sum_sum(item_par), count(0) {}
+ Item_sum_avg(Item *item_par, bool distinct= FALSE)
+ :Item_sum_sum(item_par, distinct), count(0)
+ {}
Item_sum_avg(THD *thd, Item_sum_avg *item)
:Item_sum_sum(thd, item), count(item->count),
prec_increment(item->prec_increment) {}
void fix_length_and_dec();
- enum Sumfunctype sum_func () const {return AVG_FUNC;}
+ enum Sumfunctype sum_func () const
+ {
+ return with_distinct ? AVG_DISTINCT_FUNC : AVG_FUNC;
+ }
void clear();
bool add();
double val_real();
@@ -676,7 +769,10 @@ public:
Item *result_item(Field *field)
{ return new Item_avg_field(hybrid_type, this); }
void no_rows_in_result() {}
- const char *func_name() const { return "avg("; }
+ const char *func_name() const
+ {
+ return with_distinct ? "avg(distinct " : "avg(";
+ }
Item *copy_or_same(THD* thd);
Field *create_tmp_field(bool group, TABLE *table, uint convert_blob_length);
void cleanup()
diff -Nrup a/sql/opt_range.cc b/sql/opt_range.cc
--- a/sql/opt_range.cc 2007-12-01 23:46:41 +02:00
+++ b/sql/opt_range.cc 2007-12-06 15:46:45 +02:00
@@ -2020,7 +2020,7 @@ public:
class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
{
private:
- bool have_min, have_max;
+ bool have_min, have_max, have_agg_distinct;
KEY_PART_INFO *min_max_arg_part;
uint group_prefix_len;
uint used_key_parts;
@@ -2036,7 +2036,8 @@ private:
public:
ha_rows quick_prefix_records;
public:
- TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
+ TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
+ bool have_agg_distinct_arg,
KEY_PART_INFO *min_max_arg_part_arg,
uint group_prefix_len_arg, uint used_key_parts_arg,
uint group_key_parts_arg, KEY *index_info_arg,
@@ -2045,6 +2046,7 @@ public:
SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
uint param_idx_arg, ha_rows quick_prefix_records_arg)
: have_min(have_min_arg), have_max(have_max_arg),
+ have_agg_distinct(have_agg_distinct_arg),
min_max_arg_part(min_max_arg_part_arg),
group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
group_key_parts(group_key_parts_arg), index_info(index_info_arg),
@@ -9224,69 +9226,77 @@ get_best_group_min_max(PARAM *param, SEL
ORDER *tmp_group;
Item *item;
Item_field *item_field;
+ bool is_agg_distinct;
+ List<Item_field> agg_distinct_flds;
+
DBUG_ENTER("get_best_group_min_max");
/* Perform few 'cheap' tests whether this access method is applicable. */
if (!join)
DBUG_RETURN(NULL); /* This is not a select statement. */
if ((join->tables != 1) || /* The query must reference one table. */
- ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
- (!join->select_distinct)) ||
(join->select_lex->olap == ROLLUP_TYPE)) /* Check (B3) for ROLLUP */
DBUG_RETURN(NULL);
if (table->s->keys == 0) /* There are no indexes to use. */
DBUG_RETURN(NULL);
- /* Analyze the query in more detail. */
List_iterator<Item> select_items_it(join->fields_list);
+ is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
- /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
- if (join->make_sum_func_list(join->all_fields, join->fields_list, 1))
- DBUG_RETURN(NULL);
- if (join->sum_funcs[0])
+ if (!is_agg_distinct)
{
- Item_sum *min_max_item;
- Item_sum **func_ptr= join->sum_funcs;
- while ((min_max_item= *(func_ptr++)))
- {
- if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
- have_min= TRUE;
- else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
- have_max= TRUE;
- else
- DBUG_RETURN(NULL);
+ if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
+ (!join->select_distinct))
+ DBUG_RETURN(NULL);
+ /* Analyze the query in more detail. */
- /* The argument of MIN/MAX. */
- Item *expr= min_max_item->args[0]->real_item();
- if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
- {
- if (! min_max_arg_item)
- min_max_arg_item= (Item_field*) expr;
- else if (! min_max_arg_item->eq(expr, 1))
+ /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
+ if (join->make_sum_func_list(join->all_fields, join->fields_list, 1))
+ DBUG_RETURN(NULL);
+ if (join->sum_funcs[0])
+ {
+ Item_sum *min_max_item;
+ Item_sum **func_ptr= join->sum_funcs;
+ while ((min_max_item= *(func_ptr++)))
+ {
+ if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
+ have_min= TRUE;
+ else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
+ have_max= TRUE;
+ else
+ DBUG_RETURN(NULL);
+
+ /* The argument of MIN/MAX. */
+ Item *expr= min_max_item->args[0]->real_item();
+ if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
+ {
+ if (! min_max_arg_item)
+ min_max_arg_item= (Item_field*) expr;
+ else if (! min_max_arg_item->eq(expr, 1))
+ DBUG_RETURN(NULL);
+ }
+ else
+ DBUG_RETURN(NULL);
+ }
+ }
+ /