List:Commits« Previous MessageNext Message »
From:kgeorge Date:April 7 2006 11:07am
Subject:bk commit into 5.2 tree (gkodinov:1.2154)
View as plain text  
Below is the list of changes that have just been committed into a local
5.2 repository of kgeorge. When kgeorge does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2154 06/04/07 14:07:13 gkodinov@stripped +10 -0
  WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
  
  Phase 1 of the implementation : 
  correct detection and modification of the execution plan done only for COUNT(DISTINCT).
  Phase 2 (final) will include AVG(DISTINCT) and SUM(DISTINCT).

  sql/sql_yacc.yy
    1.465 06/04/07 14:07:07 gkodinov@stripped +3 -3
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    
     - instantiate the merged Item_sum_count class.

  sql/sql_select.h
    1.107 06/04/07 14:07:07 gkodinov@stripped +7 -0
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    
     - Detect the possibility of the optimization.
     - Turn flags/fields on if it is detected.

  sql/sql_select.cc
    1.392 06/04/07 14:07:07 gkodinov@stripped +99 -0
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    
     - Detect the possibility of the optimization.
     - Turn flags/fields on if it is detected.

  sql/opt_range.h
    1.61 06/04/07 14:07:07 gkodinov@stripped +5 -1
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    
     - Added a flag to denote that the optimization will be used

  sql/opt_range.cc
    1.206 06/04/07 14:07:07 gkodinov@stripped +39 -17
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    
     - Added a flag to denote that the optimization will be used

  sql/item_sum.h
    1.99 06/04/07 14:07:07 gkodinov@stripped +115 -51
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    
     - Merged the implementation of Item_sum_count and Item_sum_count_distinct

  sql/item_sum.cc
    1.174 06/04/07 14:07:07 gkodinov@stripped +79 -23
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    
     - Merged the implementation of Item_sum_count and Item_sum_count_distinct

  mysql-test/t/group_min_max.test
    1.19 06/04/07 14:07:07 gkodinov@stripped +35 -1
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    
     - Added tests for the COUNT(DISTINCT)
     - Fixed explains that are subject to the optimization.

  mysql-test/r/group_min_max.result
    1.21 06/04/07 14:07:06 gkodinov@stripped +47 -2
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    
     - Added tests for the COUNT(DISTINCT)
     - Fixed explains that are subject to the optimization.

  mysql-test/r/bench_count_distinct.result
    1.8 06/04/07 14:07:06 gkodinov@stripped +1 -1
    WL#3220: Loose index scan for COUNT DISTINCT: Phase 1
    

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	gkodinov
# Host:	rakia.progem.bg
# Root:	/home/kgeorge/mysql/5.2/WL3220

--- 1.173/sql/item_sum.cc	2005-12-22 06:10:54 +02:00
+++ 1.174/sql/item_sum.cc	2006-04-07 14:07:07 +03:00
@@ -1011,14 +1011,16 @@
 }
 
 
-void Item_sum_count::clear()
+void Item_sum_count::plain_clear()
 {
+  DBUG_ASSERT(!distinct);
   count= 0;
 }
 
 
-bool Item_sum_count::add()
+bool Item_sum_count::plain_add()
 {
+  DBUG_ASSERT(!distinct);
   if (!args[0]->maybe_null)
     count++;
   else
@@ -1030,22 +1032,74 @@
   return 0;
 }
 
-longlong Item_sum_count::val_int()
+longlong Item_sum_count::plain_val_int()
 {
   DBUG_ASSERT(fixed == 1);
+  DBUG_ASSERT(!distinct);
   return (longlong) count;
 }
 
 
-void Item_sum_count::cleanup()
+void Item_sum_count::plain_cleanup()
 {
-  DBUG_ENTER("Item_sum_count::cleanup");
+  DBUG_ENTER("Item_sum_count::plain_cleanup");
+  DBUG_ASSERT(!distinct);
   Item_sum_int::cleanup();
   used_table_cache= ~(table_map) 0;
   DBUG_VOID_RETURN;
 }
 
-
+void Item_sum_count::set_distinct(bool is_distinct)
+{
+  distinct = is_distinct;
+  quick_group = !is_distinct;
+  if (distinct)
+  {
+    used_tables_ptr= &Item_sum_count::distinct_used_tables;
+    const_item_ptr= &Item_sum_count::distinct_const_item;
+    sum_func_ptr= &Item_sum_count::distinct_sum_func;
+    clear_ptr= &Item_sum_count::distinct_clear;
+    no_rows_in_result_ptr= &Item_sum_count::distinct_no_rows_in_result;
+    add_ptr= &Item_sum_count::distinct_add;
+    val_int_ptr= &Item_sum_count::distinct_val_int;
+    reset_field_ptr= &Item_sum_count::distinct_reset_field;
+    cleanup_ptr= &Item_sum_count::distinct_cleanup;
+    update_field_ptr= &Item_sum_count::distinct_update_field;
+    func_name_ptr= &Item_sum_count::distinct_func_name;
+    setup_ptr= &Item_sum_count::distinct_setup;
+    make_unique_ptr= &Item_sum_count::distinct_make_unique;
+  }
+  else
+  {
+    used_tables_ptr= &Item_sum_count::plain_used_tables;
+    const_item_ptr= &Item_sum_count::plain_const_item;
+    sum_func_ptr= &Item_sum_count::plain_sum_func;
+    clear_ptr= &Item_sum_count::plain_clear;
+    no_rows_in_result_ptr= &Item_sum_count::plain_no_rows_in_result;
+    add_ptr= &Item_sum_count::plain_add;
+    val_int_ptr= &Item_sum_count::plain_val_int;
+    reset_field_ptr= &Item_sum_count::plain_reset_field;
+    cleanup_ptr= &Item_sum_count::plain_cleanup;
+    update_field_ptr= &Item_sum_count::plain_update_field;
+    func_name_ptr= &Item_sum_count::plain_func_name;
+    setup_ptr= &Item_sum_count::plain_setup;
+    make_unique_ptr= &Item_sum_count::plain_make_unique;
+  }
+
+  DBUG_ASSERT(used_tables_ptr != NULL);
+  DBUG_ASSERT(const_item_ptr != NULL);
+  DBUG_ASSERT(sum_func_ptr != NULL);
+  DBUG_ASSERT(clear_ptr != NULL);
+  DBUG_ASSERT(no_rows_in_result_ptr != NULL);
+  DBUG_ASSERT(add_ptr != NULL);
+  DBUG_ASSERT(val_int_ptr != NULL);
+  DBUG_ASSERT(reset_field_ptr != NULL);
+  DBUG_ASSERT(cleanup_ptr != NULL);
+  DBUG_ASSERT(update_field_ptr != NULL);
+  DBUG_ASSERT(func_name_ptr != NULL);
+  DBUG_ASSERT(setup_ptr != NULL);
+  DBUG_ASSERT(make_unique_ptr != NULL);
+}
 /*
   Avgerage
 */
@@ -1933,8 +1987,9 @@
 }
 
 
-void Item_sum_count::reset_field()
+void Item_sum_count::plain_reset_field()
 {
+  DBUG_ASSERT(!distinct);
   char *res=result_field->ptr;
   longlong nr=0;
 
@@ -2042,8 +2097,9 @@
 }
 
 
-void Item_sum_count::update_field()
+void Item_sum_count::plain_update_field()
 {
+  DBUG_ASSERT(!distinct);
   longlong nr;
   char *res=result_field->ptr;
 
@@ -2432,7 +2488,7 @@
 
 int composite_key_cmp(void* arg, byte* key1, byte* key2)
 {
-  Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
+  Item_sum_count* item = (Item_sum_count*)arg;
   Field **field    = item->table->field;
   Field **field_end= field + item->table->s->fields;
   uint32 *lengths=item->field_lengths;
@@ -2461,9 +2517,10 @@
 C_MODE_END
 
 
-void Item_sum_count_distinct::cleanup()
+void Item_sum_count::distinct_cleanup()
 {
-  DBUG_ENTER("Item_sum_count_distinct::cleanup");
+  DBUG_ENTER("Item_sum_count::distinct_cleanup");
+  DBUG_ASSERT(distinct);
   Item_sum_int::cleanup();
 
   /* Free objects only if we own them. */
@@ -2491,8 +2548,9 @@
 
 /* This is used by rollup to create a separate usable copy of the function */
 
-void Item_sum_count_distinct::make_unique()
+void Item_sum_count::distinct_make_unique()
 {
+  DBUG_ASSERT(distinct);
   table=0;
   original= 0;
   tree= 0;
@@ -2501,14 +2559,15 @@
 }
 
 
-Item_sum_count_distinct::~Item_sum_count_distinct()
+Item_sum_count::~Item_sum_count()
 {
   cleanup();
 }
 
 
-bool Item_sum_count_distinct::setup(THD *thd)
+bool Item_sum_count::distinct_setup(THD *thd)
 {
+  DBUG_ASSERT(distinct);
   List<Item> list;
   SELECT_LEX *select_lex= thd->lex->current_select;
 
@@ -2621,14 +2680,9 @@
 }
 
 
-Item *Item_sum_count_distinct::copy_or_same(THD* thd) 
-{
-  return new (thd->mem_root) Item_sum_count_distinct(thd, this);
-}
-
-
-void Item_sum_count_distinct::clear()
+void Item_sum_count::distinct_clear()
 {
+  DBUG_ASSERT(distinct);
   /* tree and table can be both null only if always_null */
   if (tree)
     tree->reset();
@@ -2640,8 +2694,9 @@
   }
 }
 
-bool Item_sum_count_distinct::add()
+bool Item_sum_count::distinct_add()
 {
+  DBUG_ASSERT(distinct);
   int error;
   if (always_null)
     return 0;
@@ -2670,8 +2725,9 @@
 }
 
 
-longlong Item_sum_count_distinct::val_int()
+longlong Item_sum_count::distinct_val_int()
 {
+  DBUG_ASSERT(distinct);
   DBUG_ASSERT(fixed == 1);
   if (!table)					// Empty query
     return LL(0);

--- 1.98/sql/item_sum.h	2006-02-28 18:29:47 +02:00
+++ 1.99/sql/item_sum.h	2006-04-07 14:07:07 +03:00
@@ -464,40 +464,49 @@
   Item *copy_or_same(THD* thd) { return new Item_sum_avg_distinct(thd, this); }
 };
 
+class TMP_TABLE_PARAM;
+
 
 class Item_sum_count :public Item_sum_int
 {
+  protected:
+  /* common part */
+  bool distinct;
+
+  /* pointers */
+  table_map (Item_sum_count::*used_tables_ptr) () const;
+  bool (Item_sum_count::*const_item_ptr) () const;
+  enum Sumfunctype (Item_sum_count::*sum_func_ptr) () const;
+  void (Item_sum_count::*clear_ptr) ();
+  void (Item_sum_count::*no_rows_in_result_ptr) ();
+  bool (Item_sum_count::*add_ptr) ();
+  longlong (Item_sum_count::*val_int_ptr) ();
+  void (Item_sum_count::*reset_field_ptr) ();
+  void (Item_sum_count::*cleanup_ptr) ();
+  void (Item_sum_count::*update_field_ptr) ();
+  const char * (Item_sum_count::*func_name_ptr) () const;
+  bool (Item_sum_count::*setup_ptr) (THD *thd);
+  void (Item_sum_count::*make_unique_ptr) ();
+
+  /* The plain part */
   longlong count;
   table_map used_table_cache;
 
-  public:
-  Item_sum_count(Item *item_par)
-    :Item_sum_int(item_par),count(0),used_table_cache(~(table_map) 0)
-  {}
-  Item_sum_count(THD *thd, Item_sum_count *item)
-    :Item_sum_int(thd, item), count(item->count),
-     used_table_cache(item->used_table_cache)
-  {}
-  table_map used_tables() const { return used_table_cache; }
-  bool const_item() const { return !used_table_cache; }
-  enum Sumfunctype sum_func () const { return COUNT_FUNC; }
-  void clear();
-  void no_rows_in_result() { count=0; }
-  bool add();
-  void make_const(longlong count_arg) { count=count_arg; used_table_cache=0; }
-  longlong val_int();
-  void reset_field();
-  void cleanup();
-  void update_field();
-  const char *func_name() const { return "count("; }
-  Item *copy_or_same(THD* thd);
-};
-
-
-class TMP_TABLE_PARAM;
+  table_map plain_used_tables() const { return used_table_cache; }
+  bool plain_const_item() const { return !used_table_cache; }
+  enum Sumfunctype plain_sum_func () const { return COUNT_FUNC; }
+  void plain_clear();
+  void plain_no_rows_in_result() { count=0; }
+  bool plain_add();
+  longlong plain_val_int();
+  void plain_reset_field();
+  void plain_cleanup();
+  void plain_update_field();
+  const char *plain_func_name() const { return "count("; }
+  bool plain_setup(THD *thd) { return Item_sum_int::setup(thd); }
+  void plain_make_unique() { Item_sum_int::make_unique(); }
 
-class Item_sum_count_distinct :public Item_sum_int
-{
+  /* the distinct part */
   TABLE *table;
   uint32 *field_lengths;
   TMP_TABLE_PARAM *tmp_table_param;
@@ -511,43 +520,98 @@
     Following is 0 normal object and pointer to original one for copy 
     (to correctly free resources)
   */
-  Item_sum_count_distinct *original;
+  Item_sum_count *original;
   uint tree_key_length;
 
 
   bool always_null;		// Set to 1 if the result is always NULL
 
-
   friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
   friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2);
 
-public:
-  Item_sum_count_distinct(List<Item> &list)
-    :Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
-     tree(0), original(0), always_null(FALSE)
-  { quick_group= 0; }
-  Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
-    :Item_sum_int(thd, item), table(item->table),
-     field_lengths(item->field_lengths),
+
+  table_map distinct_used_tables() const { return Item_sum_int::used_tables(); }
+  bool distinct_const_item() const { return Item_sum_int::const_item(); }
+  enum Sumfunctype distinct_sum_func () const { return COUNT_DISTINCT_FUNC; }
+  void distinct_clear();
+  void distinct_no_rows_in_result() {}
+  bool distinct_add();
+  longlong distinct_val_int();
+  void distinct_reset_field() { return ;}		// Never called
+  void distinct_cleanup();
+  void distinct_update_field() { return ; }		// Never called
+  const char *distinct_func_name() const { return "count(distinct "; }
+  bool distinct_setup(THD *thd);
+  void distinct_make_unique();
+
+  public:
+  Item_sum_count(THD *thd, Item_sum_count *item)
+    /* common part */
+    :Item_sum_int(thd, item),
+    /* the plain part */
+     count(item->count), used_table_cache(item->used_table_cache),
+    /* the distinct part */
+     table(item->table), field_lengths(item->field_lengths),
      tmp_table_param(item->tmp_table_param),
      tree(item->tree), original(item), tree_key_length(item->tree_key_length),
      always_null(item->always_null)
-  {}
-  ~Item_sum_count_distinct();
+  {
+    set_distinct(item->distinct);
+  }
+
+  Item_sum_count(List<Item> &list, bool is_distinct)
+    /* common part */
+    :Item_sum_int(list),
+    /* the plain part */
+    count(0),used_table_cache(~(table_map) 0),
+    /* the distinct part */
+    table(0), field_lengths(0), tmp_table_param(0),
+    tree(0), original(0), always_null(FALSE)
+  {
+    set_distinct (is_distinct);
+  }
+
+  Item_sum_count(Item *item_par, bool is_distinct)
+    /* common part */
+    :Item_sum_int(item_par), distinct(is_distinct),
+    /* the plain part */
+     count(0),used_table_cache(~(table_map) 0),
+    /* the distinct part */
+    table(0), field_lengths(0), tmp_table_param(0),
+    tree(0), original(0), always_null(FALSE)
+  {
+    set_distinct (is_distinct);
+  }
+
+  ~Item_sum_count();
+
+
+  /* proxy methods */
+  table_map used_tables() const { return (this->*used_tables_ptr)(); }
+  bool const_item() const { return (this->*const_item_ptr)(); }
+  enum Sumfunctype sum_func () const { return (this->*sum_func_ptr)(); }
+  void clear() { (this->*clear_ptr)(); }
+  void no_rows_in_result() { (this->*no_rows_in_result_ptr)(); }
+  bool add() { return (this->*add_ptr)(); }
+  longlong val_int() { return (this->*val_int_ptr)(); }
+  void reset_field() { (this->*reset_field_ptr)(); }
+  void cleanup() { (this->*cleanup_ptr)(); }
+  void update_field() { (this->*update_field_ptr)(); }
+  const char *func_name() const { return (this->*func_name_ptr)(); }
+  bool setup(THD *thd) { return (this->*setup_ptr)(thd); }
+  void make_unique() { (this->*make_unique_ptr)(); }
+
+  void make_const(longlong count_arg)
+  {
+    DBUG_ASSERT(!distinct);
+    count=count_arg;
+    used_table_cache=0;
+  }
 
-  void cleanup();
-
-  enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
-  void clear();
-  bool add();
-  longlong val_int();
-  void reset_field() { return ;}		// Never called
-  void update_field() { return ; }		// Never called
-  const char *func_name() const { return "count(distinct "; }
-  bool setup(THD *thd);
-  void make_unique();
   Item *copy_or_same(THD* thd);
-  void no_rows_in_result() {}
+
+  /* class methods */
+  void set_distinct(bool is_distinct);
 };
 
 

--- 1.205/sql/opt_range.cc	2006-02-25 20:35:08 +02:00
+++ 1.206/sql/opt_range.cc	2006-04-07 14:07:07 +03:00
@@ -1705,7 +1705,7 @@
 class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
 {
 private:
-  bool have_min, have_max;
+  bool have_min, have_max, have_agg_distinct;
   KEY_PART_INFO *min_max_arg_part;
   uint group_prefix_len;
   uint used_key_parts;
@@ -1722,6 +1722,7 @@
   ha_rows quick_prefix_records;
 public:
   TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
+                    bool have_agg_distinct_arg,
                     KEY_PART_INFO *min_max_arg_part_arg,
                     uint group_prefix_len_arg, uint used_key_parts_arg,
                     uint group_key_parts_arg, KEY *index_info_arg,
@@ -1730,6 +1731,7 @@
                     SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
                     uint param_idx_arg, ha_rows quick_prefix_records_arg)
   : have_min(have_min_arg), have_max(have_max_arg),
+    have_agg_distinct(have_agg_distinct_arg),
     min_max_arg_part(min_max_arg_part_arg),
     group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
     group_key_parts(group_key_parts_arg), index_info(index_info_arg),
@@ -8325,21 +8327,30 @@
   ORDER *tmp_group;
   Item *item;
   Item_field *item_field;
+  bool is_agg_distinct;
+  List<Item_field> agg_distinct_flds;
+
   DBUG_ENTER("get_best_group_min_max");
 
   /* Perform few 'cheap' tests whether this access method is applicable. */
   if (!join || (thd->lex->sql_command != SQLCOM_SELECT))
     DBUG_RETURN(NULL);        /* This is not a select statement. */
   if ((join->tables != 1) ||  /* The query must reference one table. */
-      ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
-       (!join->select_distinct)) ||
       (thd->lex->select_lex.olap == ROLLUP_TYPE)) /* Check (B3) for ROLLUP */
     DBUG_RETURN(NULL);
   if (table->s->keys == 0)        /* There are no indexes to use. */
     DBUG_RETURN(NULL);
 
-  /* Analyze the query in more detail. */
   List_iterator<Item> select_items_it(join->fields_list);
+  is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds);
+
+
+  if (!is_agg_distinct)
+  {
+    if ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
+         (!join->select_distinct))
+      DBUG_RETURN(NULL);
+    /* Analyze the query in more detail. */
 
   /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
   if (join->make_sum_func_list(join->all_fields, join->fields_list, 1))
@@ -8380,12 +8391,13 @@
     }
   }
 
-  /* Check (GA4) - that there are no expressions among the group attributes. */
+    /* Check (GA4) -that there are no expressions among the group attributes. */
   for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
   {
     if ((*tmp_group->item)->type() != Item::FIELD_ITEM)
       DBUG_RETURN(NULL);
   }
+  }
 
   /*
     Check that table has at least one compound index such that the conditions
@@ -8495,12 +8507,19 @@
       Later group_fields_array of ORDER objects is used to convert the query
       to a GROUP query.
     */
-    else if (join->select_distinct)
+    else if (join->select_distinct ||
+             is_agg_distinct)
     {
-      select_items_it.rewind();
-      cur_used_key_parts.clear_all();
       uint max_key_part= 0;
-      while ((item= select_items_it++))
+      cur_used_key_parts.clear_all();
+      if (!is_agg_distinct)
+      {
+        select_items_it.rewind();
+      }
+
+      List_iterator<Item_field> agg_distinct_flds_it (agg_distinct_flds);
+      while (NULL != (item = (is_agg_distinct ?
+             (Item *) agg_distinct_flds_it++ : select_items_it++)))
       {
         item_field= (Item_field*) item; /* (SA5) already checked above. */
         /* Find the order of the key part in the index. */
@@ -8511,7 +8530,8 @@
         */
         if (cur_used_key_parts.is_set(key_part_nr))
           continue;
-        if (key_part_nr < 1 || key_part_nr > join->fields_list.elements)
+        if (key_part_nr < 1 ||
+            (!is_agg_distinct && key_part_nr > join->fields_list.elements))
           goto next_index;
         cur_part= cur_index_info->key_part + key_part_nr - 1;
         cur_group_prefix_len+= cur_part->store_length;
@@ -8657,7 +8677,8 @@
 
   /* The query passes all tests, so construct a new TRP object. */
   read_plan= new (param->mem_root)
-                 TRP_GROUP_MIN_MAX(have_min, have_max, min_max_arg_part,
+                 TRP_GROUP_MIN_MAX(have_min, have_max, is_agg_distinct,
+                                   min_max_arg_part,
                                    group_prefix_len, used_key_parts,
                                    group_key_parts, index_info, index,
                                    key_infix_len,
@@ -9154,7 +9175,8 @@
 
   quick= new QUICK_GROUP_MIN_MAX_SELECT(param->table,
                                         param->thd->lex->select_lex.join,
-                                        have_min, have_max, min_max_arg_part,
+                                        have_min, have_max,
+                                        have_agg_distinct, min_max_arg_part,
                                         group_prefix_len, used_key_parts,
                                         index_info, index, read_cost, records,
                                         key_infix_len, key_infix,
@@ -9244,7 +9266,7 @@
 
 QUICK_GROUP_MIN_MAX_SELECT::
 QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
-                           bool have_max_arg,
+                           bool have_max_arg, bool have_agg_distinct_arg,
                            KEY_PART_INFO *min_max_arg_part_arg,
                            uint group_prefix_len_arg,
                            uint used_key_parts_arg, KEY *index_info_arg,
@@ -9253,10 +9275,10 @@
                            byte *key_infix_arg, MEM_ROOT *parent_alloc)
   :join(join_arg), index_info(index_info_arg),
    group_prefix_len(group_prefix_len_arg), have_min(have_min_arg),
-   have_max(have_max_arg), seen_first_key(FALSE),
-   min_max_arg_part(min_max_arg_part_arg), key_infix(key_infix_arg),
-   key_infix_len(key_infix_len_arg), min_functions_it(NULL),
-   max_functions_it(NULL)
+   have_max(have_max_arg), have_agg_distinct(have_agg_distinct_arg),
+   seen_first_key(FALSE), min_max_arg_part(min_max_arg_part_arg),
+   key_infix(key_infix_arg), key_infix_len(key_infix_len_arg),
+   min_functions_it(NULL), max_functions_it(NULL)
 {
   head=       table;
   file=       head->file;

--- 1.60/sql/opt_range.h	2006-02-02 15:48:05 +02:00
+++ 1.61/sql/opt_range.h	2006-04-07 14:07:07 +03:00
@@ -606,6 +606,7 @@
   byte *last_prefix;     /* Prefix of the last group for detecting EOF. */
   bool have_min;         /* Specify whether we are computing */
   bool have_max;         /*   a MIN, a MAX, or both.         */
+  bool have_agg_distinct;/*   aggregate_function(DISTINCT ...).  */
   bool seen_first_key;   /* Denotes whether the first key was retrieved.*/
   KEY_PART_INFO *min_max_arg_part; /* The keypart of the only argument field */
                                    /* of all MIN/MAX functions.              */
@@ -635,7 +636,8 @@
   void update_max_result();
 public:
   QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join, bool have_min,
-                             bool have_max, KEY_PART_INFO *min_max_arg_part,
+                             bool have_max, bool have_agg_distinct,
+                             KEY_PART_INFO *min_max_arg_part,
                              uint group_prefix_len, uint used_key_parts,
                              KEY *index_info, uint use_index, double read_cost,
                              ha_rows records, uint key_infix_len,
@@ -654,6 +656,8 @@
 #ifndef DBUG_OFF
   void dbug_dump(int indent, bool verbose);
 #endif
+  bool is_agg_distinct() { return have_agg_distinct; }
+
 };
 
 

--- 1.391/sql/sql_select.cc	2006-02-22 10:27:17 +02:00
+++ 1.392/sql/sql_select.cc	2006-04-07 14:07:07 +03:00
@@ -1063,7 +1063,36 @@
     join_tab element of the plan for its access method.
   */
   if (join_tab->is_using_loose_index_scan())
+  {
     tmp_table_param.precomputed_group_by= TRUE;
+    /*
+       if a AGGFN(DISTINCT) optimization is in effect
+       and the compiler desided to use the quick index
+       with it, we must change the type of the Item_sum
+       with it's non-distinct counterpart , e.g:
+       COUNT_DISTINCT_FUNC->COUNT_FUNC
+    */
+    if (join_tab->is_using_agg_loose_index_scan())
+    {
+      Item_sum *item= sum_funcs[0];
+
+      switch (item->sum_func())
+      {
+      case Item_sum::COUNT_DISTINCT_FUNC:
+        ((Item_sum_count *)item)->set_distinct(false);
+        break;
+      default:
+        DBUG_ASSERT(0);
+        DBUG_RETURN(1);
+      }
+      /*
+         must also turn back on the grouping
+         because the loose index scan runtime will not
+         calculate other aggregates except MIN/MAX
+      */
+      tmp_table_param.precomputed_group_by= FALSE;
+    }
+  }
 
   /* Create a tmp table if distinct or if the sort is too complicated */
   if (need_tmp)
@@ -3212,6 +3241,72 @@
 
 
 /*
+  Check for the presence of AGGFN(DISTINCT a) queries that may be subject
+  to loose index scan
+
+  SYNOPSIS
+    is_indexed_agg_distinct()
+    join
+    out_args
+
+  DESCRIPTION
+    check if the query is a subject to AGGFN(DISTINCT) using
+    loose index scan (QUICK_GROUP_MIN_MAX_SELECT).
+    Optionally (if out_args is supplied) will push the
+    arguments of AGGFN(DISTINCT) to the list
+
+  RETURN
+    true if the query qualifies for AGGFN(DISTINCT)
+    optimization
+*/
+bool
+is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
+{
+  if (join->tables != 1 ||    /* the query references more than 1 table */
+      join->group_list ||     /* it is a GROUP BY */
+      join->select_distinct)  /* or a DISTINCT */
+    return false;
+
+  if (join->make_sum_func_list(join->all_fields, join->fields_list, 1))
+    return false;
+  if (!join->sum_funcs[0] || join->sum_funcs[1]) /* more than 1 aggregate */
+    return false;
+
+  Item_sum *sum_item= join->sum_funcs[0];
+
+  /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
+  if (sum_item->sum_func() != Item_sum::COUNT_DISTINCT_FUNC
+      /*|| sum_item->arg_count != 1*/)
+    return false;
+
+  /* Select list has more that 1 or no elements */
+  if (join->fields_list.elements != 1)
+    return false;
+
+  /* the AGGFN(DISTINCT) is alone in the select list */
+  Item *sel_expr= join->fields_list.head();
+  if (!sel_expr->eq(sum_item, 1))
+    return false;
+
+  Item **args_end= sum_item->args + sum_item->arg_count;
+  Item **expr;
+  for (expr= sum_item->args; expr < args_end; expr++)
+  {
+    /* The AGGFN(DISTINCT) arg is not an attribute? */
+    if ((*expr)->type() != Item::FIELD_ITEM)
+      return false;
+
+    /* if we came to that point the AGGFN(DISTINCT) loose index scan
+       optimization is applicable */
+    if (out_args)
+      out_args->push_back((Item_field *) (*expr));
+  }
+
+  return true;
+}
+
+
+/*
   Discover the indexes that can be used for GROUP BY or DISTINCT queries.
 
   SYNOPSIS
@@ -3253,6 +3348,10 @@
     Item *item;
     while ((item= select_items_it++))
       item->walk(&Item::collect_item_field_processor, (byte*) &indexed_fields);
+  }
+  else if (is_indexed_agg_distinct(join, &indexed_fields))
+  {
+    join->sort_and_group= 1;
   }
   else
     return;

--- 1.106/sql/sql_select.h	2006-02-27 17:58:32 +02:00
+++ 1.107/sql/sql_select.h	2006-04-07 14:07:07 +03:00
@@ -148,6 +148,11 @@
             (select->quick->get_type() ==
              QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX));
   }
+  inline bool is_using_agg_loose_index_scan ()
+  {
+    return (is_using_loose_index_scan() &&
+            ((QUICK_GROUP_MIN_MAX_SELECT *)select->quick)->is_agg_distinct());
+  }
 } JOIN_TAB;
 
 enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool
@@ -426,6 +431,8 @@
                                    const char *name, TABLE *table,
                                    Item_field *item, uint convert_blob_length);
                                                                       
+bool is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args);
+
 /* functions from opt_sum.cc */
 bool simple_pred(Item_func *func_item, Item **args, bool *inv_order);
 int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds);

--- 1.464/sql/sql_yacc.yy	2006-03-08 20:06:08 +02:00
+++ 1.465/sql/sql_yacc.yy	2006-04-07 14:07:07 +03:00
@@ -6736,15 +6736,15 @@
 	| BIT_XOR  '(' in_sum_expr ')'
 	  { $$=new Item_sum_xor($3); }
 	| COUNT_SYM '(' opt_all '*' ')'
-	  { $$=new Item_sum_count(new Item_int((int32) 0L,1)); }
+	  { $$=new Item_sum_count(new Item_int((int32) 0L,1), false); }
 	| COUNT_SYM '(' in_sum_expr ')'
-	  { $$=new Item_sum_count($3); }
+	  { $$=new Item_sum_count($3, false); }
 	| COUNT_SYM '(' DISTINCT
 	  { Select->in_sum_expr++; }
 	   expr_list
 	  { Select->in_sum_expr--; }
 	  ')'
-	  { $$=new Item_sum_count_distinct(* $5); }
+	  { $$=new Item_sum_count(* $5, true); }
 	| GROUP_UNIQUE_USERS '(' text_literal ',' NUM ',' NUM ',' in_sum_expr ')'
 	  { $$= new Item_sum_unique_users($3,atoi($5.str),atoi($7.str),$9); }
 	| MIN_SYM '(' in_sum_expr ')'

--- 1.7/mysql-test/r/bench_count_distinct.result	2004-07-20 08:48:24 +03:00
+++ 1.8/mysql-test/r/bench_count_distinct.result	2006-04-07 14:07:06 +03:00
@@ -5,7 +5,7 @@
 100
 explain extended select count(distinct n) from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	index	NULL	n	4	NULL	200	Using index
+1	SIMPLE	t1	range	NULL	n	4	NULL	10	Using index for group-by
 Warnings:
 Note	1003	select count(distinct `test`.`t1`.`n`) AS `count(distinct n)` from `test`.`t1`
 drop table t1;

--- 1.20/mysql-test/r/group_min_max.result	2006-02-06 21:35:05 +02:00
+++ 1.21/mysql-test/r/group_min_max.result	2006-04-07 14:07:06 +03:00
@@ -1796,13 +1796,13 @@
 a
 explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a');
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	index	NULL	idx_t1_2	147	NULL	128	Using where; Using index
+1	SIMPLE	t1	range	NULL	idx_t1_1	147	NULL	17	Using where; Using index for group-by
 explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121');
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	index	NULL	idx_t1_1	163	NULL	128	Using where; Using index
 explain select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c');
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	index	idx_t1_0,idx_t1_1,idx_t1_2	idx_t1_2	147	NULL	128	Using where; Using index
+1	SIMPLE	t1	range	idx_t1_0,idx_t1_1,idx_t1_2	idx_t1_1	147	NULL	14	Using where; Using index for group-by
 explain select count(distinct b) from t1 where (a2 >= 'b') and (b = 'a');
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	index	NULL	idx_t1_2	147	NULL	128	Using where; Using index
@@ -2069,4 +2069,49 @@
 SELECT a FROM t1 WHERE a LIKE 'B%' GROUP BY a;
 a
 BB
+DROP TABLE t1;
+CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b));
+INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1);
+INSERT INTO t1 SELECT a, b + 4, 1 FROM t1;
+INSERT INTO t1 SELECT a + 1, b, 1 FROM t1;
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	NULL	a	5	NULL	9	Using index for group-by
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+2
+EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	NULL	a	10	NULL	9	Using index for group-by
+SELECT COUNT(DISTINCT a,b) FROM t1;
+COUNT(DISTINCT a,b)
+16
+EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	NULL	a	10	NULL	9	Using index for group-by
+SELECT COUNT(DISTINCT b,a) FROM t1;
+COUNT(DISTINCT b,a)
+16
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	10	NULL	16	Using index
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	10	NULL	16	Using index
+EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	10	NULL	16	Using index
+EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	10	NULL	16	Using index
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 having COUNT(DISTINCT b) < 10;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	NULL	a	10	NULL	16	Using index
+EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1_1	index	NULL	a	10	NULL	16	Using index; Using temporary; Using filesort
+1	SIMPLE	t1_2	index	NULL	a	10	NULL	16	Using index
+SELECT COUNT(DISTINCT a), 12 FROM t1;
+COUNT(DISTINCT a)	12
+2	12
 DROP TABLE t1;

--- 1.18/mysql-test/t/group_min_max.test	2006-02-06 21:35:06 +02:00
+++ 1.19/mysql-test/t/group_min_max.test	2006-04-07 14:07:07 +03:00
@@ -1,5 +1,7 @@
 #
-# Test file for WL#1724 (Min/Max Optimization for Queries with Group By Clause).
+# Test file for :
+# WL#1724 (Min/Max Optimization for Queries with Group By Clause).
+# WL#3220 (Loose index scan for COUNT DISTINCT)
 # The queries in this file test query execution via QUICK_GROUP_MIN_MAX_SELECT.
 #
 
@@ -734,5 +736,37 @@
 SELECT DISTINCT a FROM t1 WHERE a='BB';
 SELECT DISTINCT a FROM t1 WHERE a LIKE 'B%';
 SELECT a FROM t1 WHERE a LIKE 'B%' GROUP BY a;
+
+DROP TABLE t1;
+
+
+# WL#3220 (Loose index scan for COUNT DISTINCT)
+CREATE TABLE t1 (a INT, b INT, c INT, KEY (a,b));
+INSERT INTO t1 VALUES (1,1,1), (1,2,1), (1,3,1), (1,4,1);
+INSERT INTO t1 SELECT a, b + 4, 1 FROM t1;
+INSERT INTO t1 SELECT a + 1, b, 1 FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1;
+SELECT COUNT(DISTINCT a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1;
+SELECT COUNT(DISTINCT a,b) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1;
+SELECT COUNT(DISTINCT b,a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT b) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 GROUP BY a;
+
+EXPLAIN SELECT DISTINCT COUNT(DISTINCT a) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a, b + 0) FROM t1;
+
+EXPLAIN SELECT COUNT(DISTINCT a) FROM t1 having COUNT(DISTINCT b) < 10;
+
+EXPLAIN SELECT COUNT(DISTINCT t1_1.a) FROM t1 t1_1, t1 t1_2 GROUP BY t1_1.a;
+
+SELECT COUNT(DISTINCT a), 12 FROM t1;
 
 DROP TABLE t1;
Thread
bk commit into 5.2 tree (gkodinov:1.2154)kgeorge7 Apr