List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:October 31 2006 5:51pm
Subject:bk commit into 5.0 tree (sergefp:1.2290) BUG#8804
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-10-31 20:51:09+03:00, sergefp@stripped +13 -0
  BUG#8804: wrong results for NULL IN (SELECT ...)
  Evaluate "NULL IN (SELECT ...)" in a special way: Disable pushed-down 
  conditions and their "consequences": 
   = Do full table scans instead of unique_[index_subquery] lookups.
   = Change appropriate "ref_or_null" accesses to full table scans in
     subquery's joins.
  Also cache value of NULL IN (SELECT ...) if the SELECT is not correlated 
  wrt any upper select.

  mysql-test/r/subselect.result@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +5 -5
    BUG#8804: wrong results for NULL IN (SELECT ...): 
     - Updated test results

  mysql-test/r/subselect3.result@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +153 -0
    New BitKeeper file ``mysql-test/r/subselect3.result''

  mysql-test/r/subselect3.result@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +0 -0

  mysql-test/t/subselect3.test@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +137 -0
    New BitKeeper file ``mysql-test/t/subselect3.test''

  mysql-test/t/subselect3.test@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +0 -0

  sql/item.h@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +10 -0
    BUG#8804: wrong results for NULL IN (SELECT ...): 
     - Added comments

  sql/item_cmpfunc.cc@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +33 -1
    BUG#8804: wrong results for NULL IN (SELECT ...): 
    Made Item_in_optimizer to:
    - cache the value of "NULL IN (uncorrelated select)"
    - Turn off pushed-down predicates when evaluating "NULL IN (SELECT ...)"
    

  sql/item_cmpfunc.h@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +39 -9
    BUG#8804: wrong results for NULL IN (SELECT ...): 
    - Made Item_in_optimizer cache the value of "NULL IN (uncorrelated select)"
    - Added comments

  sql/item_subselect.cc@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +353 -53
    BUG#8804: wrong results for NULL IN (SELECT ...):
    - When needed, wrap the predicates we push into subquery into an 
      Item_func_trig_cond so we're able to turn them off when evaluating 
      NULL IN (SELECT ...).
    - Added code to evaluate NULL IN (SELECT ...) in a special way:
      = In [unique_]index_subquery, do full table scan to see if there 
        are any rows.
      = For other subqueries, change ref[_or_null] to ALL if the
        ref[_or_null] was created from pushed-down predicate.
     

  sql/item_subselect.h@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +62 -8
    BUG#8804: wrong results for NULL IN (SELECT ...):
    - Added Item_subselect::is_correlated
    - Added comments

  sql/records.cc@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +3 -2
    BUG#8804: wrong results for NULL IN (SELECT ...):
    - Make rr_sequential() non-static

  sql/sql_lex.cc@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +3 -1
    BUG#8804: wrong results for NULL IN (SELECT ...):
    - Added st_select_lex::is_correlated and Item_subselect::is_correlated.

  sql/sql_lex.h@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +3 -1
    BUG#8804: wrong results for NULL IN (SELECT ...):
    - Added st_select_lex::is_correlated

  sql/sql_select.cc@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +91 -21
    BUG#8804: wrong results for NULL IN (SELECT ...):
    - Added KEY_FIELD::outer_ref to keep track of which ref accesses are 
      created from predicates that were pushed down into the subquery.

  sql/sql_select.h@stripped, 2006-10-31 20:42:45+03:00, sergefp@stripped +8 -3
    BUG#8804: wrong results for NULL IN (SELECT ...):
    - Added KEYUSE::outer_ref

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sergefp
# Host:	pylon.mylan
# Root:	/home/psergey/mysql-5.0-bug8804-r4

--- 1.209/sql/item.h	2006-10-31 20:51:13 +03:00
+++ 1.210/sql/item.h	2006-10-31 20:51:13 +03:00
@@ -1959,6 +1959,16 @@
 
 class Item_in_subselect;
 
+
+/*
+  An object of this class:
+   - Converts val_XXX() calls to ref->val_XXX_result() calls, like Item_ref.
+   - Sets owner->was_null=TRUE if it has returned a NULL value from any
+     val_XXX() function. This allows to inject an Item_ref_null_helper
+     object into subquery and then check if the subquery has produced a row
+     with NULL value.
+*/
+
 class Item_ref_null_helper: public Item_ref
 {
 protected:

--- 1.223/sql/item_cmpfunc.cc	2006-10-31 20:51:13 +03:00
+++ 1.224/sql/item_cmpfunc.cc	2006-10-31 20:51:13 +03:00
@@ -786,9 +786,41 @@
 {
   DBUG_ASSERT(fixed == 1);
   cache->store(args[0]);
+  
   if (cache->null_value)
   {
-    null_value= 1;
+    if (((Item_in_subselect*)args[1])->is_top_level_item())
+    {
+      /*
+        We're evaluating "NULL IN (SELECT ...)". The result can be NULL or
+        FALSE, and we can return one instead of another. Just return NULL.
+      */
+      null_value= 1;
+    }
+    else
+    {
+      if (!((Item_in_subselect*)args[1])->is_correlated &&
+          result_for_null_param != UNKNOWN)
+      {
+        /* Use cached value from previous execution */
+        null_value= result_for_null_param;
+      }
+      else
+      {
+        /*
+          We're evaluating "NULL IN (SELECT ...)". The result is:
+             FALSE if SELECT produces an empty set, or
+             NULL  otherwise.
+          We disable the predicates we've pushed down into subselect, run the
+          subselect and see if it has produced any rows.
+        */
+        ((Item_in_subselect*)args[1])->enable_pushed_conds= FALSE;
+        longlong tmp= args[1]->val_bool_result();
+        result_for_null_param= null_value= 
+          !((Item_in_subselect*)args[1])->engine->no_rows();
+        ((Item_in_subselect*)args[1])->enable_pushed_conds= TRUE;
+      }
+    }
     return 0;
   }
   bool tmp= args[1]->val_bool_result();

--- 1.133/sql/item_cmpfunc.h	2006-10-31 20:51:13 +03:00
+++ 1.134/sql/item_cmpfunc.h	2006-10-31 20:51:13 +03:00
@@ -100,25 +100,44 @@
 };
 
 class Item_cache;
+#define UNKNOWN ((my_bool)-1)
+
+
+/*
+  Item_in_optimizer(left_expr, Item_in_subselect(...))
+
+  Item_in_optimizer is used to wrap an instance of Item_in_subselect. This
+  class does the following:
+   - Evaluate the left expression and store it in Item_cache_* object (to
+     avoid re-evaluating it many times during subquery execution)
+   - Shortcut the evaluation of "NULL IN (...)" to NULL in the cases where we
+     don't care if the result is NULL or FALSE.
+
+  NOTE
+    It is not quite clear why the above listed functionality should be
+    placed into a separate class called 'Item_in_optimizer'.
+*/
+
 class Item_in_optimizer: public Item_bool_func
 {
 protected:
   Item_cache *cache;
   bool save_cache;
+  /* 
+    Stores the value of "NULL IN (SELECT ...)" for uncorrelated subqueries:
+      UNKNOWN - "NULL in (SELECT ...)" has not yet been evaluated
+      FALSE   - result is FALSE
+      TRUE    - result is NULL
+  */
+  my_bool result_for_null_param;
 public:
   Item_in_optimizer(Item *a, Item_in_subselect *b):
-    Item_bool_func(a, my_reinterpret_cast(Item *)(b)), cache(0), save_cache(0)
+    Item_bool_func(a, my_reinterpret_cast(Item *)(b)), cache(0),
+    save_cache(0), result_for_null_param(UNKNOWN)
   {}
   bool fix_fields(THD *, Item **);
   bool fix_left(THD *thd, Item **ref);
   bool is_null();
-  /*
-    Item_in_optimizer item is special boolean function. On value request 
-    (one of val, val_int or val_str methods) it evaluate left expression 
-    of IN by storing it value in cache item (one of Item_cache* items), 
-    then it test cache is it NULL. If left expression (cache) is NULL then
-    Item_in_optimizer return NULL, else it evaluate Item_in_subselect.
-  */
   longlong val_int();
   void cleanup();
   const char *func_name() const { return "<in_optimizer>"; }
@@ -256,9 +275,11 @@
 class Item_maxmin_subselect;
 
 /*
+  trigcond<param>(arg) ::= param? arg : TRUE
+
   The class Item_func_trig_cond is used for guarded predicates 
   which are employed only for internal purposes.
-  A guarded predicates is an object consisting of an a regular or
+  A guarded predicate is an object consisting of an a regular or
   a guarded predicate P and a pointer to a boolean guard variable g. 
   A guarded predicate P/g is evaluated to true if the value of the
   guard g is false, otherwise it is evaluated to the same value that
@@ -276,6 +297,10 @@
   Objects of this class are built only for query execution after
   the execution plan has been already selected. That's why this
   class needs only val_int out of generic methods. 
+ 
+  Current uses of Item_func_trig_cond objects:
+   - To wrap selection conditions when executing outer joins
+   - To wrap condition that is pushed down into subquery
 */
 
 class Item_func_trig_cond: public Item_bool_func
@@ -1019,6 +1044,11 @@
 /* Functions used by HAVING for rewriting IN subquery */
 
 class Item_in_subselect;
+
+/* 
+  This is like IS NOT NULL but it also remembers if it ever has
+  encountered a NULL.
+*/
 class Item_is_not_null_test :public Item_func_isnull
 {
   Item_in_subselect* owner;

--- 1.42/sql/records.cc	2006-10-31 20:51:13 +03:00
+++ 1.43/sql/records.cc	2006-10-31 20:51:13 +03:00
@@ -20,7 +20,7 @@
 #include "mysql_priv.h"
 
 static int rr_quick(READ_RECORD *info);
-static int rr_sequential(READ_RECORD *info);
+int rr_sequential(READ_RECORD *info);
 static int rr_from_tempfile(READ_RECORD *info);
 static int rr_unpack_from_tempfile(READ_RECORD *info);
 static int rr_unpack_from_buffer(READ_RECORD *info);
@@ -184,6 +184,7 @@
 } /* init_read_record */
 
 
+
 void end_read_record(READ_RECORD *info)
 {                   /* free cache if used */
   if (info->cache)
@@ -289,7 +290,7 @@
 }
 
 
-static int rr_sequential(READ_RECORD *info)
+int rr_sequential(READ_RECORD *info)
 {
   int tmp;
   while ((tmp=info->file->rnd_next(info->record)))

--- 1.202/sql/sql_lex.cc	2006-10-31 20:51:13 +03:00
+++ 1.203/sql/sql_lex.cc	2006-10-31 20:51:13 +03:00
@@ -1179,7 +1179,7 @@
   select_limit= 0;      /* denotes the default limit = HA_POS_ERROR */
   offset_limit= 0;      /* denotes the default offset = 0 */
   with_sum_func= 0;
-
+  is_correlated= 0;
 }
 
 /*
@@ -1373,6 +1373,8 @@
       SELECT_LEX_UNIT *munit= s->master_unit();
       munit->uncacheable|= UNCACHEABLE_DEPENDENT;
     }
+  is_correlated= TRUE;
+  this->master_unit()->item->is_correlated= TRUE;
 }
 
 bool st_select_lex_node::set_braces(bool value)      { return 1; }

--- 1.229/sql/sql_lex.h	2006-10-31 20:51:13 +03:00
+++ 1.230/sql/sql_lex.h	2006-10-31 20:51:13 +03:00
@@ -470,7 +470,7 @@
   void set_thd(THD *thd_arg) { thd= thd_arg; }
 
   friend void lex_start(THD *thd, uchar *buf, uint length);
-  friend int subselect_union_engine::exec();
+  friend int subselect_union_engine::exec(bool);
 
   List<Item> *get_unit_column_types();
 };
@@ -562,6 +562,8 @@
     query processing end even if we use temporary table
   */
   bool subquery_in_having;
+  /* TRUE <=> this SELECT is correlated w.r.t. some ancestor select */
+  bool is_correlated;
   /*
     This variable is required to ensure proper work of subqueries and
     stored procedures. Generally, one should use the states of

--- 1.467/sql/sql_select.cc	2006-10-31 20:51:13 +03:00
+++ 1.468/sql/sql_select.cc	2006-10-31 20:51:14 +03:00
@@ -158,8 +158,8 @@
 static int join_read_prev(READ_RECORD *info);
 static int join_ft_read_first(JOIN_TAB *tab);
 static int join_ft_read_next(READ_RECORD *info);
-static int join_read_always_key_or_null(JOIN_TAB *tab);
-static int join_read_next_same_or_null(READ_RECORD *info);
+int join_read_always_key_or_null(JOIN_TAB *tab);
+int join_read_next_same_or_null(READ_RECORD *info);
 static COND *make_cond_for_table(COND *cond,table_map table,
 				 table_map used_table);
 static Item* part_of_refkey(TABLE *form,Field *field);
@@ -512,11 +512,12 @@
   DBUG_RETURN(-1);				/* purecov: inspected */
 }
 
+
 /*
   test if it is known for optimisation IN subquery
 
-  SYNOPSYS
-    JOIN::test_in_subselect
+  SYNOPSIS
+    JOIN::test_in_subselect()
     where - pointer for variable in which conditions should be
             stored if subquery is known
 
@@ -551,6 +552,35 @@
 
 
 /*
+  Check if the passed HAVING clause is a clause added by subquery optimizer
+
+  SYNOPSIS
+    is_having_subq_predicates()
+      having  Having clause
+
+  RETURN
+    TRUE   The passed HAVING clause was added by the subquery optimizer
+    FALSE  Otherwise
+*/
+
+bool is_having_subq_predicates(Item *having)
+{
+  if (having->type() == Item::FUNC_ITEM)
+  {
+    if (((Item_func *) having)->functype() == Item_func::ISNOTNULLTEST_FUNC)
+      return TRUE;
+    if (((Item_func *) having)->functype() == Item_func::TRIG_COND_FUNC)
+    {
+      having= ((Item_func*)having)->arguments()[0];
+      if (((Item_func *) having)->functype() == Item_func::ISNOTNULLTEST_FUNC)
+        return TRUE;
+    }
+    return TRUE;
+  }
+  return FALSE;
+}
+
+/*
   global select optimisation.
   return 0 - success
          1 - error
@@ -1016,9 +1046,7 @@
       }
     } else if (join_tab[0].type == JT_REF_OR_NULL &&
 	       join_tab[0].ref.items[0]->name == in_left_expr_name &&
-	       having->type() == Item::FUNC_ITEM &&
-	       ((Item_func *) having)->functype() ==
-	       Item_func::ISNOTNULLTEST_FUNC)
+               is_having_subq_predicates(having))
     {
       join_tab[0].type= JT_INDEX_SUBQUERY;
       error= 0;
@@ -2512,6 +2540,9 @@
     when val IS NULL.
   */
   bool          null_rejecting; 
+
+  /* TRUE<=> This ref access is an outer subquery reference access */
+  bool          outer_ref;
 } KEY_FIELD;
 
 /* Values in optimize */
@@ -2810,6 +2841,7 @@
                                    cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
                                   ((*value)->type() == Item::FIELD_ITEM) &&
                                   ((Item_field*)*value)->field->maybe_null());
+  (*key_fields)->outer_ref=      FALSE;
   (*key_fields)++;
 }
 
@@ -2868,7 +2900,7 @@
 }
 
 static void
-add_key_fields(KEY_FIELD **key_fields,uint *and_level,
+add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
                COND *cond, table_map usable_tables,
                SARGABLE_PARAM **sargables)
 {
@@ -2881,28 +2913,54 @@
     {
       Item *item;
       while ((item=li++))
-	add_key_fields(key_fields,and_level,item,usable_tables,sargables);
+        add_key_fields(join, key_fields, and_level, item, usable_tables,
+                       sargables);
       for (; org_key_fields != *key_fields ; org_key_fields++)
 	org_key_fields->level= *and_level;
     }
     else
     {
       (*and_level)++;
-      add_key_fields(key_fields,and_level,li++,usable_tables,sargables);
+      add_key_fields(join, key_fields, and_level, li++, usable_tables,
+                     sargables);
       Item *item;
       while ((item=li++))
       {
 	KEY_FIELD *start_key_fields= *key_fields;
 	(*and_level)++;
-	add_key_fields(key_fields,and_level,item,usable_tables,sargables);
+        add_key_fields(join, key_fields, and_level, item, usable_tables,
+                       sargables);
 	*key_fields=merge_key_fields(org_key_fields,start_key_fields,
 				     *key_fields,++(*and_level));
       }
     }
     return;
   }
-  /* If item is of type 'field op field/constant' add it to key_fields */
 
+  /* 
+    Subquery optimization: check if the encountered condition is one
+    added by condition push down into subquery.
+  */
+  {
+    if (cond->type() == Item::FUNC_ITEM &&
+        ((Item_func*)cond)->functype() == Item_func::TRIG_COND_FUNC)
+    {
+      cond= ((Item_func*)cond)->arguments()[0];
+      if (!join->group_list && !join->order &&
+          join->unit->item && 
+          join->unit->item->substype() == Item_subselect::IN_SUBS &&
+          !join->unit->first_select()->next_select())
+      {
+        add_key_fields(join, key_fields, and_level, cond, usable_tables,
+                       sargables);
+        // Indicate that this ref access candidate is for subquery lookup:
+        (*key_fields)[-1].outer_ref= TRUE;
+      }
+      return;
+    }
+  }
+
+  /* If item is of type 'field op field/constant' add it to key_fields */
   if (cond->type() != Item::FUNC_ITEM)
     return;
   Item_func *cond_func= (Item_func*) cond;
@@ -3076,6 +3134,7 @@
 	  keyuse.used_tables=key_field->val->used_tables();
 	  keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
           keyuse.null_rejecting= key_field->null_rejecting;
+          keyuse.outer_ref= key_field->outer_ref;
 	  VOID(insert_dynamic(keyuse_array,(gptr) &keyuse));
 	}
       }
@@ -3198,7 +3257,7 @@
     Here we can add 'ref' access candidates for t1 and t2, but not for t3.
 */
 
-static void add_key_fields_for_nj(TABLE_LIST *nested_join_table,
+static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
                                   KEY_FIELD **end, uint *and_level,
                                   SARGABLE_PARAM **sargables)
 {
@@ -3210,12 +3269,13 @@
   while ((table= li++))
   {
     if (table->nested_join)
-      add_key_fields_for_nj(table, end, and_level, sargables);
+      add_key_fields_for_nj(join, table, end, and_level, sargables);
     else
       if (!table->on_expr)
         tables |= table->table->map;
   }
-  add_key_fields(end, and_level, nested_join_table->on_expr, tables, sargables);
+  add_key_fields(join, end, and_level, nested_join_table->on_expr, tables,
+                 sargables);
 }
 
 
@@ -3290,7 +3350,8 @@
     return TRUE;
   if (cond)
   {
-    add_key_fields(&end,&and_level,cond,normal_tables,sargables);
+    add_key_fields(join_tab->join, &end, &and_level, cond, normal_tables,
+                   sargables);
     for (; field != end ; field++)
     {
       add_key_part(keyuse,field);
@@ -3312,8 +3373,9 @@
       into account as well.
     */ 
     if (*join_tab[i].on_expr_ref)
-      add_key_fields(&end,&and_level,*join_tab[i].on_expr_ref,
-                     join_tab[i].table->map,sargables);
+      add_key_fields(join_tab->join, &end, &and_level, 
+                     *join_tab[i].on_expr_ref,
+                     join_tab[i].table->map, sargables);
   }
 
   /* Process ON conditions for the nested joins */
@@ -3323,7 +3385,8 @@
     while ((table= li++))
     {
       if (table->nested_join)
-	add_key_fields_for_nj(table, &end, &and_level, sargables);
+        add_key_fields_for_nj(join_tab->join, table, &end, &and_level, 
+                              sargables);
     }
   }
 
@@ -10784,6 +10847,13 @@
 }
 
 
+int rr_sequential(READ_RECORD *info);
+int init_read_record_seq(JOIN_TAB *tab)
+{
+  tab->read_record.read_record= rr_sequential;
+  return tab->read_record.file->ha_rnd_init(1);
+}
+
 static int
 test_if_quick_select(JOIN_TAB *tab)
 {
@@ -10912,7 +10982,7 @@
   Reading of key with key reference and one part that may be NULL
 */
 
-static int
+int
 join_read_always_key_or_null(JOIN_TAB *tab)
 {
   int res;
@@ -10928,7 +10998,7 @@
 }
 
 
-static int
+int
 join_read_next_same_or_null(READ_RECORD *info)
 {
   int error;

--- 1.111/sql/sql_select.h	2006-10-31 20:51:14 +03:00
+++ 1.112/sql/sql_select.h	2006-10-31 20:51:14 +03:00
@@ -36,6 +36,8 @@
     satisfied if val has NULL 'value'.
   */
   bool null_rejecting;
+  /* TRUE<=> This ref access is an outer subquery reference access */
+  bool outer_ref;
 } KEYUSE;
 
 class store_key;
@@ -455,10 +457,11 @@
   Field *to_field;				// Store data here
   char *null_ptr;
   char err;
- public:
+public:
+  bool null_key; /* TRUE <=> the value of the key has a null part */
   enum store_key_result { STORE_KEY_OK, STORE_KEY_FATAL, STORE_KEY_CONV };
   store_key(THD *thd, Field *field_arg, char *ptr, char *null, uint length)
-    :null_ptr(null),err(0)
+    :null_ptr(null), err(0), null_key(0)
   {
     if (field_arg->type() == FIELD_TYPE_BLOB)
     {
@@ -496,6 +499,7 @@
   enum store_key_result copy()
   {
     copy_field.do_copy(&copy_field);
+    null_key= to_field->is_null();
     return err != 0 ? STORE_KEY_FATAL : STORE_KEY_OK;
   }
   const char *name() const { return field_name; }
@@ -516,8 +520,8 @@
   enum store_key_result copy()
   {
     int res= item->save_in_field(to_field, 1);
+    null_key= to_field->is_null() || item->null_value;
     return (err != 0 || res > 2 ? STORE_KEY_FATAL : (store_key_result) res); 
-	                 
   }
   const char *name() const { return "func"; }
 };
@@ -547,6 +551,7 @@
           err= res;
       }
     }
+    null_key= to_field->is_null() || item->null_value;
     return (err > 2 ?  STORE_KEY_FATAL : (store_key_result) err);
   }
   const char *name() const { return "const"; }

--- 1.164/mysql-test/r/subselect.result	2006-10-31 20:51:14 +03:00
+++ 1.165/mysql-test/r/subselect.result	2006-10-31 20:51:14 +03:00
@@ -744,7 +744,7 @@
 3	DEPENDENT UNION	NULL	NULL	NULL	NULL	NULL	NULL	NULL	No tables used
 NULL	UNION RESULT	<union2,3>	ALL	NULL	NULL	NULL	NULL	NULL	
 Warnings:
-Note	1003	select `test`.`t2`.`id` AS `id` from `test`.`t2` where <in_optimizer>(`test`.`t2`.`id`,<exists>(select 1 AS `1` having (<cache>(`test`.`t2`.`id`) = <ref_null_helper>(1)) union select 3 AS `3` having (<cache>(`test`.`t2`.`id`) = <ref_null_helper>(3))))
+Note	1003	select `test`.`t2`.`id` AS `id` from `test`.`t2` where <in_optimizer>(`test`.`t2`.`id`,<exists>(select 1 AS `1` having trigcond((<cache>(`test`.`t2`.`id`) = <ref_null_helper>(1))) union select 3 AS `3` having trigcond((<cache>(`test`.`t2`.`id`) = <ref_null_helper>(3)))))
 SELECT * FROM t2 WHERE id IN (SELECT 5 UNION SELECT 3);
 id
 SELECT * FROM t2 WHERE id IN (SELECT 5 UNION SELECT 2);
@@ -907,7 +907,7 @@
 2	DEPENDENT SUBQUERY	t2	ref_or_null	a	a	5	func	2	Using where; Using index
 2	DEPENDENT SUBQUERY	t3	ALL	NULL	NULL	NULL	NULL	3	Using where
 Warnings:
-Note	1003	select `test`.`t1`.`a` AS `a`,<in_optimizer>(`test`.`t1`.`a`,<exists>(select 1 AS `Not_used` from `test`.`t2` join `test`.`t3` where ((`test`.`t3`.`a` = `test`.`t2`.`a`) and ((<cache>(`test`.`t1`.`a`) = `test`.`t2`.`a`) or isnull(`test`.`t2`.`a`))) having <is_not_null_test>(`test`.`t2`.`a`))) AS `t1.a in (select t2.a from t2,t3 where t3.a=t2.a)` from `test`.`t1`
+Note	1003	select `test`.`t1`.`a` AS `a`,<in_optimizer>(`test`.`t1`.`a`,<exists>(select 1 AS `Not_used` from `test`.`t2` join `test`.`t3` where ((`test`.`t3`.`a` = `test`.`t2`.`a`) and trigcond(((<cache>(`test`.`t1`.`a`) = `test`.`t2`.`a`) or isnull(`test`.`t2`.`a`)))) having trigcond(<is_not_null_test>(`test`.`t2`.`a`)))) AS `t1.a in (select t2.a from t2,t3 where t3.a=t2.a)` from `test`.`t1`
 drop table t1,t2,t3;
 create table t1 (a float);
 select 10.5 IN (SELECT * from t1 LIMIT 1);
@@ -2817,19 +2817,19 @@
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	8	
 2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
 Warnings:
-Note	1003	select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = _latin1'0') and ((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`)) and ((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`))) having (<is_not_null_test>(`test`.`t2`.`one`) and <is_not_null_test>(`test`.`t2`.`two`)))) AS `test` from `test`.`t1`
+Note	1003	select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = _latin1'0') and trigcond((((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`)) and ((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`))))) having trigcond((<is_not_null_test>(`test`.`t2`.`one`) and <is_not_null_test>(`test`.`t2`.`two`))))) AS `test` from `test`.`t1`
 explain extended SELECT one,two from t1 where ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = 'N');
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	8	Using where
 2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where
 Warnings:
-Note	1003	select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two` from `test`.`t1` where <in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = _latin1'N') and (<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) and (<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`))))
+Note	1003	select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two` from `test`.`t1` where <in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where ((`test`.`t2`.`flag` = _latin1'N') and trigcond(((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) and (<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`))))))
 explain extended SELECT one,two,ROW(one,two) IN (SELECT one,two FROM t2 WHERE flag = '0' group by one,two) as 'test' from t1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	8	
 2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	9	Using where; Using temporary; Using filesort
 Warnings:
-Note	1003	select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where (`test`.`t2`.`flag` = _latin1'0') group by `test`.`t2`.`one`,`test`.`t2`.`two` having (((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`)) and ((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`)) and <is_not_null_test>(`test`.`t2`.`one`) and <is_not_null_test>(`test`.`t2`.`two`)))) AS `test` from `test`.`t1`
+Note	1003	select `test`.`t1`.`one` AS `one`,`test`.`t1`.`two` AS `two`,<in_optimizer>((`test`.`t1`.`one`,`test`.`t1`.`two`),<exists>(select `test`.`t2`.`one` AS `one`,`test`.`t2`.`two` AS `two` from `test`.`t2` where (`test`.`t2`.`flag` = _latin1'0') group by `test`.`t2`.`one`,`test`.`t2`.`two` having trigcond((((<cache>(`test`.`t1`.`one`) = `test`.`t2`.`one`) or isnull(`test`.`t2`.`one`)) and ((<cache>(`test`.`t1`.`two`) = `test`.`t2`.`two`) or isnull(`test`.`t2`.`two`)) and <is_not_null_test>(`test`.`t2`.`one`) and <is_not_null_test>(`test`.`t2`.`two`))))) AS `test` from `test`.`t1`
 DROP TABLE t1,t2;
 CREATE TABLE t1 (a char(5), b char(5));
 INSERT INTO t1 VALUES (NULL,'aaa'), ('aaa','aaa');

--- 1.136/sql/item_subselect.cc	2006-10-31 20:51:14 +03:00
+++ 1.137/sql/item_subselect.cc	2006-10-31 20:51:14 +03:00
@@ -37,7 +37,7 @@
 Item_subselect::Item_subselect():
   Item_result_field(), value_assigned(0), thd(0), substitution(0),
   engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0),
-  const_item_cache(1), engine_changed(0), changed(0)
+  const_item_cache(1), engine_changed(0), changed(0), is_correlated(FALSE)
 {
   with_subselect= 1;
   reset();
@@ -192,16 +192,16 @@
   return res;
 }
 
-bool Item_subselect::exec()
+bool Item_subselect::exec(bool full_scan)
 {
   int res;
 
-  res= engine->exec();
+  res= engine->exec(full_scan);
 
   if (engine_changed)
   {
     engine_changed= 0;
-    return exec();
+    return exec(full_scan);
   }
   return (res);
 }
@@ -441,13 +441,13 @@
 
 void Item_singlerow_subselect::bring_value()
 {
-  exec();
+  exec(FALSE);
 }
 
 double Item_singlerow_subselect::val_real()
 {
   DBUG_ASSERT(fixed == 1);
-  if (!exec() && !value->null_value)
+  if (!exec(FALSE) && !value->null_value)
   {
     null_value= 0;
     return value->val_real();
@@ -462,7 +462,7 @@
 longlong Item_singlerow_subselect::val_int()
 {
   DBUG_ASSERT(fixed == 1);
-  if (!exec() && !value->null_value)
+  if (!exec(FALSE) && !value->null_value)
   {
     null_value= 0;
     return value->val_int();
@@ -476,7 +476,7 @@
 
 String *Item_singlerow_subselect::val_str(String *str)
 {
-  if (!exec() && !value->null_value)
+  if (!exec(FALSE) && !value->null_value)
   {
     null_value= 0;
     return value->val_str(str);
@@ -491,7 +491,7 @@
 
 my_decimal *Item_singlerow_subselect::val_decimal(my_decimal *decimal_value)
 {
-  if (!exec() && !value->null_value)
+  if (!exec(FALSE) && !value->null_value)
   {
     null_value= 0;
     return value->val_decimal(decimal_value);
@@ -506,7 +506,7 @@
 
 bool Item_singlerow_subselect::val_bool()
 {
-  if (!exec() && !value->null_value)
+  if (!exec(FALSE) && !value->null_value)
   {
     null_value= 0;
     return value->val_bool();
@@ -557,7 +557,8 @@
 
 Item_in_subselect::Item_in_subselect(Item * left_exp,
 				     st_select_lex *select_lex):
-  Item_exists_subselect(), optimizer(0), transformed(0), upper_item(0)
+  Item_exists_subselect(), optimizer(0), transformed(0),
+  enable_pushed_conds(TRUE), upper_item(0)
 {
   DBUG_ENTER("Item_in_subselect::Item_in_subselect");
   left_expr= left_exp;
@@ -602,7 +603,7 @@
 double Item_exists_subselect::val_real()
 {
   DBUG_ASSERT(fixed == 1);
-  if (exec())
+  if (exec(FALSE))
   {
     reset();
     return 0;
@@ -613,7 +614,7 @@
 longlong Item_exists_subselect::val_int()
 {
   DBUG_ASSERT(fixed == 1);
-  if (exec())
+  if (exec(FALSE))
   {
     reset();
     return 0;
@@ -624,7 +625,7 @@
 String *Item_exists_subselect::val_str(String *str)
 {
   DBUG_ASSERT(fixed == 1);
-  if (exec())
+  if (exec(FALSE))
   {
     reset();
     return 0;
@@ -637,7 +638,7 @@
 my_decimal *Item_exists_subselect::val_decimal(my_decimal *decimal_value)
 {
   DBUG_ASSERT(fixed == 1);
-  if (exec())
+  if (exec(FALSE))
   {
     reset();
     return 0;
@@ -650,7 +651,7 @@
 bool Item_exists_subselect::val_bool()
 {
   DBUG_ASSERT(fixed == 1);
-  if (exec())
+  if (exec(FALSE))
   {
     reset();
     return 0;
@@ -668,7 +669,7 @@
   DBUG_ASSERT(0);
   DBUG_ASSERT(fixed == 1);
   null_value= 0;
-  if (exec())
+  if (exec(!enable_pushed_conds))
   {
     reset();
     null_value= 1;
@@ -689,7 +690,7 @@
   DBUG_ASSERT(0);
   DBUG_ASSERT(fixed == 1);
   null_value= 0;
-  if (exec())
+  if (exec(!enable_pushed_conds))
   {
     reset();
     null_value= 1;
@@ -710,7 +711,7 @@
   DBUG_ASSERT(0);
   DBUG_ASSERT(fixed == 1);
   null_value= 0;
-  if (exec())
+  if (exec(!enable_pushed_conds))
   {
     reset();
     null_value= 1;
@@ -730,7 +731,7 @@
 {
   DBUG_ASSERT(fixed == 1);
   null_value= 0;
-  if (exec())
+  if (exec(!enable_pushed_conds))
   {
     reset();
     null_value= 1;
@@ -750,7 +751,7 @@
   DBUG_ASSERT(0);
   null_value= 0;
   DBUG_ASSERT(fixed == 1);
-  if (exec())
+  if (exec(!enable_pushed_conds))
   {
     reset();
     null_value= 1;
@@ -763,7 +764,51 @@
 }
 
 
-/* Rewrite a single-column IN/ALL/ANY subselect. */
+/* 
+  Rewrite a single-column IN/ALL/ANY subselect
+
+  SYNOPSIS
+    Item_in_subselect::single_value_transformer()
+      join
+      func
+
+  DESCRIPTION
+    Rewrite a single-column subquery using rule-based approach. The subquery
+    
+       oe $cmp$ (SELECT sel FROM ... WHERE subq_where HAVING subq_having)
+    
+    First, try to convert the subquery to scalar-result subquery in one of
+    the forms:
+    
+       - oe $cmp$ (SELECT MAX(...) )  // handled by Item_singlerow_subselect
+       - oe $cmp$ <max>(SELECT ...)   // handled by Item_maxminsubselect
+   
+    If that fails, the subquery will be handled with class Item_in_optimizer, 
+    Inject the predicates into subquery, i.e. convert it to:
+
+    - If the subquery has aggregates, GROUP BY, or HAVING, convert to
+
+       SELECT sel FROM ...  HAVING subq_having AND 
+                                   trigcond(oe $cmp$ ref_or_null_helper<ie>)
+                                   
+      the addition is wrapped into trigger only when we want to distinguish
+      between NULL and FALSE results.
+
+    - Else, if we don't care if subquery result is NULL or FALSE, convert to
+      
+       SELECT 1 ... WHERE (oe $CMP$ ie) AND subq_where
+
+    - Else convert to:
+
+       SELECT 1 WHERE ...
+         WHERE  subq_where  AND trigcond((oe $CMP$ ie) OR ie IS NULL)
+         HAVING subq_having AND trigcond(<is_not_null_test>(ie))
+
+  RETURN
+    RES_OK     - Transformed successfully (or done nothing?)
+    RES_REDUCE - The subquery was reduced to non-subquery
+    RES_ERROR  - Error
+*/
 
 Item_subselect::trans_res
 Item_in_subselect::single_value_transformer(JOIN *join,
@@ -896,8 +941,12 @@
   select_lex->uncacheable|= UNCACHEABLE_DEPENDENT;
   /*
     Add the left part of a subselect to a WHERE or HAVING clause of
-    the right part, e.g. SELECT 1 IN (SELECT a FROM t1)  =>
-    SELECT Item_in_optimizer(1, SELECT a FROM t1 WHERE a=1)
+    the right part, e.g. 
+    
+       SELECT 1 IN (SELECT a FROM t1)  =>
+       
+       SELECT Item_in_optimizer(1, SELECT a FROM t1 WHERE a=1)
+       
     HAVING is used only if the right part contains a SUM function, a GROUP
     BY or a HAVING clause.
   */
@@ -912,10 +961,15 @@
                                                       ref_pointer_array,
                                                       (char *)"<ref>",
                                                       this->full_name()));
-#ifdef CORRECT_BUT_TOO_SLOW_TO_BE_USABLE
-    if (!abort_on_null && left_expr->maybe_null)
-      item= new Item_cond_or(new Item_func_isnull(left_expr), item); 
-#endif
+    if (!abort_on_null && ((Item*)select_lex->item_list.head())->maybe_null)
+    {
+      /* 
+        We can encounter "NULL IN (SELECT ...)". Wrap the added condition
+        within a trigger.
+      */
+      item= new Item_func_trig_cond(item, &enable_pushed_conds);
+    }
+    
     /*
       AND and comparison functions can't be changed during fix_fields()
       we can assign select_lex->having here, and pass 0 as last
@@ -944,10 +998,13 @@
       select_lex->item_list.push_back(new Item_int("Not_used",
                                                    (longlong) 1, 21));
       select_lex->ref_pointer_array[0]= select_lex->item_list.head();
+       
       item= func->create(expr, item);
       if (!abort_on_null && orig_item->maybe_null)
       {
-	having= new Item_is_not_null_test(this, having);
+	having= 
+          new Item_func_trig_cond(new Item_is_not_null_test(this, having),
+                                  &enable_pushed_conds);
 	/*
 	  Item_is_not_null_test can't be changed during fix_fields()
 	  we can assign select_lex->having here, and pass 0 as last
@@ -967,12 +1024,15 @@
         select_lex->having_fix_field= 0;
         if (tmp)
 	  DBUG_RETURN(RES_ERROR);
+        /* 
+          NOTE: It is important that we add this "IS NULL" here, even when
+          orig_item can't be NULL. This is needed so that this predicate is
+          only used by ref[_or_null] analyzer (and, e.g. is not used by const
+          propagation).
+        */
 	item= new Item_cond_or(item,
 			       new Item_func_isnull(orig_item));
-#ifdef CORRECT_BUT_TOO_SLOW_TO_BE_USABLE
-        if (left_expr->maybe_null)
-          item= new Item_cond_or(new Item_func_isnull(left_expr), item);
-#endif
+        item= new Item_func_trig_cond(item, &enable_pushed_conds);
       }
       item->name= (char *)in_additional_cond;
       /*
@@ -999,13 +1059,14 @@
 	  we can assign select_lex->having here, and pass 0 as last
 	  argument (reference) to fix_fields()
 	*/
-	select_lex->having=
-	  join->having=
-	  func->create(expr,
+        Item *new_having=
+          func->create(expr,
                        new Item_ref_null_helper(&select_lex->context, this,
                                             select_lex->ref_pointer_array,
                                             (char *)"<no matter>",
                                             (char *)"<result>"));
+        new_having= new Item_func_trig_cond(new_having, &enable_pushed_conds);
+	select_lex->having= join->having= new_having;
 
 	select_lex->having_fix_field= 1;
         /*
@@ -1210,6 +1271,8 @@
 
       where_item= and_items(where_item, item);
     }
+    if (where_item)
+      where_item= new Item_func_trig_cond(where_item, &enable_pushed_conds);
     /*
       AND can't be changed during fix_fields()
       we can assign select_lex->where here, and pass 0 as last
@@ -1223,6 +1286,8 @@
   if (having_item)
   {
     bool res;
+    having_item= new Item_func_trig_cond(having_item, &enable_pushed_conds);
+
     select_lex->having= join->having= and_items(join->having, having_item);
     select_lex->having->top_level_item();
     /*
@@ -1439,6 +1504,27 @@
 }
 
 
+/*
+  Check if last execution of the subquery engine produced any rows
+
+  SYNOPSIS
+    subselect_union_engine::no_rows()
+
+  DESCRIPTION
+    Check if last execution of the subquery engine produced any rows. The
+    return value is undefined if last execution ended in an error.
+
+  RETURN
+    TRUE  - Last subselect execution has produced no rows
+    FALSE - Otherwise
+*/
+
+bool subselect_union_engine::no_rows()
+{
+  /* Check if we got any rows when reading UNION result from temp. table: */
+  return test(!unit->fake_select_lex->join->send_records);
+}
+
 void subselect_uniquesubquery_engine::cleanup()
 {
   DBUG_ENTER("subselect_uniquesubquery_engine::cleanup");
@@ -1504,6 +1590,29 @@
   return 1;
 }
 
+
+/*
+  Check if last execution of the subquery engine produced any rows
+
+  SYNOPSIS
+    subselect_single_select_engine::no_rows()
+
+  DESCRIPTION
+    Check if last execution of the subquery engine produced any rows. The
+    return value is undefined if last execution ended in an error.
+
+  RETURN
+    TRUE  - Last subselect execution has produced no rows
+    FALSE - Otherwise
+*/
+
+bool subselect_single_select_engine::no_rows()
+{ 
+//  return test(!join->send_records);
+  return !item->assigned();
+}
+
+
 static Item_result set_row(List<Item> &item_list, Item *item,
 			   Item_cache **row, bool *maybe_null)
 {
@@ -1557,7 +1666,11 @@
   DBUG_ASSERT(0);
 }
 
-int subselect_single_select_engine::exec()
+int  init_read_record_seq(JOIN_TAB *tab);
+int join_read_always_key_or_null(JOIN_TAB *tab);
+int join_read_next_same_or_null(READ_RECORD *info);
+
+int subselect_single_select_engine::exec(bool full_scan)
 {
   DBUG_ENTER("subselect_single_select_engine::exec");
   char const *save_where= thd->where;
@@ -1595,7 +1708,43 @@
   if (!executed)
   {
     item->reset_value_registration();
+    if (full_scan)
+    {
+      /*
+        We should not apply optimizations based on the condition that was
+        pushed down into the subquery. Those optimizations are ref[_or_null]
+        acceses. Change them to be full table scans.
+      */
+      for (uint i=join->const_tables ; i < join->tables ; i++)
+      {
+        JOIN_TAB *tab=join->join_tab+i;
+        if (tab->keyuse && tab->keyuse->outer_ref)
+        {
+          tab->read_first_record= init_read_record_seq;
+          tab->read_record.record= tab->table->record[0];
+          tab->read_record.thd= join->thd;
+          tab->read_record.ref_length= tab->table->file->ref_length;
+        }
+      }
+    }
+    
     join->exec();
+
+    if (full_scan)
+    {
+      /* Enable the optimizations back */
+      for (uint i=join->const_tables ; i < join->tables ; i++)
+      {
+        JOIN_TAB *tab=join->join_tab+i;
+        if (tab->keyuse && tab->keyuse->outer_ref)
+        {
+          tab->read_record.record= 0;
+          tab->read_record.ref_length= 0;
+          tab->read_first_record= join_read_always_key_or_null;
+          tab->read_record.read_record= join_read_next_same_or_null;
+        }
+      }
+    }
     executed= 1;
     thd->where= save_where;
     thd->lex->current_select= save_select;
@@ -1606,29 +1755,161 @@
   DBUG_RETURN(0);
 }
 
-int subselect_union_engine::exec()
+int subselect_union_engine::exec(bool full_scan)
 {
   char const *save_where= thd->where;
+  /* 
+    Ignore the full_scan parameter: the pushed down predicates are only used
+    for filtering, and the caller has disabled them if necessary.
+  */
   int res= unit->exec();
   thd->where= save_where;
   return res;
 }
 
 
-int subselect_uniquesubquery_engine::exec()
+/*
+  Search for at least on row satisfying select condition
+ 
+  SYNOPSIS
+    subselect_uniquesubquery_engine::scan_table()
+
+  DESCRIPTION
+    Scan the table using sequential access until we find at least one row
+    satisfying select condition.
+    
+    The result of this function (info about whether a row was found) is
+    stored in this->empty_result_set.
+
+  RETURN
+    FALSE - OK
+    TRUE  - Error
+*/
+
+int subselect_uniquesubquery_engine::scan_table()
 {
-  DBUG_ENTER("subselect_uniquesubquery_engine::exec");
   int error;
   TABLE *table= tab->table;
-  for (store_key **copy=tab->ref.key_copy ; *copy ; copy++)
+  DBUG_ENTER("subselect_uniquesubquery_engine::scan_table");
+
+  empty_result_set= TRUE;
+  bool is_uncorrelated= !cond || !(cond->used_tables() & OUTER_REF_TABLE_BIT);
+
+  if (table->file->inited)
+    table->file->ha_index_end();
+ 
+  table->file->ha_rnd_init(1);
+  table->file->extra_opt(HA_EXTRA_CACHE,
+                         current_thd->variables.read_buff_size);
+  table->null_row= 0;
+  for (;;)
+  {
+    error=table->file->rnd_next(table->record[0]);
+    if (error && error != HA_ERR_END_OF_FILE)
+    {
+      error= report_error(table, error);
+      break;
+    }
+    /* No more rows */
+    if (table->status)
+      break;
+
+    if (!cond || cond->val_int())
+    {
+      empty_result_set= FALSE;
+      break;
+    }
+  }
+
+  table->file->ha_rnd_end();
+  DBUG_RETURN(error != 0);
+}
+
+
+/*
+  Copy ref key and check for null parts in it
+
+  SYNOPSIS
+    subselect_uniquesubquery_engine::copy_ref_key()
+
+  DESCRIPTION
+    Copy ref key and check for null parts in it.
+
+  RETURN
+    FALSE - ok, index lookup key without keys copied.
+    TRUE  - an error occured while copying the key
+*/
+
+bool subselect_uniquesubquery_engine::copy_ref_key()
+{
+  DBUG_ENTER("subselect_uniquesubquery_engine::copy_ref_key");
+
+  for (store_key **copy= tab->ref.key_copy ; *copy ; copy++)
   {
-    if ((tab->ref.key_err= (*copy)->copy()) & 1)
+    tab->ref.key_err= (*copy)->copy();
+
+    /*
+      When there is a NULL part in the key we don't need to make index
+      lookup for such key thus we don't need to copy whole key.
+      If we later should do a sequential scan return OK. Fail otherwise.
+
+      See also the comment for the subselect_uniquesubquery_engine::exec()
+      function.
+    */
+    null_keypart= (*copy)->null_key;
+    bool top_level= ((Item_in_subselect *) item)->is_top_level_item();
+    if (null_keypart && !top_level)
+      break;
+    if ((tab->ref.key_err) & 1 || (null_keypart && top_level))
     {
-      table->status= STATUS_NOT_FOUND;
+      tab->table->status= STATUS_NOT_FOUND;
       DBUG_RETURN(1);
     }
   }
+  DBUG_RETURN(0);
+}
 
+
+/*
+  Execute subselect
+
+  SYNOPSIS
+    subselect_uniquesubquery_engine::exec()
+
+  DESCRIPTION
+    Find rows corresponding to the ref key using index access.
+    If some part of the lookup key is NULL, then we're evaluating
+      NULL IN (SELECT ... )
+    This is a special case, we don't need to search for NULL in the table,
+    instead, the result value is 
+      - NULL  if select produces empty row set
+      - FALSE otherwise.
+
+    In some cases (IN subselect is a top level item, i.e. abort_on_null==TRUE)
+    the caller doesn't distinguish between NULL and FALSE result and we just 
+    return FALSE. 
+    Otherwise we make a full table scan to see if there is at least one matching row.
+  
+  NOTE
+    
+  RETURN
+    FALSE - ok
+    TRUE  - an error occured while scanning
+*/
+
+int subselect_uniquesubquery_engine::exec(bool full_scan)
+{
+  DBUG_ENTER("subselect_uniquesubquery_engine::exec");
+  int error;
+  TABLE *table= tab->table;
+ 
+  /* TODO: change to use of 'full_scan' here? */
+  if (copy_ref_key())
+    DBUG_RETURN(1);
+
+  if (null_keypart)
+    DBUG_RETURN(scan_table());
+ 
   if (!table->file->inited)
     table->file->ha_index_init(tab->ref.key);
   error= table->file->index_read(table->record[0],
@@ -1657,14 +1938,35 @@
 }
 
 
-int subselect_indexsubquery_engine::exec()
+/*
+  Index-lookup subselect 'engine' - run the subquery
+
+  SYNOPSIS
+    subselect_uniquesubquery_engine:exec()
+      full_scan 
+
+  DESCRIPTION
+    Resolve subquery using index lookup(s).
+    First look for specified constant, 
+    If not found and we need to check for NULLs, do that too.
+
+    NULL IN (SELECT ...) is a special case.
+
+  RETURN
+    0
+    1
+*/
+
+int subselect_indexsubquery_engine::exec(bool full_scan)
 {
-  DBUG_ENTER("subselect_indexsubselect_engine::exec");
+  DBUG_ENTER("subselect_indexsubquery_engine::exec");
   int error;
   bool null_finding= 0;
   TABLE *table= tab->table;
 
   ((Item_in_subselect *) item)->value= 0;
+  empty_result_set= TRUE;
+  null_keypart= 0;
 
   if (check_null)
   {
@@ -1673,14 +1975,12 @@
     ((Item_in_subselect *) item)->was_null= 0;
   }
 
-  for (store_key **copy=tab->ref.key_copy ; *copy ; copy++)
-  {
-    if ((tab->ref.key_err= (*copy)->copy()) & 1)
-    {
-      table->status= STATUS_NOT_FOUND;
-      DBUG_RETURN(1);
-    }
-  }
+  /* Copy the ref key and check for nulls... */
+  if (copy_ref_key())
+    DBUG_RETURN(1);
+
+  if (null_keypart)
+    DBUG_RETURN(scan_table());
 
   if (!table->file->inited)
     table->file->ha_index_init(tab->ref.key);

--- 1.78/sql/item_subselect.h	2006-10-31 20:51:14 +03:00
+++ 1.79/sql/item_subselect.h	2006-10-31 20:51:14 +03:00
@@ -60,6 +60,9 @@
   /* subquery is transformed */
   bool changed;
 
+  /* TRUE <=> The underlying SELECT is correlated w.r.t some ancestor select */
+  bool is_correlated; 
+
   enum trans_res {RES_OK, RES_REDUCE, RES_ERROR};
   enum subs_type {UNKNOWN_SUBS, SINGLEROW_SUBS,
 		  EXISTS_SUBS, IN_SUBS, ALL_SUBS, ANY_SUBS};
@@ -92,7 +95,7 @@
     return null_value;
   }
   bool fix_fields(THD *thd, Item **ref);
-  virtual bool exec();
+  virtual bool exec(bool full_scan);
   virtual void fix_length_and_dec();
   table_map used_tables() const;
   table_map not_null_tables() const { return 0; }
@@ -215,7 +218,20 @@
   friend class subselect_indexsubquery_engine;
 };
 
-/* IN subselect */
+
+/*
+  IN subselect: this represents "left_exr IN (SELECT ...)"
+
+  This class has: 
+   - (as a descendant of Item_subselect) a "subquery execution engine" which 
+      allows it to evaluate subqueries. (and this class participates in
+      execution by having was_null variable where part of execution result
+      is stored.
+   - Transformation methods (todo: more on this).
+
+  This class is not used directly, it is "wrapped" into Item_in_optimizer
+  which provides some small bits of subquery evaluation.
+*/
 
 class Item_in_subselect :public Item_exists_subselect
 {
@@ -231,12 +247,14 @@
   bool abort_on_null;
   bool transformed;
 public:
+  /* Used to trigger on/off conditions that were pushed down to subselect */
+  bool enable_pushed_conds;
   Item_func_not_all *upper_item; // point on NOT/NOP before ALL/SOME subquery
 
   Item_in_subselect(Item * left_expr, st_select_lex *select_lex);
   Item_in_subselect()
     :Item_exists_subselect(), optimizer(0), abort_on_null(0), transformed(0),
-     upper_item(0)
+     enable_pushed_conds(TRUE), upper_item(0)
   {}
 
   subs_type substype() { return IN_SUBS; }
@@ -256,6 +274,7 @@
   my_decimal *val_decimal(my_decimal *);
   bool val_bool();
   void top_level_item() { abort_on_null=1; }
+  inline bool is_top_level_item() { return abort_on_null; }
   bool test_limit(st_select_lex_unit *unit);
   void print(String *str);
   bool fix_fields(THD *thd, Item **ref);
@@ -313,7 +332,28 @@
   THD * get_thd() { return thd; }
   virtual int prepare()= 0;
   virtual void fix_length_and_dec(Item_cache** row)= 0;
-  virtual int exec()= 0;
+  /*
+    Execute the engine
+
+    SYNOPSIS
+      exec()
+        full_scan TRUE  - Pushed-down predicates are disabled, the engine
+                          must disable made based on those predicates.
+                  FALSE - Pushed-down predicates are in effect.
+    DESCRIPTION
+      Execute the engine. The result of execution is subquery value that is
+      either captured by previously set up select_result-based 'sink' or
+      stored somewhere by the exec() method itself.
+
+      A required side effect: if full_scan==TRUE, subselect_engine->no_rows()
+      should return correct result.
+
+    RETURN
+      0 - OK
+      1 - Either an execution error, or the engine was be "changed", and
+          caller should call exec() again for the new engine.
+  */
+  virtual int exec(bool full_scan)= 0;
   virtual uint cols()= 0; /* return number of columns in select */
   virtual uint8 uncacheable()= 0; /* query is uncacheable */
   enum Item_result type() { return res_type; }
@@ -325,6 +365,8 @@
   virtual bool change_result(Item_subselect *si, select_subselect *result)= 0;
   virtual bool no_tables()= 0;
   virtual bool is_executed() const { return FALSE; }
+  /* Check if subquery produced any rows during last query execution */
+  virtual bool no_rows() = 0;
 };
 
 
@@ -342,7 +384,7 @@
   void cleanup();
   int prepare();
   void fix_length_and_dec(Item_cache** row);
-  int exec();
+  int exec(bool full_scan);
   uint cols();
   uint8 uncacheable();
   void exclude();
@@ -351,6 +393,7 @@
   bool change_result(Item_subselect *si, select_subselect *result);
   bool no_tables();
   bool is_executed() const { return executed; }
+  bool no_rows();
 };
 
 
@@ -364,7 +407,7 @@
   void cleanup();
   int prepare();
   void fix_length_and_dec(Item_cache** row);
-  int exec();
+  int exec(bool full_scan);
   uint cols();
   uint8 uncacheable();
   void exclude();
@@ -373,6 +416,7 @@
   bool change_result(Item_subselect *si, select_subselect *result);
   bool no_tables();
   bool is_executed() const;
+  bool no_rows();
 };
 
 
@@ -382,6 +426,12 @@
 protected:
   st_join_table *tab;
   Item *cond;
+  /* 
+    TRUE<=> last execution produced empty set. Valid only when left
+    expression is NULL.
+  */
+  bool empty_result_set;
+  bool null_keypart; /* TRUE <=> constructed search tuple has a NULL */
 public:
 
   // constructor can assign THD because it will be called after JOIN::prepare
@@ -395,7 +445,7 @@
   void cleanup();
   int prepare();
   void fix_length_and_dec(Item_cache** row);
-  int exec();
+  int exec(bool full_scan);
   uint cols() { return 1; }
   uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
   void exclude();
@@ -403,11 +453,15 @@
   void print (String *str);
   bool change_result(Item_subselect *si, select_subselect *result);
   bool no_tables();
+  int scan_table();
+  bool copy_ref_key();
+  bool no_rows() { return empty_result_set; }
 };
 
 
 class subselect_indexsubquery_engine: public subselect_uniquesubquery_engine
 {
+  /* FALSE for 'ref', TRUE for 'ref-or-null'. */
   bool check_null;
 public:
 
@@ -418,7 +472,7 @@
     :subselect_uniquesubquery_engine(thd, tab_arg, subs, where),
      check_null(chk_null)
   {}
-  int exec();
+  int exec(bool full_scan);
   void print (String *str);
 };
 
--- New file ---
+++ mysql-test/r/subselect3.result	06/10/31 20:42:45
drop table if exists t0, t1, t2, t3, t4;
create table t1 (oref int, grp int, ie int) ;
insert into t1 (oref, grp, ie) values
(1, 1, 1),
(1, 1, 1),
(1, 2, NULL),
(2, 1, 3),
(3, 1, 4),
(3, 2, NULL);
create table t2 (oref int, a int);
insert into t2 values 
(1, 1),
(2, 2),
(3, 3),
(4, NULL),
(2, NULL);
select a, oref, a in (select max(ie) 
from t1 where oref=t2.oref group by grp) from t2;
a	oref	a in (select max(ie) 
from t1 where oref=t2.oref group by grp)
1	1	1
2	2	0
3	3	NULL
NULL	4	0
NULL	2	NULL
explain extended
select a, oref, a in (select max(ie) 
from t1 where oref=t2.oref group by grp) from t2;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	
2	DEPENDENT SUBQUERY	t1	ALL	NULL	NULL	NULL	NULL	6	Using where; Using temporary; Using filesort
Warnings:
Note	1276	Field or reference 't2.oref' of SELECT #2 was resolved in SELECT #1
Note	1003	select `test`.`t2`.`a` AS `a`,`test`.`t2`.`oref` AS `oref`,<in_optimizer>(`test`.`t2`.`a`,<exists>(select max(`test`.`t1`.`ie`) AS `max(ie)` from `test`.`t1` where (`test`.`t1`.`oref` = `test`.`t2`.`oref`) group by `test`.`t1`.`grp` having trigcond((<cache>(`test`.`t2`.`a`) = <ref_null_helper>(max(`test`.`t1`.`ie`)))))) AS `a in (select max(ie) 
from t1 where oref=t2.oref group by grp)` from `test`.`t2`
explain extended
select a, oref from t2 
where a in (select max(ie) from t1 where oref=t2.oref group by grp);
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
2	DEPENDENT SUBQUERY	t1	ALL	NULL	NULL	NULL	NULL	6	Using where; Using temporary; Using filesort
Warnings:
Note	1276	Field or reference 't2.oref' of SELECT #2 was resolved in SELECT #1
Note	1003	select `test`.`t2`.`a` AS `a`,`test`.`t2`.`oref` AS `oref` from `test`.`t2` where <in_optimizer>(`test`.`t2`.`a`,<exists>(select max(`test`.`t1`.`ie`) AS `max(ie)` from `test`.`t1` where (`test`.`t1`.`oref` = `test`.`t2`.`oref`) group by `test`.`t1`.`grp` having (<cache>(`test`.`t2`.`a`) = <ref_null_helper>(max(`test`.`t1`.`ie`)))))
create table t3 (a int);
insert into t3 values (NULL), (NULL);
flush status;
select a in (select max(ie) from t1 where oref=4 group by grp) from t3;
a in (select max(ie) from t1 where oref=4 group by grp)
0
0
show status like 'Handler_read_rnd_next';
Variable_name	Value
Handler_read_rnd_next	11
select ' ^ This must show 11' Z;
Z
 ^ This must show 11
explain extended select a in (select max(ie) from t1 where oref=4 group by grp) from t3;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	2	
2	DEPENDENT SUBQUERY	t1	ALL	NULL	NULL	NULL	NULL	6	Using where; Using temporary; Using filesort
Warnings:
Note	1003	select <in_optimizer>(`test`.`t3`.`a`,<exists>(select max(`test`.`t1`.`ie`) AS `max(ie)` from `test`.`t1` where (`test`.`t1`.`oref` = 4) group by `test`.`t1`.`grp` having trigcond((<cache>(`test`.`t3`.`a`) = <ref_null_helper>(max(`test`.`t1`.`ie`)))))) AS `a in (select max(ie) from t1 where oref=4 group by grp)` from `test`.`t3`
drop table t1, t2, t3;
create table t1 (a int, oref int, key(a));
insert into t1 values 
(1, 1),
(1, NULL),
(2, 3),
(2, NULL),
(3, NULL);
create table t2 (a int, oref int);
insert into t2 values (1, 1), (2,2), (NULL, 3), (NULL, 4);
select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;
oref	a	Z
1	1	1
2	2	0
3	NULL	NULL
4	NULL	0
explain extended 
select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	4	
2	DEPENDENT SUBQUERY	t1	index_subquery	a	a	5	func	2	Using index; Using where
Warnings:
Note	1276	Field or reference 't2.oref' of SELECT #2 was resolved in SELECT #1
Note	1003	select `test`.`t2`.`oref` AS `oref`,`test`.`t2`.`a` AS `a`,<in_optimizer>(`test`.`t2`.`a`,<exists>(<index_lookup>(<cache>(`test`.`t2`.`a`) in t1 on a checking NULL where (`test`.`t1`.`oref` = `test`.`t2`.`oref`)))) AS `Z` from `test`.`t2`
flush status;
select oref, a from t2 where a in (select a from t1 where oref=t2.oref);
oref	a
1	1
show status like '%Handler_read_rnd_next';
Variable_name	Value
Handler_read_rnd_next	5
delete from t2;
insert into t2 values (NULL, 0),(NULL, 0), (NULL, 0), (NULL, 0);
flush status;
select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;
oref	a	Z
0	NULL	0
0	NULL	0
0	NULL	0
0	NULL	0
show status like '%Handler_read%';
Variable_name	Value
Handler_read_first	0
Handler_read_key	0
Handler_read_next	0
Handler_read_prev	0
Handler_read_rnd	0
Handler_read_rnd_next	29
select 'No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.' Z;
Z
No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.
drop table t1, t2;
create table t1 (a int, b int, primary key (a));
insert into t1 values (1,1), (3,1),(100,1);
create table t2 (a int, b int);
insert into t2 values (1,1),(2,1),(NULL,1),(NULL,0);
select a,b, a in (select a from t1 where t1.b = t2.b) Z from t2 ;
a	b	Z
1	1	1
2	1	0
NULL	1	NULL
NULL	0	0
drop table t1, t2;
create table t1 (a int, b int, key(a));
insert into t1 values 
(0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);
create table t2 like t1;
insert into t2 select * from t1;
update t2 set b=1;
create table t3 (a int, oref int);
insert into t3 values (1, 1), (NULL,1), (NULL,0);
select a, oref, 
t3.a in (select t1.a from t1, t2 where t1.b=t2.a and t2.b=t3.oref) Z 
from t3;
a	oref	Z
1	1	1
NULL	1	NULL
NULL	0	0
explain extended
select a, oref, 
t3.a in (select t1.a from t1, t2 where t1.b=t2.a and t2.b=t3.oref) Z 
from t3;
id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	3	
2	DEPENDENT SUBQUERY	t1	ref_or_null	a	a	5	func	4	Using where
2	DEPENDENT SUBQUERY	t2	ref	a	a	5	test.t1.b	1	Using where
Warnings:
Note	1276	Field or reference 't3.oref' of SELECT #2 was resolved in SELECT #1
Note	1003	select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<in_optimizer>(`test`.`t3`.`a`,<exists>(select 1 AS `Not_used` from `test`.`t1` join `test`.`t2` where ((`test`.`t2`.`a` = `test`.`t1`.`b`) and (`test`.`t2`.`b` = `test`.`t3`.`oref`) and trigcond(((<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`) or isnull(`test`.`t1`.`a`)))) having trigcond(<is_not_null_test>(`test`.`t1`.`a`)))) AS `Z` from `test`.`t3`
drop table t1, t2, t3;

--- New file ---
+++ mysql-test/t/subselect3.test	06/10/31 20:42:45
--disable_warnings
drop table if exists t0, t1, t2, t3, t4;
--enable_warnings

#
# 1. Subquery with GROUP/HAVING
#
create table t1 (oref int, grp int, ie int) ;
insert into t1 (oref, grp, ie) values
 (1, 1, 1),
 (1, 1, 1),
 (1, 2, NULL),

 (2, 1, 3),

 (3, 1, 4),
 (3, 2, NULL);

#  Ok, for  
#   select max(ie) from t1 where oref=PARAM group by grp
# we'll have:
#  1  ->  (1, NULL)    matching + NULL
#  2  ->  (3)          non-matching
#  3  ->  (3, NULL)    non-matching + NULL
#  4  ->  ()           nothing.

create table t2 (oref int, a int);
insert into t2 values 
  (1, 1),
  (2, 2),
  (3, 3),
  (4, NULL),
  (2, NULL);

# true, false, null, false, null
select a, oref, a in (select max(ie) 
  from t1 where oref=t2.oref group by grp) from t2;

# This must have a trigcond
explain extended
select a, oref, a in (select max(ie) 
  from t1 where oref=t2.oref group by grp) from t2;

# This must not have a trigcond:
explain extended
select a, oref from t2 
where a in (select max(ie) from t1 where oref=t2.oref group by grp);


# Non-correlated subquery, 2 NULL evaluations
create table t3 (a int);
insert into t3 values (NULL), (NULL);
flush status;
select a in (select max(ie) from t1 where oref=4 group by grp) from t3;
show status like 'Handler_read_rnd_next';
select ' ^ This must show 11' Z;

# This must show trigcond:
explain extended select a in (select max(ie) from t1 where oref=4 group by grp) from t3;

drop table t1, t2, t3;

# 
#  2. Subquery handled with 'index_subquery':
# 
create table t1 (a int, oref int, key(a));
insert into t1 values 
  (1, 1),
  (1, NULL),
  (2, 3),
  (2, NULL),
  (3, NULL);

create table t2 (a int, oref int);
insert into t2 values (1, 1), (2,2), (NULL, 3), (NULL, 4);

select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;

# The next explain shows "using index" but that is just incorrect display
# (there is a bug filed about this).
explain extended 
select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;

flush status;
select oref, a from t2 where a in (select a from t1 where oref=t2.oref);
# This will only show access to t2:
show status like '%Handler_read_rnd_next';

# Check that repeated NULL-scans are not cached (subq. is not correlated):
delete from t2;
insert into t2 values (NULL, 0),(NULL, 0), (NULL, 0), (NULL, 0);

flush status;
select oref, a, a in (select a from t1 where oref=t2.oref) Z from t2;
show status like '%Handler_read%';
select 'No key lookups, seq reads: 29= 5 reads from t2 + 4 * 6 reads from t1.' Z;

drop table t1, t2;

#
# 3. Subquery handled with 'unique_index_subquery':
#
create table t1 (a int, b int, primary key (a));
insert into t1 values (1,1), (3,1),(100,1);

create table t2 (a int, b int);
insert into t2 values (1,1),(2,1),(NULL,1),(NULL,0);

select a,b, a in (select a from t1 where t1.b = t2.b) Z from t2 ;

drop table t1, t2;

#
# 4. Subquery that is a join, with ref access
#
create table t1 (a int, b int, key(a));
insert into t1 values 
  (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);

create table t2 like t1;
insert into t2 select * from t1;
update t2 set b=1;

create table t3 (a int, oref int);
insert into t3 values (1, 1), (NULL,1), (NULL,0);
select a, oref, 
       t3.a in (select t1.a from t1, t2 where t1.b=t2.a and t2.b=t3.oref) Z 
from t3;

# This must have trigcond in WHERE and HAVING:
explain extended
select a, oref, 
       t3.a in (select t1.a from t1, t2 where t1.b=t2.a and t2.b=t3.oref) Z 
from t3;

drop table t1, t2, t3;


Thread
bk commit into 5.0 tree (sergefp:1.2290) BUG#8804Sergey Petrunia31 Oct