List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:March 19 2007 12:39pm
Subject:bk commit into 5.2 tree (sergefp:1.2456)
View as plain text  
Below is the list of changes that have just been committed into a local
5.2 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-03-19 14:39:06+03:00, sergefp@stripped +14 -0
  WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
   - MS1 code

  sql/item.h@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +1 -0
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/item_cmpfunc.cc@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +18 -0
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/item_cmpfunc.h@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +1 -0
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/item_func.cc@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +32 -1
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/item_func.h@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +1 -0
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/item_row.cc@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +9 -0
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/item_row.h@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +1 -0
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/item_subselect.cc@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +5 -2
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/item_subselect.h@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +17 -2
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/sql_base.cc@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +5 -0
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/sql_class.h@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +2 -0
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/sql_select.cc@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +662 -6
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/sql_select.h@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +8 -1
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

  sql/table.h@stripped, 2007-03-19 14:39:00+03:00, sergefp@stripped +1 -0
    WL#3740 "Subquery optimization: Semijoin: Pull-out of inner tables"
     - MS1 code

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sergefp
# Host:	pylon.mylan
# Root:	/home/psergey/mysql-5.2-subq-r4-cp

--- 1.206/sql/item.h	2007-03-19 14:39:15 +03:00
+++ 1.207/sql/item.h	2007-03-19 14:39:15 +03:00
@@ -520,6 +520,7 @@
   virtual void make_field(Send_field *field);
   Field *make_string_field(TABLE *table);
   virtual bool fix_fields(THD *, Item **);
+  virtual void fix_after_tbl_changes(uint shiftval) {};
   /*
     should be used in case where we are sure that we do not need
     complete fix_fields() procedure.

--- 1.248/sql/item_cmpfunc.cc	2007-03-19 14:39:15 +03:00
+++ 1.249/sql/item_cmpfunc.cc	2007-03-19 14:39:15 +03:00
@@ -2795,11 +2795,15 @@
   DBUG_ASSERT(fixed == 0);
   List_iterator<Item> li(list);
   Item *item;
+  void *orig_thd_marker= thd->thd_marker;
 #ifndef EMBEDDED_LIBRARY
   char buff[sizeof(char*)];			// Max local vars in function
 #endif
   not_null_tables_cache= used_tables_cache= 0;
   const_item_cache= 1;
+
+  if (functype() == COND_OR_FUNC)
+    thd->thd_marker= 0;
   /*
     and_table_cache is the value that Item_cond_or() returns for
     not_null_tables()
@@ -2858,10 +2862,24 @@
       maybe_null=1;
   }
   thd->lex->current_select->cond_count+= list.elements;
+  thd->thd_marker= orig_thd_marker;
   fix_length_and_dec();
   fixed= 1;
   return FALSE;
 }
+
+
+void Item_cond::fix_after_tbl_changes(uint shiftval)
+{
+  List_iterator<Item> li(list);
+  Item *item;
+  and_tables_cache= and_tables_cache << shiftval;
+  while ((item= li++))
+  {
+    item->fix_after_tbl_changes(shiftval);
+  }
+}
+
 
 bool Item_cond::walk(Item_processor processor, bool walk_subquery, byte *arg)
 {

--- 1.151/sql/item_cmpfunc.h	2007-03-19 14:39:15 +03:00
+++ 1.152/sql/item_cmpfunc.h	2007-03-19 14:39:15 +03:00
@@ -1284,6 +1284,7 @@
   bool add(Item *item) { return list.push_back(item); }
   void add_at_head(List<Item> *nlist) { list.prepand(nlist); }
   bool fix_fields(THD *, Item **ref);
+  void fix_after_tbl_changes(uint shiftval);
 
   enum Type type() const { return COND_ITEM; }
   List<Item>* argument_list() { return &list; }

--- 1.357/sql/item_func.cc	2007-03-19 14:39:15 +03:00
+++ 1.358/sql/item_func.cc	2007-03-19 14:39:15 +03:00
@@ -141,10 +141,11 @@
 {
   DBUG_ASSERT(fixed == 0);
   Item **arg,**arg_end;
+  void *save_thd_marker= thd->thd_marker;
 #ifndef EMBEDDED_LIBRARY			// Avoid compiler warning
   char buff[STACK_BUFF_ALLOC];			// Max argument in function
 #endif
-
+  thd->thd_marker= 0;
   used_tables_cache= not_null_tables_cache= 0;
   const_item_cache=1;
 
@@ -190,7 +191,37 @@
   if (thd->net.report_error) // An error inside fix_length_and_dec occured
     return TRUE;
   fixed= 1;
+  thd->thd_marker= save_thd_marker;
   return FALSE;
+}
+
+
+void Item_func::fix_after_tbl_changes(uint shiftval)
+{
+  Item **arg,**arg_end;
+  used_tables_cache= used_tables_cache << shiftval;
+  not_null_tables_cache= not_null_tables_cache << shiftval;
+  if (arg_count)
+  {						// Print purify happy
+    for (arg=args, arg_end=args+arg_count; arg != arg_end ; arg++)
+    {
+      (*arg)->fix_after_tbl_changes(shiftval);
+      /* 
+        psergey-todo: Check if update of maybe_null is necessary:
+
+        On one hand, if we look at an expression in a global context of
+        parent's join, it may become NULLable because the semi-join is
+        within a nested outer join.
+
+        On the other hand, there is no reason to ever evaluate this
+        expression outside of the outer join. And within an outer join,
+        whatever was NOT NULL inside the subquery remains NOT NULL within
+        the semi-join.
+      */
+      if ((*arg)->maybe_null)
+        maybe_null= 1;
+    }
+  }
 }
 
 

--- 1.161/sql/item_func.h	2007-03-19 14:39:15 +03:00
+++ 1.162/sql/item_func.h	2007-03-19 14:39:15 +03:00
@@ -116,6 +116,7 @@
   // Constructor used for Item_cond_and/or (see Item comment)
   Item_func(THD *thd, Item_func *item);
   bool fix_fields(THD *, Item **ref);
+  virtual void fix_after_tbl_changes(uint shiftval);
   table_map used_tables() const;
   table_map not_null_tables() const;
   void update_used_tables();

--- 1.376/sql/sql_base.cc	2007-03-19 14:39:15 +03:00
+++ 1.377/sql/sql_base.cc	2007-03-19 14:39:15 +03:00
@@ -5874,6 +5874,7 @@
   SELECT_LEX *select_lex= thd->lex->current_select;
   Query_arena *arena= thd->stmt_arena, backup;
   TABLE_LIST *table= NULL;	// For HP compilers
+  void *save_thd_marker= thd->thd_marker;
   /*
     it_is_update set to TRUE when tables of primary SELECT_LEX (SELECT_LEX
     which belong to LEX, i.e. most up SELECT) will be updated by
@@ -5901,6 +5902,7 @@
       goto err_no_arena;
   }
 
+  thd->thd_marker= (void*)1;
   if (*conds)
   {
     thd->where="where clause";
@@ -5908,6 +5910,7 @@
 	(*conds)->check_cols(1))
       goto err_no_arena;
   }
+  thd->thd_marker= save_thd_marker;
 
   /*
     Apply fix_fields() to all ON clauses at all levels of nesting,
@@ -5923,6 +5926,7 @@
       if (embedded->on_expr)
       {
         /* Make a join an a expression */
+        thd->thd_marker= (void*)embedded;
         thd->where="on clause";
         if (!embedded->on_expr->fixed &&
             embedded->on_expr->fix_fields(thd, &embedded->on_expr) ||
@@ -5947,6 +5951,7 @@
       }
     }
   }
+  thd->thd_marker= save_thd_marker;
 
   if (!thd->stmt_arena->is_conventional())
   {

--- 1.341/sql/sql_class.h	2007-03-19 14:39:15 +03:00
+++ 1.342/sql/sql_class.h	2007-03-19 14:39:15 +03:00
@@ -954,6 +954,8 @@
   /* container for handler's private per-connection data */
   void *ha_data[MAX_HA];
 
+  /* psergey-subq: place to store various things */
+  void *thd_marker;
 #ifndef MYSQL_CLIENT
   int binlog_setup_trx_data();
 

--- 1.494/sql/sql_select.cc	2007-03-19 14:39:15 +03:00
+++ 1.495/sql/sql_select.cc	2007-03-19 14:39:16 +03:00
@@ -235,6 +235,7 @@
   {
     SELECT_LEX_UNIT *unit= &lex->unit;
     unit->set_limit(unit->global_parameters);
+    thd->thd_marker= 0; //psergey-todo: find a better place for this
     /*
       'options' of mysql_select will be set in JOIN, as far as JOIN for
       every PS/SP execution new, we will not need reset this flag if 
@@ -331,6 +332,7 @@
   return res;
 }
 
+#define MAGIC_IN_WHERE_TOP_LEVEL 10
 /*
   Function to setup clauses without sum functions
 */
@@ -444,11 +446,50 @@
     if ((subselect= select_lex->master_unit()->item))
     {
       Item_subselect::trans_res res;
-      if ((res= subselect->select_transformer(this)) !=
-	  Item_subselect::RES_OK)
+      bool do_delay= TRUE;
+      /*
+        psergey: 
+        Subquery is a candidate for conversion into semi-join nest if 
+          1. It is an IN/=ANY subquery
+          2. It is a single SELECT (no UNIONs)
+          3. Subquery does not have GROUP BY or ORDER BY
+          4. Subquery does not use aggregate functions or HAVING
+
+          5. Subquery predicate is at the AND-top-level of ON/WHERE clause
+
+          (*): A single table doesn't have a JOIN (TODO: decide if we
+               handle this? If yes, provision to switch to multi-table
+               delete?)
+      */
+      if (subselect->substype() == Item_subselect::IN_SUBS &&           // 1
+          !select_lex->master_unit()->first_select()->next_select() && 
// 2
+          !select_lex->group_list.elements && !order &&               
 // 3
+          !having && !select_lex->with_sum_func &&                    
 // 4
+          thd->thd_marker &&                                            // 5
+          select_lex->outer_select()->join &&                           //
(*)
+          do_delay)
+      {
+        fprintf(stderr, "subq is an sj candidate\n");
+        Item_in_subselect *in_subs= (Item_in_subselect*)subselect;
+        // fix the left col
+        if (!in_subs->left_expr->fixed &&
+             in_subs->left_expr->fix_fields(thd, &in_subs->left_expr));
+
+        //mark it for further processing
+        select_lex->outer_select()->join->sj_subselects.append(in_subs);
+        in_subs->expr_join_nest= (TABLE_LIST*)thd->thd_marker;
+        in_subs->delay_fix_fields= TRUE;
+      }
+      else
       {
-        select_lex->fix_prepare_information(thd, &conds, &having);
-	DBUG_RETURN((res == Item_subselect::RES_ERROR));
+        fprintf(stderr, "subq is not an sj candidate\n");
+
+        if ((res= subselect->select_transformer(this)) !=
+            Item_subselect::RES_OK)
+        {
+          select_lex->fix_prepare_information(thd, &conds, &having);
+          DBUG_RETURN((res == Item_subselect::RES_ERROR));
+        }
       }
     }
   }
@@ -2042,6 +2083,201 @@
   DBUG_RETURN(error);
 }
 
+
+
+//////////////////////////////////////////////////////////////////////////////
+// psergey-dbug-dump
+//////////////////////////////////////////////////////////////////////////////
+template <class T> class GraphDumper
+{
+public:
+  // Record given TABLE_LIST for dumping.
+  void dbug_dump_also(T *tbl)
+  {
+    if (!tbl)
+      return;
+    // check if we've already scheduled and/or dumped the element
+    for (int i= 0; i < last; i++)
+    {
+      if (dbg_tables_to_dump[i] == tbl)
+      {
+        return;
+      }
+    }
+    // schedule the element to be dumped
+    dbg_tables_to_dump[last++]=  tbl;
+  }
+
+  bool get_next(T **elem)
+  {
+    if (first < last)
+    {
+      *elem= dbg_tables_to_dump[first++];
+      return TRUE;
+    }
+    return FALSE;
+  }
+
+  void reset()
+  {
+    first= last= 0;
+  }
+
+  T *dbg_tables_to_dump[1000];
+  int first; // First undumped table
+  int last;  // Last undumped element
+};
+
+GraphDumper<TABLE_LIST> dbug_tbl_graph;
+
+GraphDumper<List<TABLE_LIST> > dbug_tbl_lists;
+
+FILE *out;
+
+
+/* 
+  - Dump one TABLE_LIST and its outgoing edges
+  - Schedule stuff seen along these edges for dumping
+*/
+
+void dump_table_list(TABLE_LIST *tbl)
+{
+  // Dump the node
+  fprintf(out, "\"%p\" [\n", tbl);
+  fprintf(out, "  label = \"%p|", tbl);
+  fprintf(out, "alias=%s|", tbl->alias? tbl->alias : "NULL");
+  fprintf(out, "<next_leaf>next_leaf=%p|", tbl->next_leaf);
+  fprintf(out, "<next_local>next_local=%p|", tbl->next_local);
+  fprintf(out, "<next_global>next_global=%p|", tbl->next_global);
+  fprintf(out, "<embedding>embedding=%p", tbl->embedding);
+
+  if (tbl->nested_join)
+     fprintf(out, "|<nested_j>nested_j=%p", tbl->nested_join);
+  if (tbl->join_list)
+     fprintf(out, "|<join_list>join_list=%p", tbl->join_list);
+  if (tbl->on_expr)
+     fprintf(out, "|<on_expr>on_expr=%p", tbl->on_expr);
+  fprintf(out, "\"\n");
+  fprintf(out, "  shape = \"record\"\n];\n\n");
+ 
+  if (tbl->next_leaf)
+  {
+    fprintf(out, "\n\"%p\":next_leaf -> \"%p\"[ color = \"#000000\" ];\n",  
+            tbl, tbl->next_leaf);
+    dbug_tbl_graph.dbug_dump_also(tbl->next_leaf);
+  }
+  if (tbl->next_local)
+  {
+    fprintf(out, "\n\"%p\":next_local -> \"%p\"[ color = \"#404040\" ];\n",  
+            tbl, tbl->next_local);
+    dbug_tbl_graph.dbug_dump_also(tbl->next_local);
+  }
+  if (tbl->next_global)
+  {
+    fprintf(out, "\n\"%p\":next_global -> \"%p\"[ color = \"#808080\" ];\n",  
+            tbl, tbl->next_global);
+    dbug_tbl_graph.dbug_dump_also(tbl->next_global);
+  }
+
+
+  if (tbl->embedding)
+  {
+    fprintf(out, "\n\"%p\":embedding -> \"%p\"[ color = \"#FF0000\" ];\n",  
+            tbl, tbl->embedding);
+    dbug_tbl_graph.dbug_dump_also(tbl->embedding);
+  }
+
+  if (tbl->join_list)
+  {
+    fprintf(out, "\n\"%p\":join_list -> \"%p\"[ color = \"#0000FF\" ];\n",  
+            tbl, tbl->join_list);
+    dbug_tbl_lists.dbug_dump_also(tbl->join_list);
+  }
+}
+
+void dump_list(List<TABLE_LIST> *list)
+{
+//top_join_list
+  fprintf(out, "\"%p\" [\n", list);
+  fprintf(out, "  bgcolor = \"\"");
+  fprintf(out, "  label = \"L %p\"", list);
+  fprintf(out, "  shape = \"record\"\n];\n\n");
+}
+
+void dump_TABLE_LIST_struct(st_select_lex *select_lex, TABLE_LIST *first_leaf)
+{
+  DBUG_ENTER("dump_TABLE_LIST_struct");
+  char filename[500];
+  int no = 0;
+  do {
+    sprintf(filename, "tlist_tree%.3d.g", no);
+    out = fopen(filename, "rt");
+    if (out)
+    {
+      //file exists, try next
+      fclose(out);
+    }
+    no++;
+  } while (out);
+ 
+  //ok, file doesn't not exist, try opening
+  out = fopen(filename, "wt");
+ 
+  if (!out)
+  {
+    DBUG_PRINT("tree_dump", ("Failed to create output file"));
+    DBUG_VOID_RETURN;
+  }
+ 
+  DBUG_PRINT("tree_dump", ("dumping tree to %s", filename));
+     
+  fputs("digraph g {\n", out);
+  fputs("graph [", out);
+  fputs("  rankdir = \"LR\"", out);
+  fputs("];", out);
+   
+  dbug_tbl_graph.reset();
+  dump_table_list(first_leaf);   
+  TABLE_LIST *tbl;
+  while (dbug_tbl_graph.get_next(&tbl))
+  {
+    dump_table_list(tbl);
+  }
+
+  List<TABLE_LIST> *plist;
+  dbug_tbl_lists.dbug_dump_also(&select_lex->top_join_list);
+  while (dbug_tbl_lists.get_next(&plist))
+  {
+    dump_list(plist);
+  }
+
+  fprintf(out, " { rank = same; ");
+  for (TABLE_LIST *tl=first_leaf; tl; tl= tl->next_leaf)
+    fprintf(out, " \"%p\"; ", tl);
+  fprintf(out, "};\n");
+  fputs("}", out);
+  fclose(out);
+ 
+  char filename2[500];
+  filename[strlen(filename) - 1] = 0;
+  filename[strlen(filename) - 1] = 0;
+ 
+  sprintf(filename2, "%s.query", filename);
+  out = fopen(filename2, "wt");
+  if (out)
+  {
+    fprintf(out, "%s", current_thd->query);
+    fclose(out);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// psergey-dbug-dump ends
+//////////////////////////////////////////////////////////////////////////////
+
+
 /*
   An entry point to single-unit select (a select without UNION).
 
@@ -2146,6 +2382,19 @@
     }
   }
 
+  //psergey-subq:
+  if (!unit->item)
+  {
+    //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables);
+    /* We're not in a subquery predicate */
+    if (join->fix_subqueries())
+    {
+      thd->net.report_error= 1;
+      goto err;
+    }
+    //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables);
+  }
+
   if ((err= join->optimize()))
   {
     goto err;					// 1
@@ -2188,6 +2437,357 @@
   DBUG_RETURN(join->error);
 }
 
+
+int subq_sj_candidate_cmp(Item_in_subselect* const *el1, 
+                          Item_in_subselect * const *el2)
+{
+  return ((*el1)->sort_by < (*el2)->sort_by) ? 1 : 
+         ( ((*el1)->sort_by == (*el2)->sort_by)? 0 : -1);
+}
+
+
+inline Item * and_items(Item* cond, Item *item)
+{
+  return (cond? (new Item_cond_and(cond, item)) : item);
+}
+
+
+static TABLE_LIST *alloc_join_nest(THD *thd)
+{
+  TABLE_LIST *tbl;
+  if (!(tbl= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+
+                                       sizeof(NESTED_JOIN))))
+    return NULL;
+  tbl->nested_join= (NESTED_JOIN*) ((byte*)tbl + 
+                                    ALIGN_SIZE(sizeof(TABLE_LIST)));
+  return tbl;
+}
+
+
+void fix_list_after_tbl_changes(List<TABLE_LIST> tlist, uint shiftval)
+{
+  List_iterator<TABLE_LIST> it(tlist);
+  TABLE_LIST *table;
+  while ((table= it++))
+  {
+    if (table->on_expr)
+      fix_list_after_tbl_changes(tlist, shiftval);
+    if (table->nested_join)
+      fix_list_after_tbl_changes(table->nested_join->join_list, shiftval);
+  }
+}
+
+
+/*
+  Convert a subquery predicate into semi-join nest
+
+  SYNOPSIS
+    //psergey-todo:
+*/
+
+bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
+{
+  SELECT_LEX *parent_lex= parent_join->select_lex;
+  TABLE_LIST *emb_tbl_nest= NULL;
+  List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
+  DBUG_ENTER("convert_subq_to_sj");
+  /*
+    1. Find out where to put the predicate into.
+     Note: for "t1 LEFT JOIN t2" this will be t2, a leaf.
+  */
+  if ((void*)subq_pred->expr_join_nest != (void*)1)
+  {
+    if (subq_pred->expr_join_nest->nested_join)
+    {
+      /*
+        We're dealing with
+
+          ... [LEFT] JOIN  ( ... ) ON (subquery AND whatever) ...
+
+        The sj-nest will be inserted into the brackets nest.
+      */
+      emb_tbl_nest=  subq_pred->expr_join_nest;
+      emb_join_list= &emb_tbl_nest->nested_join->join_list;
+    }
+    else if (!subq_pred->expr_join_nest->outer_join)
+    {
+      /*
+        We're dealing with
+
+          ... INNER JOIN tblX ON (subquery AND whatever) ...
+
+        The sj-nest will be tblX's "sibling", i.e. another child of its
+        parent. This is ok because tblX is joined as an inner join.
+      */
+      emb_tbl_nest= subq_pred->expr_join_nest->embedding;
+      if (emb_tbl_nest)
+        emb_join_list= &emb_tbl_nest->nested_join->join_list;
+    }
+    else if (!subq_pred->expr_join_nest->join_list)
+    {
+      TABLE_LIST *outer_tbl= subq_pred->expr_join_nest;      
+      TABLE_LIST *wrap_nest;
+      /*
+        We're dealing with
+
+          ... LEFT JOIN tbl ON (on_expr AND subq_pred) ...
+
+        we'll need to convert it into:
+
+          ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ...
+                        |                      |
+                        |<----- wrap_nest ---->|
+        
+        Q:  other subqueries may be pointing to this element. What to do?
+        A1: simple solution: copy *subq_pred->expr_join_nest= *parent_nest.
+            But we'll need to fix other pointers.
+        A2: Another way: have TABLE_LIST::next_ptr so the following
+            subqueries know the table has been nested.
+        A3: changes in the TABLE_LIST::outer_join will make everything work
+            automatically.
+      */
+      if (!(wrap_nest= alloc_join_nest(parent_join->thd)))
+        DBUG_RETURN(TRUE);
+      wrap_nest->embedding= outer_tbl->embedding;
+      wrap_nest->join_list= outer_tbl->join_list;
+      wrap_nest->alias= (char*) "(sj-wrap)";
+
+      wrap_nest->nested_join->join_list.empty();
+      wrap_nest->nested_join->join_list.push_back(outer_tbl);
+
+      outer_tbl->embedding= wrap_nest;
+      outer_tbl->join_list= &wrap_nest->nested_join->join_list;
+
+      /* wrap_nest will take place of outer_tbl, so move outer join and on_expr */
+      wrap_nest->outer_join= outer_tbl->outer_join;
+      outer_tbl->outer_join= 0;
+
+      wrap_nest->on_expr= outer_tbl->on_expr;
+      outer_tbl->on_expr= NULL;
+
+      List_iterator<TABLE_LIST> li(*outer_tbl->join_list);
+      TABLE_LIST *tbl;
+      while ((tbl= li++))
+      {
+        if (tbl == outer_tbl)
+        {
+          li.replace(wrap_nest);
+          break;
+        }
+      }
+      /*
+        Ok now wrap_nest 'contains' outer_tbl and we're ready to add the 
+        semi-join nest into it
+      */
+      emb_join_list= &wrap_nest->nested_join->join_list;
+      emb_tbl_nest=  wrap_nest;
+    }
+    DBUG_RETURN(TRUE);
+  }
+
+  TABLE_LIST *sj_nest;
+  NESTED_JOIN *nested_join;
+  if (!(sj_nest= alloc_join_nest(parent_join->thd)))
+    DBUG_RETURN(TRUE);
+  nested_join= sj_nest->nested_join;
+
+  sj_nest->join_list= emb_join_list;
+  sj_nest->embedding= emb_tbl_nest;
+  sj_nest->alias= (char*) "(sj-nest)";
+  /* Nests do not participate in those 'chains', so: */
+  /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/
+  emb_join_list->push_back(sj_nest);
+
+  /* 
+    nested_join->used_tables and nested_join->not_null_tables are
+    initialized in simplify_joins().
+  */
+  
+  /* 
+    2. Walk through subquery's top list and set 'embedding' to point to the
+       sj-nest.
+  */
+  st_select_lex *subq_lex= subq_pred->unit->first_select();
+  nested_join->join_list.empty();
+  List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list);
+  TABLE_LIST *tl, *last_leaf;
+  while ((tl= li++))
+  {
+    tl->embedding= sj_nest;
+    tl->join_list= &nested_join->join_list;
+    nested_join->join_list.push_back(tl);
+  }
+  
+  /*
+    Reconnect the next_leaf chain.
+    TODO: Do we have to put subquery's tables at the end of the chain?
+          Inserting them at the beginning would be a bit faster.
+  */
+  for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf);
+  tl->next_leaf= subq_lex->leaf_tables;
+  last_leaf= tl;
+
+  /*
+    Same as above for next_local chain
+    (a theory: a next_local chain always starts with ::leaf_tables
+     because view's tables are inserted after the view)
+  */
+  for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local);
+  tl->next_local= subq_lex->leaf_tables;
+
+  /* A theory: no need to re-connect the next_global chain */
+
+  /* 3. Remove the original subquery predicate from the WHERE/ON */
+  *(subq_pred->ref_ptr)= new Item_int(1);
+  
+  /* n. Adjust the parent_join->tables counter */
+  parent_join->tables += subq_lex->join->tables;
+  table_map map= ((table_map)1) << subq_lex->join->tables;
+
+  /* n. Walk through child's tables and adjust table->map */
+  for (tl= subq_lex->leaf_tables; tl; tl= tl->next_leaf)
+  {
+    tl->table->map= map;
+    map= map<<1;
+  }
+  //TODO: check again the hypothesis that we don't have to update
+  //      table->maybe_null
+
+  /* 
+    Put the subquery's WHERE into semi-join's sj_on_expr
+    Add the subquery-induced equalities too.
+  */
+  sj_nest->sj_on_expr= subq_lex->where;
+  if (subq_pred->left_expr->cols() == 1)
+  {
+    Item *item_eq= 
+      new Item_func_eq(subq_pred->left_expr, 
+                       subq_lex->ref_pointer_array[0]);
+    sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
+  }
+  else
+  {
+    for (uint i= 0; i < subq_pred->left_expr->cols(); i++)
+    {
+      Item *item_eq= 
+        new Item_func_eq(subq_pred->left_expr->element_index(i), 
+                         subq_lex->ref_pointer_array[i]);
+      sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq);
+    }
+  }
+
+
+  /*
+    Walk through sj nest's WHERE and ON expressions and call
+    item->fix_table_changes() for all items.
+  */
+  uint shiftval= subq_lex->join->tables;
+  sj_nest->sj_on_expr->fix_after_tbl_changes(shiftval);
+
+  //TODO: also fix all ON expressions
+  fix_list_after_tbl_changes(sj_nest->nested_join->join_list, shiftval);
+
+
+  /* Unlink the child select_lex so it doesn't show up in EXPLAIN: */
+  subq_lex->master_unit()->exclude_level();
+
+  /* 
+    Inject sj_on_expr into the parent's WHERE
+    (TODO: inject into ON)
+  */
+  if (emb_tbl_nest)
+  {
+    emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, 
+                                     sj_nest->sj_on_expr);
+  }
+  else
+  {
+    /* Inject into the WHERE */
+    parent_join->conds= parent_join->select_lex->where=
+      and_items(parent_join->conds, sj_nest->sj_on_expr);
+  }
+
+  return FALSE;
+}
+
+
+/*
+  Walk through semi-join conversion candidates and either convert or fix them
+
+  SYNOPSIS
+    JOIN::fix_subqueries()
+
+  RETURN 
+    FALSE  OK
+    TRUE   Error
+*/
+
+bool JOIN::fix_subqueries()
+{
+  DBUG_ENTER("JOIN::fix_subqueries");
+  Item_in_subselect **in_subq;
+  Item_in_subselect **in_subq_end;
+
+  if (sj_subselects.elements() == 0)
+    DBUG_RETURN(FALSE);
+
+  /* 1. Fix children subqueries */
+  for (in_subq= sj_subselects.front(), in_subq_end= sj_subselects.back(); 
+       in_subq != in_subq_end; in_subq++)
+  {
+    JOIN *child_join= (*in_subq)->unit->first_select()->join;
+    child_join->outer_tables = child_join->tables;
+    if (child_join->fix_subqueries())
+      DBUG_RETURN(TRUE);
+    (*in_subq)->sort_by= (*in_subq)->is_correlated * MAX_TABLES + 
+                         child_join->outer_tables;
+  }
+
+  //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables);
+  /* 
+    2. Pick which subqueries to convert:
+      sort the subquery array
+      - prefer correlated subqueries over uncorrelated;
+      - prefer subqueries that have greater number of outer tables;
+  */
+  sj_subselects.sort(subq_sj_candidate_cmp);
+  // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES
+  bool do_converts= TRUE;
+  for (in_subq= sj_subselects.front(); 
+       tables < MAX_TABLES && in_subq !=in_subq_end; in_subq++)
+  {
+    if (!do_converts)
+      break;
+    //convert subquery to sj;
+    if (convert_subq_to_sj(this, *in_subq))
+      DBUG_RETURN(TRUE);
+  }
+
+  /* 3. Finalize those we didn't convert */
+  for (; in_subq!= in_subq_end; in_subq++)
+  {
+    JOIN *child_join= (*in_subq)->unit->first_select()->join;
+    Item_subselect::trans_res res;
+    (*in_subq)->delay_fix_fields= FALSE;
+    (*in_subq)->changed= 0;
+    (*in_subq)->fixed= 0;
+    res= (*in_subq)->select_transformer(child_join);
+    if (res == Item_subselect::RES_ERROR)
+      DBUG_RETURN(TRUE);
+
+    *((*in_subq)->ref_ptr)= (*in_subq)->substitution;
+    (*in_subq)->changed= 1;
+    (*in_subq)->fixed= 1;
+    if (!(*in_subq)->substitution->fixed &&
+      (*in_subq)->substitution->fix_fields(thd, (*in_subq)->ref_ptr))
+      DBUG_RETURN(TRUE);
+
+    //if ((*in_subq)->fix_fields(thd, (*in_subq)->ref_ptr))
+    //  DBUG_RETURN(TRUE);
+  }
+  DBUG_RETURN(FALSE);
+}
+
 /*****************************************************************************
   Create JOIN_TABS, make a guess about the table types,
   Approximate how many records will be used in each table
@@ -2548,6 +3148,50 @@
       }
     }
   } while (join->const_table_map & found_ref && ref_changed);
+ 
+
+  /*
+  //psergey: 
+  // note: semi-join nests may be inside different outer joins
+  //   (and that does not matter)
+  for (each semi-join nest)
+  {
+    // Action #1: Mark the constant tables to be pulled out
+    pulled_tables= {};
+    for (each table T in semi-join nest)
+    {
+      if (T has at most one row)
+      {
+        pulled_tables.insert(T);
+      }
+    }
+    
+    // Action #2: Find which tables we can pull out based on
+    // update_ref_and_keys() data. Note that pulling one table out
+    // can allow us to pull out some other tables too.
+    do {
+      pulled_a_table= FALSE;
+      for each possible ref access to some inner table T
+      {
+        if (ref access is eq_ref &&
+            all used tables are outer tables)
+        {
+          pulled_a_table=TRUE;
+          pulled_tables.insert(T)
+        }
+      }
+    } while (pulled_a_table);
+
+    // Remove pulled_tables from the join nest;
+    if (we've removed all of the tables)
+      destroy the nest;
+    else
+    {
+      //we're not capable of executing semi-join nests yet
+      report query failure;
+    }
+  }
+  */
 
   /* 
     Update info on indexes that can be used for search lookups as
@@ -4151,7 +4795,7 @@
     This is because table scans uses index and we would not win
     anything by using a table scan.
 
-    A word for word translation of the below if-statement in psergey's
+    A word for word translation of the below if-statement in sergefp's
     understanding: we check if we should use table scan if:
     (1) The found 'ref' access produces more records than a table scan
         (or index scan, or quick select), or 'ref' is more expensive than
@@ -8127,6 +8771,13 @@
   RETURN VALUE
     The new condition, if success
     0, otherwise  
+
+  psergey-todo: this should 
+   - remove semi-joins nested one within another. 
+     (semi-join within nested-outer-join within semi-join must be removed
+     too)
+
+   - Take into account sj_on_expr for outer-to-inner conversions.
 */
 
 static COND *
@@ -8267,7 +8918,12 @@
   while ((table= li++))
   {
     nested_join= table->nested_join;
-    if (nested_join && !table->on_expr)
+    /* psergey: don't remove sj-nests: */
+    if (table->sj_on_expr)
+    {
+      join->sj_nests.append(table);
+    }
+    else if (nested_join && !table->on_expr)
     {
       TABLE_LIST *tbl;
       List_iterator<TABLE_LIST> it(nested_join->join_list);

--- 1.117/sql/sql_select.h	2007-03-19 14:39:16 +03:00
+++ 1.118/sql/sql_select.h	2007-03-19 14:39:16 +03:00
@@ -258,7 +258,9 @@
   JOIN_TAB **map2table;    // mapping between table indexes and JOIN_TABs
   JOIN_TAB *join_tab_save; // saved join_tab for subquery reexecution
   TABLE    **table,**all_tables,*sort_by_table;
-  uint	   tables,const_tables;
+  uint	   tables;        /* Number of tables in the join */
+  uint     outer_tables;  /* Number of tables that are not inside semijoin */
+  uint     const_tables;
   uint	   send_group_parts;
   bool	   sort_and_group,first_record,full_join,group, no_field_update;
   bool	   do_send_rows;
@@ -357,6 +359,10 @@
   
   bool union_part; // this subselect is part of union 
   bool optimized; // flag to avoid double optimization in EXPLAIN
+  
+  //psergey:
+  Dynamic_array<Item_in_subselect*> sj_subselects;
+  Dynamic_array<TABLE_LIST*> sj_nests;
 
   /* 
     storage for caching buffers allocated during query execution. 
@@ -441,6 +447,7 @@
   int destroy();
   void restore_tmp();
   bool alloc_func_list();
+  bool fix_subqueries();
   bool make_sum_func_list(List<Item> &all_fields, List<Item>
&send_fields,
 			  bool before_group_by, bool recompute= FALSE);
 

--- 1.161/sql/table.h	2007-03-19 14:39:16 +03:00
+++ 1.162/sql/table.h	2007-03-19 14:39:16 +03:00
@@ -675,6 +675,7 @@
   char		*db, *alias, *table_name, *schema_table_name;
   char          *option;                /* Used by cache index  */
   Item		*on_expr;		/* Used with outer join */
+  Item          *sj_on_expr;
   /*
     The structure of ON expression presented in the member above
     can be changed during certain optimizations. This member

--- 1.35/sql/item_row.cc	2007-03-19 14:39:16 +03:00
+++ 1.36/sql/item_row.cc	2007-03-19 14:39:16 +03:00
@@ -83,6 +83,15 @@
   return FALSE;
 }
 
+void Item_row::fix_after_tbl_changes(uint shiftval)
+{
+  used_tables_cache= used_tables_cache << shiftval;
+  Item **arg, **arg_end;
+  for (arg= items, arg_end= items+arg_count; arg != arg_end ; arg++)
+  {
+    (*arg)->fix_after_tbl_changes(shiftval);
+  }
+}
 
 void Item_row::cleanup()
 {

--- 1.24/sql/item_row.h	2007-03-19 14:39:16 +03:00
+++ 1.25/sql/item_row.h	2007-03-19 14:39:16 +03:00
@@ -59,6 +59,7 @@
     return 0;
   };
   bool fix_fields(THD *thd, Item **ref);
+  void fix_after_tbl_changes(uint shiftval);
   void cleanup();
   void split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields);
   table_map used_tables() const { return used_tables_cache; };

--- 1.146/sql/item_subselect.cc	2007-03-19 14:39:16 +03:00
+++ 1.147/sql/item_subselect.cc	2007-03-19 14:39:16 +03:00
@@ -612,7 +612,7 @@
 Item_in_subselect::Item_in_subselect(Item * left_exp,
 				     st_select_lex *select_lex):
   Item_exists_subselect(), optimizer(0), transformed(0),
-  pushed_cond_guards(NULL), upper_item(0)
+  pushed_cond_guards(NULL), delay_fix_fields(FALSE), upper_item(0)
 {
   DBUG_ENTER("Item_in_subselect::Item_in_subselect");
   left_expr= left_exp;
@@ -1519,7 +1519,10 @@
 bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref)
 {
   bool result = 0;
-  
+  ref_ptr= ref;
+  //if (delay_fix_fields)
+  //  return 0;
+
   if (thd_arg->lex->view_prepare_mode && left_expr &&
!left_expr->fixed)
     result = left_expr->fix_fields(thd_arg, &left_expr);
 

--- 1.89/sql/item_subselect.h	2007-03-19 14:39:16 +03:00
+++ 1.90/sql/item_subselect.h	2007-03-19 14:39:16 +03:00
@@ -31,13 +31,15 @@
 class Item_subselect :public Item_result_field
 {
   my_bool value_assigned; /* value already assigned to subselect */
-protected:
+public:
   /* thread handler, will be assigned in fix_fields only */
   THD *thd;
   /* substitution instead of subselect in case of optimization */
   Item *substitution;
   /* unit of subquery */
+public:
   st_select_lex_unit *unit;
+protected:
   /* engine that perform execution of subselect (single select or union) */
   subselect_engine *engine;
   /* old engine if engine was changed */
@@ -238,8 +240,9 @@
 
 class Item_in_subselect :public Item_exists_subselect
 {
-protected:
+public:
   Item *left_expr;
+protected:
   /*
     expr & optimizer used in subselect rewriting to store Item for
     all JOIN in UNION
@@ -252,6 +255,18 @@
 public:
   /* Used to trigger on/off conditions that were pushed down to subselect */
   bool *pushed_cond_guards;
+  
+  //psergey:
+  int sort_by;
+  bool delay_fix_fields;
+
+  /* 
+    Location of the subquery predicate. It is either
+     - pointer to join nest if the subquery predicate is in the ON expression
+     - (TABLE_LIST*)1 if the predicate is in the WHERE.
+  */
+  TABLE_LIST *expr_join_nest;
+  Item **ref_ptr;
 
   bool *get_cond_guard(int i)
   {
Thread
bk commit into 5.2 tree (sergefp:1.2456)Sergey Petrunia19 Mar