List: Commits
From: Ole John Aske    Date: May 22 2012 9:04am
Subject: bzr push into mysql-trunk branch (ole.john.aske:3900 to 3901) WL#5940
 3901 Ole John Aske	2012-05-22
      WL#5940: Handler extension and optimizer adaptation required for pushed joins.
      
       

    modified:
      sql/CMakeLists.txt
      sql/ha_ndbcluster.cc
      sql/ha_ndbcluster.h
      sql/handler.cc
      sql/handler.h
      sql/opt_explain.cc
      sql/opt_explain_format.h
      sql/opt_explain_json.cc
      sql/opt_explain_traditional.cc
      sql/sql_executor.cc
      sql/sql_optimizer.cc
      sql/sql_select.cc
 3900 Hemant Kumar	2012-05-22
      Several tests are failing on mysql/daily trunk with "Thread stack overrun" on Solaris. Making these tests experimental since Alik is trying to set up a workaround.

    modified:
      mysql-test/collections/default.experimental
=== modified file 'sql/CMakeLists.txt'
--- a/sql/CMakeLists.txt	2012-05-18 14:39:25 +0000
+++ b/sql/CMakeLists.txt	2012-05-22 09:03:56 +0000
@@ -49,6 +49,7 @@ IF(HAVE_VISIBILITY_HIDDEN)
 ENDIF()
 
 SET(SQL_SHARED_SOURCES
+  abstract_query_plan.cc
   datadict.cc
   debug_sync.cc
   derror.cc

=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc	2012-04-25 12:30:33 +0000
+++ b/sql/ha_ndbcluster.cc	2012-05-22 09:03:56 +0000
@@ -12345,6 +12345,14 @@ ulonglong ha_ndbcluster::table_flags(voi
   */
   if (thd->variables.binlog_format == BINLOG_FORMAT_STMT)
     f= (f | HA_BINLOG_STMT_CAPABLE) & ~HA_HAS_OWN_BINLOGGING;
+
+  /**
+   * To maximize join pushability we want const-table 
+   * optimization blocked if 'ndb_join_pushdown= on'
+   */
+  if (THDVAR(thd, join_pushdown))
+    f= f | HA_BLOCK_CONST_TABLE;
+
   return f;
 }
 
@@ -14504,97 +14512,6 @@ ha_ndbcluster::parent_of_pushed_join() c
   return NULL;
 }
 
-bool
-ha_ndbcluster::test_push_flag(enum ha_push_flag flag) const
-{
-  DBUG_ENTER("test_push_flag");
-  switch (flag) {
-  case HA_PUSH_BLOCK_CONST_TABLE:
-  {
-    /**
-     * We don't support join push down if...
-     *   - not LM_CommittedRead
-     *   - uses blobs
-     */
-    THD *thd= current_thd;
-    if (unlikely(!THDVAR(thd, join_pushdown)))
-      DBUG_RETURN(false);
-
-    if (table->read_set != NULL && uses_blob_value(table->read_set))
-    {
-      DBUG_RETURN(false);
-    }
-
-    NdbOperation::LockMode lm= get_ndb_lock_mode(m_lock.type);
-
-    if (lm != NdbOperation::LM_CommittedRead)
-    {
-      DBUG_RETURN(false);
-    }
-
-    DBUG_RETURN(true);
-  }
-  case HA_PUSH_MULTIPLE_DEPENDENCY:
-    /**
-     * If any child operation within this pushed join refer 
-     * column values (paramValues), the pushed join has dependencies
-     * in addition to the root operation itself.
-     */
-    if (m_pushed_join_operation==PUSHED_ROOT &&
-        m_pushed_join_member->get_field_referrences_count() > 0)  // Childs has field refs
-    {
-      DBUG_RETURN(true);
-    }
-    DBUG_RETURN(false);
-
-  case HA_PUSH_NO_ORDERED_INDEX:
-  {
-    if (m_pushed_join_operation != PUSHED_ROOT)
-    {
-      DBUG_RETURN(true);
-    }
-    const NdbQueryDef& query_def = m_pushed_join_member->get_query_def();
-    const NdbQueryOperationDef::Type root_type=
-      query_def.getQueryOperation((uint)PUSHED_ROOT)->getType();
-
-    /**
-     * Primary key/ unique key lookup is always 'ordered' wrt. itself.
-     */
-    if (root_type == NdbQueryOperationDef::PrimaryKeyAccess  ||
-        root_type == NdbQueryOperationDef::UniqueIndexAccess)
-    {
-      DBUG_RETURN(false);
-    }
-
-    /**
-     * Ordered index scan can be provided as an ordered resultset iff
-     * it has no child scans.
-     */
-    if (root_type == NdbQueryOperationDef::OrderedIndexScan)
-    {
-      for (uint i= 1; i < query_def.getNoOfOperations(); i++)
-      {
-        const NdbQueryOperationDef::Type child_type=
-          query_def.getQueryOperation(i)->getType();
-        if (child_type == NdbQueryOperationDef::TableScan ||
-            child_type == NdbQueryOperationDef::OrderedIndexScan)
-        {
-          DBUG_RETURN(true);
-        }
-      }
-      DBUG_RETURN(false);
-    }
-    DBUG_RETURN(true);
-  }
-
-  default:
-    DBUG_ASSERT(0);
-    DBUG_RETURN(false);
-  }
-  DBUG_RETURN(false);
-}
-
-
 /**
   @param[in] comment  table comment defined by user
 

=== modified file 'sql/ha_ndbcluster.h'
--- a/sql/ha_ndbcluster.h	2012-04-25 12:30:33 +0000
+++ b/sql/ha_ndbcluster.h	2012-05-22 09:03:56 +0000
@@ -414,15 +414,6 @@ static void set_tabname(const char *path
   bool maybe_pushable_join(const char*& reason) const;
   int assign_pushed_join(const ndb_pushed_join* pushed_join);
 
-#ifdef NDB_WITHOUT_JOIN_PUSHDOWN
-  enum ha_push_flag {
-    HA_PUSH_BLOCK_CONST_TABLE,
-    HA_PUSH_MULTIPLE_DEPENDENCY,
-    HA_PUSH_NO_ORDERED_INDEX
-  };
-#endif
-  bool test_push_flag(enum ha_push_flag flag) const;
-
   uint number_of_pushed_joins() const;
   const TABLE* root_of_pushed_join() const;
   const TABLE* parent_of_pushed_join() const;

=== modified file 'sql/handler.cc'
--- a/sql/handler.cc	2012-05-21 12:08:05 +0000
+++ b/sql/handler.cc	2012-05-22 09:03:56 +0000
@@ -5069,6 +5069,44 @@ int ha_table_exists_in_engine(THD* thd, 
   DBUG_RETURN(args.err);
 }
 
+/**
+  Prepare (sub-) sequences of joins in this statement 
+  which may be pushed to each storage engine for execution.
+*/
+struct st_make_pushed_join_args
+{
+  const AQP::Join_plan* plan; // Query plan provided by optimizer
+  int err;                    // Error code to return.
+};
+
+static my_bool make_pushed_join_handlerton(THD *thd, plugin_ref plugin,
+                                   void *arg)
+{
+  st_make_pushed_join_args *vargs= (st_make_pushed_join_args *)arg;
+  handlerton *hton= plugin_data(plugin, handlerton *);
+
+  if (hton && hton->make_pushed_join)
+  {
+    const int error= hton->make_pushed_join(hton, thd, vargs->plan);
+    if (unlikely(error))
+    {
+      vargs->err = error;
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan)
+{
+  DBUG_ENTER("ha_make_pushed_joins");
+  st_make_pushed_join_args args= {plan, 0};
+  plugin_foreach(thd, make_pushed_join_handlerton,
+                 MYSQL_STORAGE_ENGINE_PLUGIN, &args);
+  DBUG_PRINT("exit", ("error: %d", args.err));
+  DBUG_RETURN(args.err);
+}
+
 #ifdef HAVE_NDB_BINLOG
 /*
   TODO: change this into a dynamic struct

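The plugin_foreach() dispatch above means any engine can opt in simply by filling in the new handlerton hook. A minimal sketch of the engine side, assuming a hypothetical 'example' engine whose plugin init function is handed its handlerton (only the hook's signature is taken from the handler.h change below):

    #include "handler.h"   // handlerton and the forward-declared AQP::Join_plan

    static int example_make_pushed_join(handlerton *hton, THD *thd,
                                        const AQP::Join_plan *plan)
    {
      /*
        Inspect 'plan' and remember whatever (sub-)sequences of joins this
        engine can execute as pushed operations. Returning 0 means success;
        a non-zero value is propagated as an error by ha_make_pushed_joins().
      */
      return 0;
    }

    static int example_init_func(void *p)
    {
      handlerton *example_hton= (handlerton *)p;
      example_hton->make_pushed_join= example_make_pushed_join;
      return 0;
    }
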
=== modified file 'sql/handler.h'
--- a/sql/handler.h	2012-05-11 12:05:39 +0000
+++ b/sql/handler.h	2012-05-22 09:03:56 +0000
@@ -231,6 +231,12 @@ enum enum_alter_inplace_result {
  */
 #define HA_READ_OUT_OF_SYNC              (LL(1) << 40)
 
+/*
+  The handler doesn't want accesses to this table to
+  be const-table optimized.
+*/
+#define HA_BLOCK_CONST_TABLE          (LL(1) << 41)
+
 /* bits in index_flags(index_number) for what you can do with index */
 #define HA_READ_NEXT            1       /* TODO really use this flag */
 #define HA_READ_PREV            2       /* supports ::index_prev */
@@ -480,6 +486,10 @@ typedef ulonglong my_xid; // this line i
 #define COMPATIBLE_DATA_YES 0
 #define COMPATIBLE_DATA_NO  1
 
+namespace AQP {
+  class Join_plan;
+};
+
 /**
   struct xid_t is binary compatible with the XID structure as
   in the X/Open CAE Specification, Distributed Transaction Processing:
@@ -886,6 +896,8 @@ struct handlerton
                      const char *wild, bool dir, List<LEX_STRING> *files);
    int (*table_exists_in_engine)(handlerton *hton, THD* thd, const char *db,
                                  const char *name);
+   int (*make_pushed_join)(handlerton *hton, THD* thd, 
+                           const AQP::Join_plan* plan);
 
   /**
     List of all system tables specific to the SE.
@@ -2525,6 +2537,34 @@ public:
    in_range_check_pushed_down= false;
  }
 
+  /**
+    Reports the number of tables included in the pushed join which
+    this handler instance is part of. Returns 0 if not pushed.
+  */
+  virtual uint number_of_pushed_joins() const
+  { return 0; }
+
+  /**
+    If this handler instance is part of a pushed join sequence,
+    the returned TABLE instance is the root of the pushed query.
+  */
+  virtual const TABLE* root_of_pushed_join() const
+  { return NULL; }
+
+  /**
+    If this handler instance is a child in a pushed join sequence,
+    the returned TABLE instance is its parent.
+  */
+  virtual const TABLE* parent_of_pushed_join() const
+  { return NULL; }
+
+  virtual int index_read_pushed(uchar * buf, const uchar * key,
+                             key_part_map keypart_map)
+  { return  HA_ERR_WRONG_COMMAND; }
+
+  virtual int index_next_pushed(uchar * buf)
+  { return  HA_ERR_WRONG_COMMAND; }
+
  /**
    Part of old, deprecated in-place ALTER API.
  */
@@ -3195,6 +3235,9 @@ int ha_rollback_to_savepoint(THD *thd, S
 int ha_savepoint(THD *thd, SAVEPOINT *sv);
 int ha_release_savepoint(THD *thd, SAVEPOINT *sv);
 
+/* Build pushed joins in handlers implementing this feature */
+int ha_make_pushed_joins(THD *thd, const AQP::Join_plan* plan);
+
 /* these are called by storage engines */
 void trans_register_ha(THD *thd, bool all, handlerton *ht);
 

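The defaults above make pushed joins strictly opt-in: a handler that never pushes anything needs no changes at all. As a rough illustration of the engine side, a hypothetical ha_example_pushed handler could record the pushed-join shape when it accepts one and report it through the new virtuals (class and member names are illustrative; only the virtual signatures come from this patch):

    class ha_example_pushed : public handler
    {
      const TABLE *m_pushed_root;     // root of the pushed join, NULL if not pushed
      const TABLE *m_pushed_parent;   // my parent within the pushed join (NULL for the root)
      uint         m_pushed_count;    // number of tables in the pushed join, 0 if none

    public:
      virtual uint number_of_pushed_joins() const
      { return m_pushed_count; }

      virtual const TABLE* root_of_pushed_join() const
      { return m_pushed_root; }

      virtual const TABLE* parent_of_pushed_join() const
      { return m_pushed_parent; }

      // Row retrieval for pushed child tables; see the sketch after the
      // sql_executor.cc diff further down.
      virtual int index_read_pushed(uchar *buf, const uchar *key,
                                    key_part_map keypart_map);
      virtual int index_next_pushed(uchar *buf);
    };
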
=== modified file 'sql/opt_explain.cc'
--- a/sql/opt_explain.cc	2012-05-11 19:37:22 +0000
+++ b/sql/opt_explain.cc	2012-05-22 09:03:56 +0000
@@ -904,6 +904,46 @@ bool Explain_table_base::explain_extra_c
     return true;
   }
 
+  const TABLE* pushed_root= table->file->root_of_pushed_join();
+  if (pushed_root)
+  {
+    char buf[128];
+    int len;
+    int pushed_id= 0;
+
+    for (JOIN_TAB* prev= join->join_tab; prev <= tab; prev++)
+    {
+      const TABLE* prev_root= prev->table->file->root_of_pushed_join();
+      if (prev_root == prev->table)
+      {
+        pushed_id++;
+        if (prev_root == pushed_root)
+          break;
+      }
+    }
+    if (pushed_root == table)
+    {
+      uint pushed_count= tab->table->file->number_of_pushed_joins();
+      len= my_snprintf(buf, sizeof(buf)-1,
+                       "Parent of %d pushed join@%d",
+                       pushed_count, pushed_id);
+    }
+    else
+    {
+      len= my_snprintf(buf, sizeof(buf)-1,
+                       "Child of '%s' in pushed join@%d",
+                       tab->table->file->parent_of_pushed_join()->alias,
+                       pushed_id);
+    }
+
+    {
+      StringBuffer<128> buff(cs);
+      buff.append(buf,len);
+      if (push_extra(ET_PUSHED_JOIN, buff))
+        return true;
+    }
+  }
+
   switch (quick_type) {
   case QUICK_SELECT_I::QS_TYPE_ROR_UNION:
   case QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT:

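In traditional EXPLAIN output the net effect is one Extra annotation per table of a pushed join, built from the format strings above; for a three-table pushed join it would read roughly like this (table aliases hypothetical):

    t1 ... Extra: Parent of 3 pushed join@1
    t2 ... Extra: Child of 't1' in pushed join@1
    t3 ... Extra: Child of 't2' in pushed join@1
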
=== modified file 'sql/opt_explain_format.h'
--- a/sql/opt_explain_format.h	2012-05-11 19:37:22 +0000
+++ b/sql/opt_explain_format.h	2012-05-22 09:03:56 +0000
@@ -98,6 +98,7 @@ enum Extra_tag
   ET_CONST_ROW_NOT_FOUND,
   ET_UNIQUE_ROW_NOT_FOUND,
   ET_IMPOSSIBLE_ON_CONDITION,
+  ET_PUSHED_JOIN,
   //------------------------------------
   ET_total
 };

=== modified file 'sql/opt_explain_json.cc'
--- a/sql/opt_explain_json.cc	2012-04-04 17:22:45 +0000
+++ b/sql/opt_explain_json.cc	2012-05-22 09:03:56 +0000
@@ -53,6 +53,7 @@ static const char *json_extra_tags[ET_to
   "const_row_not_found",                // ET_CONST_ROW_NOT_FOUND
   "unique_row_not_found",               // ET_UNIQUE_ROW_NOT_FOUND
   "impossible_on_condition",            // ET_IMPOSSIBLE_ON_CONDITION
+  "pushed_join"                         // ET_PUSHED_JOIN
 };
 
 

=== modified file 'sql/opt_explain_traditional.cc'
--- a/sql/opt_explain_traditional.cc	2012-05-11 19:37:22 +0000
+++ b/sql/opt_explain_traditional.cc	2012-05-22 09:03:56 +0000
@@ -52,6 +52,7 @@ static const char *traditional_extra_tag
   "const row not found",               // ET_CONST_ROW_NOT_FOUND
   "unique row not found",              // ET_UNIQUE_ROW_NOT_FOUND
-  "Impossible ON condition"            // ET_IMPOSSIBLE_ON_CONDITION
+  "Impossible ON condition",           // ET_IMPOSSIBLE_ON_CONDITION
+  ""                                   // ET_PUSHED_JOIN
 };
 
 
@@ -202,6 +203,7 @@ bool Explain_format_traditional::flush_e
           break;
         }
         if (e->tag != ET_FIRST_MATCH && // for backward compatibility
+            e->tag != ET_PUSHED_JOIN &&
             buff.append(" "))
           return true;
         if (brackets && buff.append("("))

=== modified file 'sql/sql_executor.cc'
--- a/sql/sql_executor.cc	2012-05-16 08:13:19 +0000
+++ b/sql/sql_executor.cc	2012-05-22 09:03:56 +0000
@@ -81,6 +81,8 @@ static int join_ft_read_next(READ_RECORD
 static int join_read_always_key_or_null(JOIN_TAB *tab);
 static int join_read_next_same_or_null(READ_RECORD *info);
 static int join_read_record_no_init(JOIN_TAB *tab);
+static int join_read_linked_first(JOIN_TAB *tab);
+static int join_read_linked_next(READ_RECORD *info);
 // Create list for using with tempory table
 static bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
 				     List<Item> &new_list1,
@@ -2972,6 +2974,76 @@ join_read_key_unlock_row(st_join_table *
     tab->ref.use_count--;
 }
 
+/**
+  Read a table *assumed* to be included in execution of a pushed join.
+  This is the counterpart of join_read_key() / join_read_always_key()
+  for child tables in a pushed join.
+
+  When the table access is performed as part of the pushed join,
+  all 'linked' child columns are prefetched together with the parent row.
+  The handler will then only format the row as required by MySQL and set
+  'table->status' accordingly.
+
+  However, there may be situations where the prepared pushed join was not
+  executed as assumed. It is the responsibility of the handler to handle
+  these situations by letting ::index_read_pushed() then effectively do a
+  plain old index_read_map(..., HA_READ_KEY_EXACT).
+  
+  @param tab			Table to read
+
+  @retval
+    0	Row was found
+  @retval
+    -1   Row was not found
+  @retval
+    1   Got an error (other than row not found) during read
+*/
+static int
+join_read_linked_first(JOIN_TAB *tab)
+{
+  TABLE *table= tab->table;
+  DBUG_ENTER("join_read_linked_first");
+
+  DBUG_ASSERT(!tab->sorted); // Pushed child can't be sorted
+  if (!table->file->inited)
+    table->file->ha_index_init(tab->ref.key, tab->sorted);
+
+  if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
+  {
+    table->status=STATUS_NOT_FOUND;
+    DBUG_RETURN(-1);
+  }
+
+  // 'read' itself is a NOOP:
+  //  handler::index_read_pushed() only unpacks the prefetched row and sets 'status'
+  int error=table->file->index_read_pushed(table->record[0],
+                                      tab->ref.key_buff,
+                                      make_prev_keypart_map(tab->ref.key_parts));
+  if (unlikely(error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE))
+    DBUG_RETURN(report_error(table, error));
+
+  table->null_row=0;
+  int rc= table->status ? -1 : 0;
+  DBUG_RETURN(rc);
+}
+
+static int
+join_read_linked_next(READ_RECORD *info)
+{
+  TABLE *table= info->table;
+  DBUG_ENTER("join_read_linked_next");
+
+  int error=table->file->index_next_pushed(table->record[0]);
+  if (error)
+  {
+    if (unlikely(error != HA_ERR_END_OF_FILE))
+      DBUG_RETURN(report_error(table, error));
+    table->status= STATUS_GARBAGE;
+    DBUG_RETURN(-1);
+  }
+  DBUG_RETURN(error);
+}
+
 /*
   ref access method implementation: "read_first" function
 
@@ -3353,6 +3425,36 @@ join_read_next_same_or_null(READ_RECORD 
 void
 pick_table_access_method(JOIN_TAB *tab)
 {
+  /**
+    Set up modified access function for pushed joins.
+  */
+  uint pushed_joins= tab->table->file->number_of_pushed_joins();
+  if (pushed_joins > 0)
+  {
+    if (tab->table->file->root_of_pushed_join() != tab->table)
+    {
+      /*
+        Is child of a pushed join operation:
+        Replace access functions with their linked counterparts.
+        ... Which is effectively a NOOP as the row is already fetched 
+        together with the root of the linked operation.
+      */
+      DBUG_ASSERT(tab->type != JT_REF_OR_NULL);
+      tab->read_first_record= join_read_linked_first;
+      tab->read_record.read_record= join_read_linked_next;
+      tab->read_record.unlock_row= rr_unlock_row;
+      return;
+    }
+  }
+
+  /**
+    Already set to some non-default value in sql_select.cc
+    TODO: Move these settings into pick_table_access_method() also
+  */
+  else if (tab->read_first_record != NULL)
+    return;  
+
+  // Fall through to set default access functions:
   switch (tab->type) 
   {
   case JT_REF:

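As the comment on join_read_linked_first() points out, the engine must also cover the case where the prepared pushed join was not executed as assumed. Continuing the hypothetical ha_example_pushed handler from above, its index_read_pushed() could fall back to an ordinary key lookup exactly as that comment prescribes (the unpack helper is illustrative; index_read_map() and HA_READ_KEY_EXACT are the existing handler API):

    int ha_example_pushed::index_read_pushed(uchar *buf, const uchar *key,
                                             key_part_map keypart_map)
    {
      if (m_pushed_root != NULL)
      {
        /*
          The row was already fetched together with the root of the pushed
          join: only unpack it into 'buf' and set table->status accordingly.
        */
        return unpack_pushed_row(buf);   // hypothetical helper
      }
      /* Pushed join was prepared but not executed: do a plain key lookup. */
      return index_read_map(buf, key, keypart_map, HA_READ_KEY_EXACT);
    }
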
=== modified file 'sql/sql_optimizer.cc'
--- a/sql/sql_optimizer.cc	2012-04-30 10:06:23 +0000
+++ b/sql/sql_optimizer.cc	2012-05-22 09:03:56 +0000
@@ -36,6 +36,7 @@
 #include "sql_parse.h"
 #include "my_bit.h"
 #include "lock.h"
+#include "abstract_query_plan.h"
 #include "opt_explain_format.h"  // Explain_format_flags
 
 #include <algorithm>
@@ -948,6 +949,33 @@ JOIN::optimize()
     }
   }
 
+  /**
+   * Push joins to handler(s) whenever possible.
+   * The handlers will inspect the QEP through the
+   * AQP (Abstract Query Plan), and extract from it
+   * whatever it might implement of pushed execution.
+   * It is the responsibility of the handler to store any
+   * information it needs for later execution of pushed queries.
+   *
+   * Currently pushed joins are only implemented by NDB.
+   * It only makes sense to try pushing if more than one table is involved.
+   */
+  if ((tables-const_tables) > 1)
+  {
+    const AQP::Join_plan plan(this);
+    if (ha_make_pushed_joins(thd, &plan))
+      DBUG_RETURN(1);
+  }
+
+  /**
+   * Set up access functions for the tables as
+   * required by the selected access type.
+   */
+  for (uint i= const_tables; i < tables; i++)
+  {
+    pick_table_access_method (&join_tab[i]);
+  }
+
   tmp_having= having;
   if (!(select_options & SELECT_DESCRIBE))
   {
@@ -3203,12 +3231,14 @@ const_table_extraction_done:
              3. are part of semi-join, or
              4. have an expensive outer join condition.
                 DontEvaluateMaterializedSubqueryTooEarly
+             5. are blocked by the handler from const-table optimization.
           */
 	  if (eq_part.is_prefix(table->key_info[key].key_parts) &&
               !table->fulltext_searched &&                           // 1
               !tl->in_outer_join_nest() &&                           // 2
               !(tl->embedding && tl->embedding->sj_on_expr) &&       // 3
-              !(*s->on_expr_ref && (*s->on_expr_ref)->is_expensive())) // 4
+              !(*s->on_expr_ref && (*s->on_expr_ref)->is_expensive()) &&// 4
+              !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE))  // 5
 	  {
             if (table->key_info[key].flags & HA_NOSAME)
             {

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2012-05-16 08:13:19 +0000
+++ b/sql/sql_select.cc	2012-05-22 09:03:56 +0000
@@ -1511,7 +1511,8 @@ bool create_ref_for_key(JOIN *join, JOIN
     j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
     j->ref.null_ref_key= null_ref_key;
   }
-  else if (keyuse_uses_no_tables)
+  else if (keyuse_uses_no_tables &&
+           !(table->file->ha_table_flags() & HA_BLOCK_CONST_TABLE))
   {
     /*
       This happen if we are using a constant expression in the ON part
@@ -2638,7 +2639,9 @@ make_join_readinfo(JOIN *join, ulonglong
     tab->sorted= (tab->type != JT_EQ_REF) ? sorted : false;
     sorted= false;                              // only first must be sorted
     table->status= STATUS_GARBAGE | STATUS_NOT_FOUND;
-    pick_table_access_method (tab);
+    tab->read_first_record= NULL; // Access methods not set yet
+    tab->read_record.read_record= NULL;
+    tab->read_record.unlock_row= rr_unlock_row;
 
     Opt_trace_object trace_refine_table(trace);
     trace_refine_table.add_utf8_table(table);
@@ -3983,7 +3986,6 @@ check_reverse_order:                  
         goto use_filesort;
 
       DBUG_ASSERT(tab->type != JT_REF_OR_NULL && tab->type != JT_FT);
-      pick_table_access_method(tab);
     }
     else if (best_key >= 0)
     {

No bundle (reason: useless for push emails).