List:Commits« Previous MessageNext Message »
From:Roy Lyseng Date:August 21 2012 7:13am
Subject:bzr push into mysql-5.6 branch (roy.lyseng:4108 to 4109) Bug#14272788
View as plain text  
 4109 Roy Lyseng	2012-08-20
      Bug#14272788: Query with MaterializeScan and materialized subquery
                    returns too many rows
      
      Patch #9 - Move fields fropm Semijoin_mat_exec to Semijoin_mat_optimize.
      
      Some fields in the Semijoin_mat_exec class are actually optimizer related
      and should be moved into struct Semijoin_mat_optimize.
      The field mat_fields has been moved and the field emb_sj_nest has been
      deleted.
      
      sql/item_cmpfunc.cc
        Item::equality_substitution_transformer() accepts an argument that is
        a semi-join nest pointer instead of a Semijoin_mat_exec pointer.
      
      sql/sql_class.h
        Deleted fields mat_fields and emb_sj_nest from class Semijoin_mat_exec.
      
      sql/sql_optimizer.cc
        JOIN::update_equalities_for_sjm() must loop over select_lex->sj_nests
        instead of JOIN::sjm_exec_list.
        Local variable sjm is renamed to sjm_exec.
      
      sql/sql_select.cc
        In JOIN::get_best_combination(), assign sjm_exec pointer to embedding
        semi-join nest.
        Local variable sjm is renamed to sjm_exec.
        In JOIN::setup_materialized_table(), populate Semijoin_mat_optimize::
        mat_fields instead of Semijoin_mat_exec::mat_fields.
      
      sql/table.h
        Added field mat_fields to struct Semijoin_mat_optimize.

    modified:
      sql/item_cmpfunc.cc
      sql/sql_class.h
      sql/sql_optimizer.cc
      sql/sql_select.cc
      sql/table.h
 4108 Roy Lyseng	2012-08-20
      Bug#14272788: Query with MaterializeScan and materialized subquery
                    returns too many rows
      
      Patch #8 - Fix problem with how subqueries are transformed to semi-join.
      
      There was a problem in how JOIN::flatten_subqueries() determined whether
      to transform a subquery to semi-join or not. When checking number of
      tables in query block after a transformation, the tables already added
      were not considered.
      
      This is now changed so that the first loop decides whether a subquery
      qualifies for transformation or not, and the second and third loops
      reuse the decision from the first loop. In addition, if a join nest
      with fewer tables follows one with more tables, which was not transformed,
      the smaller nest is now considered. In addition, the condition
      "< MAX_TABLES" was checked, but it should be "<= MAX_TABLES".
      
      sql/item_subselect.h
        Added the sj_chosen member variable that tells whether a subquery is
        chosen for semi-join transformation.
      
      sql/sql_optimizer.cc
        Changes to JOIN::flatten_subqueries(), see above.

    modified:
      sql/item_subselect.h
      sql/sql_optimizer.cc
=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc	2012-08-13 20:13:27 +0000
+++ b/sql/item_cmpfunc.cc	2012-08-20 10:33:40 +0000
@@ -6452,7 +6452,7 @@ Item_field* Item_equal::get_subst_item(c
 
 Item* Item_equal::equality_substitution_transformer(uchar *arg)
 {
-  Semijoin_mat_exec *sjm= reinterpret_cast<Semijoin_mat_exec *>(arg);
+  TABLE_LIST *sj_nest= reinterpret_cast<TABLE_LIST *>(arg);
   List_iterator<Item_field> it(fields);
   List<Item_field> added_fields;
   Item_field *item;
@@ -6465,13 +6465,13 @@ Item* Item_equal::equality_substitution_
       continue;
 
     // Iterate over the fields selected from the subquery
-    List_iterator<Item> mit(*sjm->subq_exprs);
+    List_iterator<Item> mit(sj_nest->nested_join->sj_inner_exprs);
     Item *existing;
     uint fieldno= 0;
     while ((existing= mit++))
     {
       if (existing->real_item()->eq(item, false))
-        added_fields.push_back(sjm->mat_fields[fieldno]);
+        added_fields.push_back(sj_nest->nested_join->sjm.mat_fields[fieldno]);
       fieldno++;
     }
   }
@@ -6493,17 +6493,18 @@ Item* Item_equal::equality_substitution_
 */
 Item* Item_func_eq::equality_substitution_transformer(uchar *arg)
 {
-  Semijoin_mat_exec *sjm= reinterpret_cast<Semijoin_mat_exec *>(arg);
+  TABLE_LIST *sj_nest= reinterpret_cast<TABLE_LIST *>(arg);
 
   // Iterate over the fields selected from the subquery
-  List_iterator<Item> mit(*sjm->subq_exprs);
+  List_iterator<Item> mit(sj_nest->nested_join->sj_inner_exprs);
   Item *existing;
   uint fieldno= 0;
   while ((existing= mit++))
   {
     if (existing->real_item()->eq(args[1], false) &&
-        (args[0]->used_tables() & ~sjm->emb_sj_nest->sj_inner_tables))
-      current_thd->change_item_tree(args+1, sjm->mat_fields[fieldno]);
+        (args[0]->used_tables() & ~sj_nest->sj_inner_tables))
+      current_thd->change_item_tree(args+1,
+                                 sj_nest->nested_join->sjm.mat_fields[fieldno]);
     fieldno++;
   }
   return this;

=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h	2012-08-13 20:13:27 +0000
+++ b/sql/sql_class.h	2012-08-20 10:33:40 +0000
@@ -4467,26 +4467,21 @@ struct st_table_ref;
 class Semijoin_mat_exec : public Sql_alloc
 {
 public:
-  Semijoin_mat_exec(bool is_scan, TABLE_LIST *const emb_sj_nest,
-                    uint table_count, uint mat_table_index,
-                    uint inner_table_index)
-    :is_scan(is_scan), emb_sj_nest(emb_sj_nest),
-    table_count(table_count),
-    mat_table_index(mat_table_index), inner_table_index(inner_table_index),
-    subq_exprs(&emb_sj_nest->nested_join->sj_inner_exprs),
-    table_param(), table(NULL), mat_fields(NULL)
+  Semijoin_mat_exec(bool is_scan, uint table_count, uint mat_table_index,
+                    uint inner_table_index, List<Item> *const subq_exprs)
+    :is_scan(is_scan), table_count(table_count),
+     mat_table_index(mat_table_index), inner_table_index(inner_table_index),
+    subq_exprs(subq_exprs), table_param(), table(NULL)
   {}
   ~Semijoin_mat_exec()
   {}
   const bool is_scan;           ///< TRUE if executing a scan, FALSE if lookup
-  TABLE_LIST *const emb_sj_nest;///< Semi-join nest for operation
   const uint table_count;       ///< Number of tables in the sj-nest
   const uint mat_table_index;   ///< Index in join_tab for materialized table
   const uint inner_table_index; ///< Index in join_tab for first inner table
   List<Item> *const subq_exprs; ///< List of expressions describing temp. table
   TMP_TABLE_PARAM table_param;  ///< The temptable and its related info
   TABLE *table;                 ///< Reference to temporary table
-  Item_field **mat_fields;      ///< Fields in materialized table
 };
 
 

=== modified file 'sql/sql_optimizer.cc'
--- a/sql/sql_optimizer.cc	2012-08-20 07:13:10 +0000
+++ b/sql/sql_optimizer.cc	2012-08-20 10:33:40 +0000
@@ -2903,28 +2903,31 @@ static void update_depend_map(JOIN *join
 
 bool JOIN::update_equalities_for_sjm()
 {
-  if (sjm_exec_list.is_empty())
+  if (select_lex->sj_nests.is_empty())
     return false;
 
-  List_iterator<Semijoin_mat_exec> it(sjm_exec_list);
-  Semijoin_mat_exec *sjm;
-  while ((sjm= it++))
+  List_iterator<TABLE_LIST> sj_list_it(select_lex->sj_nests);
+  TABLE_LIST *sj_nest;
+  while ((sj_nest= sj_list_it++))
   {
-    TABLE_LIST *const emb_sj_nest= sjm->emb_sj_nest;
-    DBUG_ASSERT(!emb_sj_nest->outer_join_nest());
+    if (sj_nest->sj_mat_exec == NULL)
+      continue;
+
+    DBUG_ASSERT(!sj_nest->outer_join_nest());
     /*
       The table cannot actually be an outer join inner table yet, this is
-      just a preparatory step (ie emb_sj_nest->outer_join_nest() is NULL)
+      just a preparatory step (ie sj_nest->outer_join_nest() is NULL)
     */
-    Item *cond= emb_sj_nest->outer_join_nest() ?
-                  emb_sj_nest->outer_join_nest()->join_cond() :
+    Item *cond= sj_nest->outer_join_nest() ?
+                  sj_nest->outer_join_nest()->join_cond() :
                   conds;
     if (!cond)
       continue;
 
     uchar *dummy= NULL;
     cond= cond->compile(&Item::equality_substitution_analyzer, &dummy,
-                        &Item::equality_substitution_transformer, (uchar *)sjm);
+                        &Item::equality_substitution_transformer,
+                        (uchar *)sj_nest);
     if (cond == NULL)
       return true;
     cond->update_used_tables();
@@ -2933,10 +2936,13 @@ bool JOIN::update_equalities_for_sjm()
   for (uint i= const_tables; i < primary_tables; i++)
   {
     JOIN_TAB *const mat_tab= join_tab + i;
-    Semijoin_mat_exec *const sjm= mat_tab->sj_mat_exec;
-    if (sjm == NULL)
+    Semijoin_mat_exec *const sjm_exec= mat_tab->sj_mat_exec;
+    if (sjm_exec == NULL)
       continue;
 
+    TABLE_LIST *const sj_nest=
+      (join_tab + sjm_exec->inner_table_index)->emb_sj_nest;
+
     // Loop over all primary tables that follow the materialized table
     for (uint j= i + 1; j < primary_tables; j++)
     {
@@ -2946,7 +2952,7 @@ bool JOIN::update_equalities_for_sjm()
            keyuse->key == tab->position->key->key;
            keyuse++)
       {
-        List_iterator<Item> it(*sjm->subq_exprs);
+        List_iterator<Item> it(*sjm_exec->subq_exprs);
         Item *old;
         uint fieldno= 0;
         while ((old= it++))
@@ -2957,7 +2963,7 @@ bool JOIN::update_equalities_for_sjm()
               Replace the expression selected from the subquery with the
               corresponding column of the materialized temporary table.
             */
-            keyuse->val= sjm->mat_fields[fieldno];
+            keyuse->val= sj_nest->nested_join->sjm.mat_fields[fieldno];
             keyuse->used_tables= keyuse->val->used_tables();
             break;
           }

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2012-08-15 08:13:09 +0000
+++ b/sql/sql_select.cc	2012-08-20 10:33:40 +0000
@@ -1369,25 +1369,28 @@ bool JOIN::get_best_combination()
       DBUG_ASSERT(outer_target < inner_target);
 
       POSITION *const pos_table= best_positions + tableno;
+      TABLE_LIST *const sj_nest= pos_table->table->emb_sj_nest;
+
       // Handle this many inner tables of materialized semi-join
       remaining_sjm_inner= pos_table->n_sj_tables;
 
-      Semijoin_mat_exec *const sjm=
+      Semijoin_mat_exec *const sjm_exec=
         new (thd->mem_root)
         Semijoin_mat_exec((pos_table->sj_strategy == SJ_OPT_MATERIALIZE_SCAN),
-                          pos_table->table->emb_sj_nest, remaining_sjm_inner,
-                          outer_target, inner_target);
-      if (!sjm)
+                          remaining_sjm_inner, outer_target, inner_target,
+                          &sj_nest->nested_join->sj_inner_exprs);
+      if (!sjm_exec)
         DBUG_RETURN(true);
 
-      (join_tab + outer_target)->sj_mat_exec= sjm;
+      (join_tab + outer_target)->sj_mat_exec= sjm_exec;
+      sj_nest->sj_mat_exec= sjm_exec;
 
       if (setup_materialized_table(join_tab + outer_target, sjm_index,
                                    pos_table, best_positions + sjm_index))
         DBUG_RETURN(true);
 
-      all_tables[outer_target]= sjm->table;
-      map2table[sjm->table->tablenr]= join_tab + outer_target;
+      all_tables[outer_target]= sjm_exec->table;
+      map2table[sjm_exec->table->tablenr]= join_tab + outer_target;
 
       outer_target++;
       sjm_index++;
@@ -2565,8 +2568,9 @@ bool JOIN::setup_materialized_table(JOIN
 {
   DBUG_ENTER("JOIN::setup_materialized_table");
   TABLE_LIST *const emb_sj_nest= inner_pos->table->emb_sj_nest;
-  Semijoin_mat_exec *const sjm= tab->sj_mat_exec;
-  const uint field_count= sjm->subq_exprs->elements;
+  Semijoin_mat_optimize *const sjm_opt= &emb_sj_nest->nested_join->sjm;
+  Semijoin_mat_exec *const sjm_exec= tab->sj_mat_exec;
+  const uint field_count= sjm_exec->subq_exprs->elements;
 
   DBUG_ASSERT(inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_LOOKUP ||
               inner_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN);
@@ -2574,9 +2578,9 @@ bool JOIN::setup_materialized_table(JOIN
   /* 
     Set up the table to write to, do as select_union::create_result_table does
   */
-  sjm->table_param.init();
-  sjm->table_param.field_count= field_count;
-  sjm->table_param.bit_fields_as_long= true;
+  sjm_exec->table_param.init();
+  sjm_exec->table_param.field_count= field_count;
+  sjm_exec->table_param.bit_fields_as_long= true;
 
   char buffer[NAME_LEN];
   const size_t len= my_snprintf(buffer, sizeof(buffer) - 1, "<subquery%u>",
@@ -2588,8 +2592,8 @@ bool JOIN::setup_materialized_table(JOIN
   memcpy(name, buffer, len);
   name[len] = '\0';
   TABLE *table;
-  if (!(table= create_tmp_table(thd, &sjm->table_param, 
-                                *sjm->subq_exprs, NULL, 
+  if (!(table= create_tmp_table(thd, &sjm_exec->table_param, 
+                                *sjm_exec->subq_exprs, NULL, 
                                 true /* distinct */, 
                                 true /*save_sum_fields*/,
                                 thd->variables.option_bits |
@@ -2597,23 +2601,23 @@ bool JOIN::setup_materialized_table(JOIN
                                 HA_POS_ERROR /*rows_limit */, 
                                 name)))
     DBUG_RETURN(true); /* purecov: inspected */
-  sjm->table= table;
+  sjm_exec->table= table;
   table->tablenr= tableno;
   table->map= (table_map)1 << tableno;
   table->file->extra(HA_EXTRA_WRITE_CACHE);
   table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
   table->reginfo.join_tab= tab;
   sj_tmp_tables.push_back(table);
-  sjm_exec_list.push_back(sjm);
+  sjm_exec_list.push_back(sjm_exec);
 
-  if (!(sjm->mat_fields=
+  if (!(sjm_opt->mat_fields=
     (Item_field **) alloc_root(thd->mem_root,
                                field_count * sizeof(Item_field **))))
     DBUG_RETURN(true);
 
   for (uint fieldno= 0; fieldno < field_count; fieldno++)
   {
-    if (!(sjm->mat_fields[fieldno]= new Item_field(table->field[fieldno])))
+    if (!(sjm_opt->mat_fields[fieldno]= new Item_field(table->field[fieldno])))
       DBUG_RETURN(true);
   }
 
@@ -2649,14 +2653,14 @@ bool JOIN::setup_materialized_table(JOIN
     a proper ref access for this table.
   */
   Key_use_array *keyuse=
-   create_keyuse_for_table(thd, table, field_count, sjm->mat_fields,
+   create_keyuse_for_table(thd, table, field_count, sjm_opt->mat_fields,
                            emb_sj_nest->nested_join->sj_outer_exprs);
   if (!keyuse)
     DBUG_RETURN(true);
 
   double fanout= (tab == join_tab + tab->join->const_tables) ?
                  1.0 : (tab-1)->position->prefix_record_count;
-  if (!sjm->is_scan)
+  if (!sjm_exec->is_scan)
   {
     sjm_pos->key= keyuse->begin(); // MaterializeLookup will use the index
     tab->keys.set_bit(0);          // There is one index - use it always

=== modified file 'sql/table.h'
--- a/sql/table.h	2012-08-15 08:13:09 +0000
+++ b/sql/table.h	2012-08-20 10:33:40 +0000
@@ -2116,20 +2116,22 @@ public:
 
 struct Semijoin_mat_optimize
 {
-  /* Optimal join order calculated for inner tables of this semijoin op. */
+  /// Optimal join order calculated for inner tables of this semijoin op.
   struct st_position *positions;
-  /** True if data types allow the MaterializeLookup semijoin strategy */
+  /// True if data types allow the MaterializeLookup semijoin strategy
   bool lookup_allowed;
-  /** True if data types allow the MaterializeScan semijoin strategy */
+  /// True if data types allow the MaterializeScan semijoin strategy
   bool scan_allowed;
-  /* Expected #rows in the materialized table */
+  /// Expected #rows in the materialized table
   double expected_rowcount;
-  /* Materialization cost - execute sub-join and write rows to temp.table */
+  /// Materialization cost - execute sub-join and write rows to temp.table
   Cost_estimate materialization_cost;
-  /* Cost to make one lookup in the temptable */
+  /// Cost to make one lookup in the temptable
   Cost_estimate lookup_cost;
-  /* Cost of scanning the materialized table */
+  /// Cost of scanning the materialized table
   Cost_estimate scan_cost;
+  /// Array of pointers to fields in the materialized table.
+  Item_field **mat_fields;
 };
 
 /**

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-5.6 branch (roy.lyseng:4108 to 4109) Bug#14272788Roy Lyseng21 Aug