List:Commits« Previous MessageNext Message »
From:Tor Didriksen Date:November 17 2009 10:12am
Subject:bzr push into mysql-6.0-codebase-bugfixing branch (tor.didriksen:3711 to
3712) Bug#46550
View as plain text  
 3712 Tor Didriksen	2009-11-17
      Bug#46550 Azalea returning duplicate results for some IN subqueries w/ semijoin=on
      
      The optimizer was choosing an illegal plan for FirstMatch.
      Add an explicit check that all inner tables are part of 'remaining_tables'
      when we initiate the FirstMatch algorithm.
     @ mysql-test/r/subselect_sj.result
        Add test case.
     @ mysql-test/r/subselect_sj_jcl6.result
        Add test case.
     @ mysql-test/t/subselect_sj.test
        Add test case.
     @ sql/sql_select.cc
        Add an explicit check that all inner tables are part of 'remaining_tables'
        when we initiate the FirstMatch algorithm.
        
        Some cosmetics for readability and coding style compliance.

    modified:
      mysql-test/r/subselect_sj.result
      mysql-test/r/subselect_sj_jcl6.result
      mysql-test/t/subselect_sj.test
      sql/sql_select.cc
 3711 Alexey Botchkov	2009-11-15 [merge]
      merging

    modified:
      mysql-test/r/mysqlbinlog_row.result
      mysql-test/r/mysqlbinlog_row_innodb.result
      mysql-test/r/mysqlbinlog_row_myisam.result
      mysql-test/r/mysqlbinlog_row_trans.result
=== modified file 'mysql-test/r/subselect_sj.result'
--- a/mysql-test/r/subselect_sj.result	2009-09-01 14:44:35 +0000
+++ b/mysql-test/r/subselect_sj.result	2009-11-17 10:12:07 +0000
@@ -386,3 +386,80 @@ E4	Don
 set optimizer_switch='default';
 drop table STAFF,WORKS,PROJ;
 # End of bug#45191
+#
+# Bug#46550 Azalea returning duplicate results for some IN subqueries
+# w/ semijoin=on
+#
+DROP TABLE IF EXISTS t0, t1, t2;
+CREATE TABLE t0 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t0 VALUES
+(1,'m','m'),
+(40,'h','h'),
+(1,'r','r'),
+(1,'h','h'),
+(9,'x','x'),
+(NULL,'q','q'),
+(NULL,'k','k'),
+(7,'l','l'),
+(182,'k','k'),
+(202,'a','a'),
+(7,'x','x'),
+(6,'j','j'),
+(119,'z','z'),
+(4,'d','d'),
+(5,'h','h'),
+(1,'u','u'),
+(3,'q','q'),
+(7,'a','a'),
+(3,'e','e'),
+(6,'l','l');
+CREATE TABLE t1 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t1 VALUES (7,NULL,NULL),(4,'x','x');
+CREATE TABLE t2 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t2 VALUES (123,NULL,NULL);
+SELECT int_key  
+FROM t0  
+WHERE varchar_nokey  IN (  
+SELECT t1 .varchar_key  from t1
+);
+int_key
+9
+7
+SELECT t0.int_key  
+FROM t0
+WHERE t0.varchar_nokey  IN (  
+SELECT t1_1 .varchar_key  
+FROM t1 AS t1_1  JOIN t1 AS t1_2 ON t1_1 .int_key
+);
+int_key
+9
+7
+SELECT t0.int_key  
+FROM t0, t2
+WHERE t0.varchar_nokey  IN (  
+SELECT t1_1 .varchar_key  
+FROM t1 AS t1_1  JOIN t1 AS t1_2 ON t1_1 .int_key  
+);
+int_key
+9
+7
+DROP TABLE t0, t1, t2;
+# End of bug#46550

=== modified file 'mysql-test/r/subselect_sj_jcl6.result'
--- a/mysql-test/r/subselect_sj_jcl6.result	2009-09-01 14:44:35 +0000
+++ b/mysql-test/r/subselect_sj_jcl6.result	2009-11-17 10:12:07 +0000
@@ -390,6 +390,83 @@ E4	Don
 set optimizer_switch='default';
 drop table STAFF,WORKS,PROJ;
 # End of bug#45191
+#
+# Bug#46550 Azalea returning duplicate results for some IN subqueries
+# w/ semijoin=on
+#
+DROP TABLE IF EXISTS t0, t1, t2;
+CREATE TABLE t0 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t0 VALUES
+(1,'m','m'),
+(40,'h','h'),
+(1,'r','r'),
+(1,'h','h'),
+(9,'x','x'),
+(NULL,'q','q'),
+(NULL,'k','k'),
+(7,'l','l'),
+(182,'k','k'),
+(202,'a','a'),
+(7,'x','x'),
+(6,'j','j'),
+(119,'z','z'),
+(4,'d','d'),
+(5,'h','h'),
+(1,'u','u'),
+(3,'q','q'),
+(7,'a','a'),
+(3,'e','e'),
+(6,'l','l');
+CREATE TABLE t1 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t1 VALUES (7,NULL,NULL),(4,'x','x');
+CREATE TABLE t2 (
+int_key int(11) DEFAULT NULL,
+varchar_key varchar(1) DEFAULT NULL,
+varchar_nokey varchar(1) DEFAULT NULL,
+KEY int_key (int_key),
+KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t2 VALUES (123,NULL,NULL);
+SELECT int_key  
+FROM t0  
+WHERE varchar_nokey  IN (  
+SELECT t1 .varchar_key  from t1
+);
+int_key
+9
+7
+SELECT t0.int_key  
+FROM t0
+WHERE t0.varchar_nokey  IN (  
+SELECT t1_1 .varchar_key  
+FROM t1 AS t1_1  JOIN t1 AS t1_2 ON t1_1 .int_key
+);
+int_key
+9
+7
+SELECT t0.int_key  
+FROM t0, t2
+WHERE t0.varchar_nokey  IN (  
+SELECT t1_1 .varchar_key  
+FROM t1 AS t1_1  JOIN t1 AS t1_2 ON t1_1 .int_key  
+);
+int_key
+9
+7
+DROP TABLE t0, t1, t2;
+# End of bug#46550
 set join_cache_level=default;
 show variables like 'join_cache_level';
 Variable_name	Value

=== modified file 'mysql-test/t/subselect_sj.test'
--- a/mysql-test/t/subselect_sj.test	2009-09-01 14:44:35 +0000
+++ b/mysql-test/t/subselect_sj.test	2009-11-17 10:12:07 +0000
@@ -277,3 +277,83 @@ drop table STAFF,WORKS,PROJ;
 
 --echo # End of bug#45191
 
+--echo #
+--echo # Bug#46550 Azalea returning duplicate results for some IN subqueries
+--echo # w/ semijoin=on
+--echo #
+
+--disable_warnings
+DROP TABLE IF EXISTS t0, t1, t2;
+--enable_warnings
+
+CREATE TABLE t0 (
+  int_key int(11) DEFAULT NULL,
+  varchar_key varchar(1) DEFAULT NULL,
+  varchar_nokey varchar(1) DEFAULT NULL,
+  KEY int_key (int_key),
+  KEY varchar_key (varchar_key,int_key)
+);
+
+INSERT INTO t0 VALUES
+(1,'m','m'),
+(40,'h','h'),
+(1,'r','r'),
+(1,'h','h'),
+(9,'x','x'),
+(NULL,'q','q'),
+(NULL,'k','k'),
+(7,'l','l'),
+(182,'k','k'),
+(202,'a','a'),
+(7,'x','x'),
+(6,'j','j'),
+(119,'z','z'),
+(4,'d','d'),
+(5,'h','h'),
+(1,'u','u'),
+(3,'q','q'),
+(7,'a','a'),
+(3,'e','e'),
+(6,'l','l');
+
+CREATE TABLE t1 (
+  int_key int(11) DEFAULT NULL,
+  varchar_key varchar(1) DEFAULT NULL,
+  varchar_nokey varchar(1) DEFAULT NULL,
+  KEY int_key (int_key),
+  KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t1 VALUES (7,NULL,NULL),(4,'x','x');
+
+CREATE TABLE t2 (
+  int_key int(11) DEFAULT NULL,
+  varchar_key varchar(1) DEFAULT NULL,
+  varchar_nokey varchar(1) DEFAULT NULL,
+  KEY int_key (int_key),
+  KEY varchar_key (varchar_key,int_key)
+);
+INSERT INTO t2 VALUES (123,NULL,NULL);
+
+SELECT int_key  
+FROM t0  
+WHERE varchar_nokey  IN (  
+  SELECT t1 .varchar_key  from t1
+);
+
+SELECT t0.int_key  
+FROM t0
+WHERE t0.varchar_nokey  IN (  
+  SELECT t1_1 .varchar_key  
+  FROM t1 AS t1_1  JOIN t1 AS t1_2 ON t1_1 .int_key
+);
+
+SELECT t0.int_key  
+FROM t0, t2
+WHERE t0.varchar_nokey  IN (  
+  SELECT t1_1 .varchar_key  
+  FROM t1 AS t1_1  JOIN t1 AS t1_2 ON t1_1 .int_key  
+);
+
+DROP TABLE t0, t1, t2;
+
+--echo # End of bug#46550

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2009-11-12 12:22:31 +0000
+++ b/sql/sql_select.cc	2009-11-17 10:12:07 +0000
@@ -122,7 +122,7 @@ static uint build_bitmap_for_nested_join
 
 static 
 void advance_sj_state(JOIN *join, const table_map remaining_tables, 
-                      const JOIN_TAB *s, uint idx, 
+                      const JOIN_TAB *new_join_tab, uint idx, 
                       double *current_record_count, double *current_read_time,
                       POSITION *loose_scan_pos);
 
@@ -3400,7 +3400,8 @@ bool convert_subq_to_sj(JOIN *parent_joi
   /* 3. Remove the original subquery predicate from the WHERE/ON */
 
   // The subqueries were replaced for Item_int(1) earlier
-  subq_pred->exec_method= Item_in_subselect::SEMI_JOIN; // for subsequent executions
+  subq_pred->exec_method=
+    Item_in_subselect::SEMI_JOIN;         // for subsequent executions
   /*TODO: also reset the 'with_subselect' there. */
 
   /* n. Adjust the parent_join->tables counter */
@@ -3412,7 +3413,9 @@ bool convert_subq_to_sj(JOIN *parent_joi
     tl->table->map= ((table_map)1) << table_no;
     SELECT_LEX *old_sl= tl->select_lex;
     tl->select_lex= parent_join->select_lex; 
-    for(TABLE_LIST *emb= tl->embedding; emb && emb->select_lex == old_sl; emb= emb->embedding)
+    for (TABLE_LIST *emb= tl->embedding;
+         emb && emb->select_lex == old_sl;
+         emb= emb->embedding)
       emb->select_lex= parent_join->select_lex;
   }
   parent_join->tables += subq_lex->join->tables;
@@ -13025,14 +13028,14 @@ void optimize_wo_join_buffering(JOIN *jo
     advance_sj_state()
       join                        The join we're optimizing
       remaining_tables            Tables not in the join prefix
-      s                           Join tab we've just added to the join prefix
+      new_join_tab                Join tab we've just added to the join prefix
       idx                         Index of this join tab (i.e. number of tables
                                   in the prefix minus one)
       current_record_count INOUT  Estimate of #records in join prefix's output
       current_read_time    INOUT  Cost to execute the join prefix
       loose_scan_pos       IN     A POSITION with LooseScan plan to access 
-                                  table s (produced by the last best_access_path 
-                                  call)
+                                  table new_join_tab
+                                  (produced by the last best_access_path call)
 
   DESCRIPTION
     Update semi-join optimization state after we've added another tab (table 
@@ -13068,13 +13071,13 @@ void optimize_wo_join_buffering(JOIN *jo
 
 static 
 void advance_sj_state(JOIN *join, table_map remaining_tables, 
-                      const JOIN_TAB *s, uint idx, 
+                      const JOIN_TAB *new_join_tab, uint idx, 
                       double *current_record_count, double *current_read_time, 
                       POSITION *loose_scan_pos)
 {
   TABLE_LIST *emb_sj_nest;
   POSITION *pos= join->positions + idx;
-  remaining_tables &= ~s->table->map;
+  remaining_tables &= ~new_join_tab->table->map;
 
   pos->prefix_cost.convert_from_cost(*current_read_time);
   pos->prefix_record_count= *current_record_count;
@@ -13092,17 +13095,22 @@ void advance_sj_state(JOIN *join, table_
   else
   {
     // FirstMatch
-    pos->first_firstmatch_table= (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH)?
-                                  MAX_TABLES : pos[-1].first_firstmatch_table;
+    pos->first_firstmatch_table=
+      (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ?
+      MAX_TABLES : pos[-1].first_firstmatch_table;
     pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl;
     pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables;
+
     // LooseScan
-    pos->first_loosescan_table= (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN)? 
-                                  MAX_TABLES: pos[-1].first_loosescan_table;
+    pos->first_loosescan_table=
+      (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ?
+      MAX_TABLES : pos[-1].first_loosescan_table;
     pos->loosescan_need_tables= pos[-1].loosescan_need_tables;
+
     // SJ-Materialization Scan
-    pos->sjm_scan_need_tables= (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN)? 
-                               0: pos[-1].sjm_scan_need_tables;
+    pos->sjm_scan_need_tables=
+      (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ?
+      0 : pos[-1].sjm_scan_need_tables;
     pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner;
 
     // Duplicate Weedout
@@ -13112,33 +13120,39 @@ void advance_sj_state(JOIN *join, table_
   
   table_map handled_by_fm_or_ls= 0;
   /* FirstMatch Strategy */
+  if (new_join_tab->emb_sj_nest &&
+      optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH))
   {
+    const table_map outer_corr_tables=
+      new_join_tab->emb_sj_nest->nested_join->sj_corr_tables |
+      new_join_tab->emb_sj_nest->nested_join->sj_depends_on;
+    const table_map sj_inner_tables=
+      new_join_tab->emb_sj_nest->sj_inner_tables;
+
     /* 
       Enter condition:
        1. The next join tab belongs to semi-join nest
+          (verified for the encompassing code block above).
        2. We're not in a duplicate producer range yet
        3. All outer tables that
            - the subquery is correlated with, or
            - referred to from the outer_expr 
           are in the join prefix
+       4. All inner tables are still part of remaining_tables.
     */
-    if (s->emb_sj_nest &&            // (1)
-        !join->cur_sj_inner_tables &&   // (2)
-        !(remaining_tables &                             // (3)
-          (s->emb_sj_nest->nested_join->sj_corr_tables | // (3)
-           s->emb_sj_nest->nested_join->sj_depends_on)) && // (3)
-        optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH))
+    if (!join->cur_sj_inner_tables &&              // (2)
+        !(remaining_tables & outer_corr_tables) && // (3)
+        (sj_inner_tables ==                        // (4)
+         ((remaining_tables | new_join_tab->table->map) & sj_inner_tables)))
     {
       /* Start tracking potential FirstMatch range */
       pos->first_firstmatch_table= idx;
-      pos->firstmatch_need_tables= s->emb_sj_nest->sj_inner_tables;
+      pos->firstmatch_need_tables= sj_inner_tables;
       pos->first_firstmatch_rtbl= remaining_tables;
     }
 
-    if (pos->first_firstmatch_table != MAX_TABLES && s->emb_sj_nest)
+    if (pos->first_firstmatch_table != MAX_TABLES)
     {
-      table_map outer_corr_tables= s->emb_sj_nest->nested_join->sj_corr_tables |
-                                   s->emb_sj_nest->nested_join->sj_depends_on;
       if (outer_corr_tables & pos->first_firstmatch_rtbl)
       {
         /*
@@ -13150,7 +13164,7 @@ void advance_sj_state(JOIN *join, table_
       else
       {
         /* Record that we need all of this semi-join's inner tables, too */
-        pos->firstmatch_need_tables |= s->emb_sj_nest->sj_inner_tables;
+        pos->firstmatch_need_tables|= sj_inner_tables;
       }
     
       if (!(pos->firstmatch_need_tables & remaining_tables))
@@ -13195,7 +13209,7 @@ void advance_sj_state(JOIN *join, table_
     */
     if ((pos->first_loosescan_table != MAX_TABLES) &&   // (1)
         (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2)
-        s->emb_sj_nest != first->table->emb_sj_nest)                       //(2)
+        new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2)
     {
       pos->first_loosescan_table= MAX_TABLES;
     }
@@ -13207,9 +13221,10 @@ void advance_sj_state(JOIN *join, table_
     if (loose_scan_pos->read_time != DBL_MAX)
     {
       pos->first_loosescan_table= idx;
-      pos->loosescan_need_tables= s->emb_sj_nest->sj_inner_tables | 
-                                  s->emb_sj_nest->nested_join->sj_depends_on |
-                                  s->emb_sj_nest->nested_join->sj_corr_tables;
+      pos->loosescan_need_tables=
+        new_join_tab->emb_sj_nest->sj_inner_tables | 
+        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
     }
     
     if ((pos->first_loosescan_table != MAX_TABLES) && 
@@ -13253,13 +13268,14 @@ void advance_sj_state(JOIN *join, table_
     Update join->cur_sj_inner_tables (Used by FirstMatch in this function and
     LooseScan detector in best_access_path)
   */
-  if ((emb_sj_nest= s->emb_sj_nest))
+  if ((emb_sj_nest= new_join_tab->emb_sj_nest))
   {
     join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
     join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables;
 
     /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
-    if (!(remaining_tables & emb_sj_nest->sj_inner_tables & ~s->table->map))
+    if (!(remaining_tables &
+          emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
       join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
   }
   join->cur_dups_producing_tables &= ~handled_by_fm_or_ls;
@@ -13267,7 +13283,8 @@ void advance_sj_state(JOIN *join, table_
   /* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */
   bool sjm_scan;
   SJ_MATERIALIZATION_INFO *mat_info;
-  if ((mat_info= at_sjmat_pos(join, remaining_tables, s, idx, &sjm_scan)))
+  if ((mat_info= at_sjmat_pos(join, remaining_tables,
+                              new_join_tab, idx, &sjm_scan)))
   {
     if (sjm_scan)
     {
@@ -13290,9 +13307,10 @@ void advance_sj_state(JOIN *join, table_
         The simple way to model this is to remove SJM-SCAN(...) fanout once
         we reach the point #2.
       */
-      pos->sjm_scan_need_tables= s->emb_sj_nest->sj_inner_tables | 
-                                 s->emb_sj_nest->nested_join->sj_depends_on |
-                                 s->emb_sj_nest->nested_join->sj_corr_tables;
+      pos->sjm_scan_need_tables=
+        new_join_tab->emb_sj_nest->sj_inner_tables | 
+        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
       pos->sjm_scan_last_inner= idx;
     }
     else
@@ -13327,7 +13345,8 @@ void advance_sj_state(JOIN *join, table_
         pos->sj_strategy= SJ_OPT_MATERIALIZE;
         *current_read_time=    mat_read_time;
         *current_record_count= prefix_rec_count;
-        join->cur_dups_producing_tables &= ~s->emb_sj_nest->sj_inner_tables;
+        join->cur_dups_producing_tables&=
+          ~new_join_tab->emb_sj_nest->sj_inner_tables;
       }
     }
   }
@@ -13400,7 +13419,7 @@ void advance_sj_state(JOIN *join, table_
        correlated 
     */
     TABLE_LIST *nest;
-    if ((nest= s->emb_sj_nest))
+    if ((nest= new_join_tab->emb_sj_nest))
     {
       if (!pos->dupsweedout_tables)
         pos->first_dupsweedout_table= idx;
@@ -13411,7 +13430,8 @@ void advance_sj_state(JOIN *join, table_
     }
 
     if (pos->dupsweedout_tables && 
-        !((remaining_tables & ~s->table->map) & pos->dupsweedout_tables))
+        !(remaining_tables &
+          ~new_join_tab->table->map & pos->dupsweedout_tables))
     {
       /*
         Ok, reached a state where we could put a dups weedout point.


Attachment: [text/bzr-bundle] bzr/tor.didriksen@sun.com-20091117101207-y3779rzw50cm2emc.bundle
Thread
bzr push into mysql-6.0-codebase-bugfixing branch (tor.didriksen:3711 to3712) Bug#46550Tor Didriksen17 Nov