List:Commits« Previous MessageNext Message »
From:Martin Hansson Date:September 7 2010 9:21am
Subject:bzr commit into mysql-5.1-bugteam branch (martin.hansson:3502) Bug#51070
View as plain text  
#At file:///data0/martin/bzr/bug51070/5.1bt/ based on revid:dmitry.shulga@stripped

 3502 Martin Hansson	2010-09-07
      Bug#51070: Query with a NOT IN subquery predicate returns a wrong result set
            
      The EXISTS transformation has additional switches to catch the known corner
      cases that appear when transforming an IN predicate into EXISTS. Guarded
      conditions are used which are deactivated when a NULL value is seen in the
      outer expression's row. When the inner query block supplies NULL values,
      however, they are filtered out because no distinction is made between the
      guarded conditions; guarded NOT x IS NULL conditions in the HAVING clause that
      filter out NULL values cannot be de-activated in isolation from those that
      match values or from the outer expression or NULL's.
      
      The above problem is handled by making the guarded conditions remember whether
      they have rejected a NULL value or not, and index access methods are taking
      this into account as well. 
      
      The bug consisted of 
      
      1) Not resetting the property for every nested loop iteration on the inner
         query's result.
      
      2) Not propagating the NULL result properly from inner query to IN optimizer.
      
      3) A hack that may or may not have been needed at some point. According to a
         comment it was aimed to fix #2 by returning NULL when FALSE was actually
         the result. This caused failures when #2 was properly fixed. The hack is
         now removed.
      
      The fix resolves all three points.

    modified:
      mysql-test/r/subselect4.result
      mysql-test/t/subselect4.test
      sql/item_cmpfunc.cc
      sql/item_subselect.cc
=== modified file 'mysql-test/r/subselect4.result'
--- a/mysql-test/r/subselect4.result	2010-08-05 10:42:14 +0000
+++ b/mysql-test/r/subselect4.result	2010-09-07 09:21:09 +0000
@@ -77,6 +77,92 @@ Note	1249	Select 2 was reduced during op
 CREATE VIEW v1 AS SELECT 1 LIKE ( 1 IN ( SELECT 1 ) );
 CREATE VIEW v2 AS SELECT 1 LIKE '%' ESCAPE ( 1 IN ( SELECT 1 ) );
 DROP VIEW v1, v2;
+# 
+# Bug#51070: Query with a NOT IN subquery predicate returns a wrong
+# result set
+# 
+CREATE TABLE t1 ( a INT, b INT );
+INSERT INTO t1 VALUES ( 1, NULL ), ( 2, NULL );
+CREATE TABLE t2 ( c INT, d INT );
+INSERT INTO t2 VALUES ( NULL, 3 ), ( NULL, 4 );
+CREATE TABLE t3 ( e INT, f INT );
+INSERT INTO t3 VALUES ( NULL, NULL ), ( NULL, NULL );
+CREATE TABLE t4 ( a INT );
+INSERT INTO t4 VALUES (1), (2), (3);
+CREATE TABLE t5 ( a INT );
+INSERT INTO t5 VALUES (NULL), (2);
+EXPLAIN
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 );
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+x	PRIMARY	x	x	x	x	x	x	x	x
+x	DEPENDENT SUBQUERY	x	x	x	x	x	x	x	x
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 );
+a	b
+EXPLAIN
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	2	Using where
+2	DEPENDENT SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	2	Using where
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL;
+a	b
+1	NULL
+2	NULL
+SELECT * FROM t1 WHERE ( a, b ) IN ( SELECT c, d FROM t2 ) IS NULL;
+a	b
+1	NULL
+2	NULL
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS UNKNOWN;
+a	b
+1	NULL
+2	NULL
+SELECT * FROM t1 WHERE (( a, b ) NOT IN ( SELECT c, d FROM t2 )) IS UNKNOWN;
+a	b
+1	NULL
+2	NULL
+SELECT * FROM t1 WHERE 1 = 1 AND ( a, b ) NOT IN ( SELECT c, d FROM t2 );
+a	b
+EXPLAIN
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 );
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+x	PRIMARY	x	x	x	x	x	x	x	x
+x	DEPENDENT SUBQUERY	x	x	x	x	x	x	x	x
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 );
+a	b
+EXPLAIN
+SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 );
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+x	PRIMARY	x	x	x	x	x	x	x	x
+x	DEPENDENT SUBQUERY	x	x	x	x	x	x	x	x
+SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 );
+c	d
+EXPLAIN
+SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 );
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+x	PRIMARY	x	x	x	x	x	x	x	x
+x	DEPENDENT SUBQUERY	x	x	x	x	x	x	x	x
+SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 );
+e	f
+EXPLAIN
+SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 );
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+x	PRIMARY	x	x	x	x	x	x	x	x
+x	DEPENDENT SUBQUERY	x	x	x	x	x	x	x	x
+SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 );
+c	d
+SELECT * FROM t1 WHERE ( a, b ) NOT IN 
+( SELECT c, d FROM t2 WHERE c = 1 AND c <> 1 );
+a	b
+1	NULL
+2	NULL
+SELECT * FROM t1 WHERE b NOT IN ( SELECT c FROM t2 WHERE c = 1 );
+a	b
+1	NULL
+2	NULL
+SELECT * FROM t1 WHERE NULL NOT IN ( SELECT c FROM t2 WHERE c = 1 AND c <> 1 );
+a	b
+1	NULL
+2	NULL
+DROP TABLE t1, t2, t3, t4, t5;
 #
 # End of 5.1 tests.
 #

=== modified file 'mysql-test/t/subselect4.test'
--- a/mysql-test/t/subselect4.test	2010-08-05 10:42:14 +0000
+++ b/mysql-test/t/subselect4.test	2010-09-07 09:21:09 +0000
@@ -74,6 +74,68 @@ CREATE VIEW v1 AS SELECT 1 LIKE ( 1 IN (
 CREATE VIEW v2 AS SELECT 1 LIKE '%' ESCAPE ( 1 IN ( SELECT 1 ) );
 DROP VIEW v1, v2;
 
+--echo # 
+--echo # Bug#51070: Query with a NOT IN subquery predicate returns a wrong
+--echo # result set
+--echo # 
+CREATE TABLE t1 ( a INT, b INT );
+INSERT INTO t1 VALUES ( 1, NULL ), ( 2, NULL );
+
+CREATE TABLE t2 ( c INT, d INT );
+INSERT INTO t2 VALUES ( NULL, 3 ), ( NULL, 4 );
+
+CREATE TABLE t3 ( e INT, f INT );
+INSERT INTO t3 VALUES ( NULL, NULL ), ( NULL, NULL );
+
+CREATE TABLE t4 ( a INT );
+INSERT INTO t4 VALUES (1), (2), (3);
+
+CREATE TABLE t5 ( a INT );
+INSERT INTO t5 VALUES (NULL), (2);
+
+--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
+EXPLAIN
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 );
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 );
+
+EXPLAIN
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL;
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS NULL;
+SELECT * FROM t1 WHERE ( a, b ) IN ( SELECT c, d FROM t2 ) IS NULL;
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT c, d FROM t2 ) IS UNKNOWN;
+SELECT * FROM t1 WHERE (( a, b ) NOT IN ( SELECT c, d FROM t2 )) IS UNKNOWN;
+
+SELECT * FROM t1 WHERE 1 = 1 AND ( a, b ) NOT IN ( SELECT c, d FROM t2 );
+
+--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
+EXPLAIN
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 );
+SELECT * FROM t1 WHERE ( a, b ) NOT IN ( SELECT e, f FROM t3 );
+
+--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
+EXPLAIN
+SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 );
+SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT a, b FROM t1 );
+
+--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
+EXPLAIN
+SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 );
+SELECT * FROM t3 WHERE ( e, f ) NOT IN ( SELECT c, d FROM t2 );
+
+--replace_column 1 x 3 x 4 x 5 x 6 x 7 x 8 x 9 x 10 x
+EXPLAIN
+SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 );
+SELECT * FROM t2 WHERE ( c, d ) NOT IN ( SELECT e, f FROM t3 );
+
+SELECT * FROM t1 WHERE ( a, b ) NOT IN 
+  ( SELECT c, d FROM t2 WHERE c = 1 AND c <> 1 );
+
+SELECT * FROM t1 WHERE b NOT IN ( SELECT c FROM t2 WHERE c = 1 );
+
+SELECT * FROM t1 WHERE NULL NOT IN ( SELECT c FROM t2 WHERE c = 1 AND c <> 1 );
+
+DROP TABLE t1, t2, t3, t4, t5;
+
 
 --echo #
 --echo # End of 5.1 tests.

=== modified file 'sql/item_cmpfunc.cc'
--- a/sql/item_cmpfunc.cc	2010-08-05 10:42:14 +0000
+++ b/sql/item_cmpfunc.cc	2010-09-07 09:21:09 +0000
@@ -1751,6 +1751,76 @@ bool Item_in_optimizer::fix_fields(THD *
 }
 
 
+/**
+   The implementation of optimized \<outer expression\> [NOT] IN \<subquery\>
+   predicates. The implementation works as follows.
+
+   For the current value of the outer expression
+   
+   - If it contains only NULL values, the original (before rewrite by the
+     Item_in_subselect rewrite methods) inner subquery is non-correlated and
+     was previously executed, there is no need to re-execute it, and the
+     previous return value is returned.
+
+   - If it contains NULL values, check if there is a partial match for the
+     inner query block by evaluating it. For clarity we repeat here the
+     transformation previously performed on the sub-query. The expression
+
+     <tt>
+     ( oc_1, ..., oc_n ) 
+     \<in predicate\>
+     ( SELECT ic_1, ..., ic_n
+       FROM \<table\>
+       WHERE \<inner where\> 
+     )
+     </tt>
+
+     was transformed into
+     
+     <tt>
+     ( oc_1, ..., oc_n ) 
+     \<in predicate\>
+     ( SELECT ic_1, ..., ic_n 
+       FROM \<table\> 
+       WHERE \<inner where\> AND ... ( ic_k = oc_k OR ic_k IS NULL ) 
+       HAVING ... NOT ic_k IS NULL
+     )
+     </tt>
+
+     The evaluation will now proceed according to special rules set up
+     elsewhere. These rules include:
+
+     - The HAVING NOT \<inner column\> IS NULL conditions added by the
+       aforementioned rewrite methods will detect whether they evaluated (and
+       rejected) a NULL value and if so, will cause the subquery to evaluate
+       to NULL. 
+
+     - The added WHERE and HAVING conditions are present only for those inner
+       columns that correspond to outer column that are not NULL at the moment.
+     
+     - If there is an eligible index for executing the subquery, the special
+       access method "Full scan on NULL key" is employed which ensures that
+       the inner query will detect if there are NULL values resulting from the
+       inner query. This access method will quietly resort to table scan if it
+       needs to find NULL values as well.
+
+     - Under these conditions, the sub-query need only be evaluated in order to
+       find out whether it produced any rows.
+     
+       - If it did, we know that there was a partial match since there are
+         NULL values in the outer row expression.
+
+       - If it did not, the result is FALSE or UNKNOWN. If at least one of the
+         HAVING sub-predicates rejected a NULL value corresponding to an outer
+         non-NULL, and hence the inner query block returns UNKNOWN upon
+         evaluation, there was a partial match and the result is UNKNOWN.
+
+   - If it contains no NULL values, the call is forwarded to the inner query
+     block.
+
+     @see Item_in_subselect::val_bool()
+     @see Item_is_not_null_test::val_int()
+ */
 longlong Item_in_optimizer::val_int()
 {
   bool tmp;
@@ -1804,7 +1874,7 @@ longlong Item_in_optimizer::val_int()
           all_left_cols_null= false;
       }
 
-      if (!((Item_in_subselect*)args[1])->is_correlated && 
+      if (!item_subs->is_correlated && 
           all_left_cols_null && result_for_null_param != UNKNOWN)
       {
         /* 
@@ -1818,8 +1888,11 @@ longlong Item_in_optimizer::val_int()
       else 
       {
         /* The subquery has to be evaluated */
-        (void) args[1]->val_bool_result();
-        null_value= !item_subs->engine->no_rows();
+        (void) item_subs->val_bool_result();
+        if (item_subs->engine->no_rows())
+          null_value= item_subs->null_value;
+        else
+          null_value= TRUE;
         if (all_left_cols_null)
           result_for_null_param= null_value;
       }

=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc	2010-06-10 20:45:22 +0000
+++ b/sql/item_subselect.cc	2010-09-07 09:21:09 +0000
@@ -47,7 +47,7 @@ Item_subselect::Item_subselect():
     item value is NULL if select_subselect not changed this value
     (i.e. some rows will be found returned)
   */
-  null_value= 1;
+  null_value= TRUE;
 }
 
 
@@ -427,9 +427,9 @@ void Item_maxmin_subselect::print(String
 
 void Item_singlerow_subselect::reset()
 {
-  null_value= 1;
+  null_value= TRUE;
   if (value)
-    value->null_value= 1;
+    value->null_value= TRUE;
 }
 
 
@@ -574,7 +574,7 @@ double Item_singlerow_subselect::val_rea
   DBUG_ASSERT(fixed == 1);
   if (!exec() && !value->null_value)
   {
-    null_value= 0;
+    null_value= FALSE;
     return value->val_real();
   }
   else
@@ -589,7 +589,7 @@ longlong Item_singlerow_subselect::val_i
   DBUG_ASSERT(fixed == 1);
   if (!exec() && !value->null_value)
   {
-    null_value= 0;
+    null_value= FALSE;
     return value->val_int();
   }
   else
@@ -603,7 +603,7 @@ String *Item_singlerow_subselect::val_st
 {
   if (!exec() && !value->null_value)
   {
-    null_value= 0;
+    null_value= FALSE;
     return value->val_str(str);
   }
   else
@@ -618,7 +618,7 @@ my_decimal *Item_singlerow_subselect::va
 {
   if (!exec() && !value->null_value)
   {
-    null_value= 0;
+    null_value= FALSE;
     return value->val_decimal(decimal_value);
   }
   else
@@ -633,7 +633,7 @@ bool Item_singlerow_subselect::val_bool(
 {
   if (!exec() && !value->null_value)
   {
-    null_value= 0;
+    null_value= FALSE;
     return value->val_bool();
   }
   else
@@ -651,7 +651,7 @@ Item_exists_subselect::Item_exists_subse
   bool val_bool();
   init(select_lex, new select_exists_subselect(this));
   max_columns= UINT_MAX;
-  null_value= 0; //can't be NULL
+  null_value= FALSE; //can't be NULL
   maybe_null= 0; //can't be NULL
   value= 0;
   DBUG_VOID_RETURN;
@@ -814,15 +814,14 @@ double Item_in_subselect::val_real()
   */
   DBUG_ASSERT(0);
   DBUG_ASSERT(fixed == 1);
-  null_value= 0;
+  null_value= was_null= FALSE;
   if (exec())
   {
     reset();
-    null_value= 1;
     return 0;
   }
   if (was_null && !value)
-    null_value= 1;
+    null_value= TRUE;
   return (double) value;
 }
 
@@ -835,15 +834,14 @@ longlong Item_in_subselect::val_int()
   */
   DBUG_ASSERT(0);
   DBUG_ASSERT(fixed == 1);
-  null_value= 0;
+  null_value= was_null= FALSE;
   if (exec())
   {
     reset();
-    null_value= 1;
     return 0;
   }
   if (was_null && !value)
-    null_value= 1;
+    null_value= TRUE;
   return value;
 }
 
@@ -856,16 +854,15 @@ String *Item_in_subselect::val_str(Strin
   */
   DBUG_ASSERT(0);
   DBUG_ASSERT(fixed == 1);
-  null_value= 0;
+  null_value= was_null= FALSE;
   if (exec())
   {
     reset();
-    null_value= 1;
     return 0;
   }
   if (was_null && !value)
   {
-    null_value= 1;
+    null_value= TRUE;
     return 0;
   }
   str->set((ulonglong)value, &my_charset_bin);
@@ -876,20 +873,14 @@ String *Item_in_subselect::val_str(Strin
 bool Item_in_subselect::val_bool()
 {
   DBUG_ASSERT(fixed == 1);
-  null_value= 0;
+  null_value= was_null= FALSE;
   if (exec())
   {
     reset();
-    /* 
-      Must mark the IN predicate as NULL so as to make sure an enclosing NOT
-      predicate will return FALSE. See the comments in 
-      subselect_uniquesubquery_engine::copy_ref_key for further details.
-    */
-    null_value= 1;
     return 0;
   }
   if (was_null && !value)
-    null_value= 1;
+    null_value= TRUE;
   return value;
 }
 
@@ -900,16 +891,15 @@ my_decimal *Item_in_subselect::val_decim
     method should not be used
   */
   DBUG_ASSERT(0);
-  null_value= 0;
+  null_value= was_null= FALSE;
   DBUG_ASSERT(fixed == 1);
   if (exec())
   {
     reset();
-    null_value= 1;
     return 0;
   }
   if (was_null && !value)
-    null_value= 1;
+    null_value= TRUE;
   int2my_decimal(E_DEC_FATAL_ERROR, value, 0, decimal_value);
   return decimal_value;
 }


Attachment: [text/bzr-bundle] bzr/martin.hansson@sun.com-20100907092109-x92t31cz3xa9exn2.bundle
Thread
bzr commit into mysql-5.1-bugteam branch (martin.hansson:3502) Bug#51070Martin Hansson7 Sep