List:Commits« Previous MessageNext Message »
From:Georgi Kodinov Date:April 3 2009 2:46pm
Subject:bzr commit into mysql-5.0-bugteam branch (joro:2743) Bug#38217
View as plain text  
#At file:///home/kgeorge/mysql/work/B38217-5.0-bugteam/ based on revid:azundris@stripped

 2743 Georgi Kodinov	2009-04-03
      Bug #38217: GROUP BY with subquery fails.
      
      When a subquery references fields from the outer context 
      the server creates either an Item_outer_ref that expands to 
      either Item_ref (pointing to the last row of the previous 
      group in a GROUP BY context) or Item_direct_ref (pointing 
      to the first row of the new group in a GROUP BY context).
      This causes wrong results with scalar subqueries in the SELECT 
      list that are referenced e.g. by name from the GROUP BY list.
      For these subqueries it's not possible to make both 
      Item_ref (because the sub-query is in the SELECT list)
      and Item_direct_ref (because the same subquery is referenced
      through an Item_field from the GROUP BY field) for a single 
      copy of Item_outer_ref in the subquery.
      No wrong results are returned when temporary table is used.
      Temporary table approach works because the scalar
      subquery from the GROUP BY is evaluated only once and 
      stored in the temporary table.
      We have two approaches to fix this:
      1. Make sure we copy the entire tree of the subquery when it's
      referenced in GROUP BY, thus effectively transforming :
      SELECT (SELECT a FROM t2 WHERE b = t1.a) c1, COUNT(*) FROM t1 
        GROUP BY c1
      to
      SELECT (SELECT a FROM t2 WHERE b = t1.a) c1, COUNT(*) FROM t1 
        GROUP BY (SELECT a FROM t2 WHERE b = t1.a)
      and processing this as usual.
      
      2. Make sure scalar subqueries in GROUP BY/ORDER BY always 
      trigger using temporary table.
      
      This fix implements 2).
      This is done to avoid multiple re-calculation of the scalar
      subquery during the calculation of GROUP BY/ORDER BY.
      MySQL is already doing similar thing for UDFs and 
      stored procedures.
     @ mysql-test/r/subselect.result
        Bug #38217: 
         - test case
         - fixed a non-sorted query result 
           (caused by a query that has ORDER BY <const>)
     @ mysql-test/t/subselect.test
        Bug #38217: 
         - test case
         - fixed a non-sorted query result 
           (caused by a query that has ORDER BY <const>)
     @ sql/item_subselect.h
        Bug #38217: subqueries are expensive expressions
     @ sql/sql_select.cc
        Bug #38217: check GROUP BY for expensive expressions as well.

    modified:
      mysql-test/r/subselect.result
      mysql-test/t/subselect.test
      sql/item_subselect.h
      sql/sql_select.cc
=== modified file 'mysql-test/r/subselect.result'
--- a/mysql-test/r/subselect.result	2009-02-09 20:52:40 +0000
+++ b/mysql-test/r/subselect.result	2009-04-03 14:46:10 +0000
@@ -3102,10 +3102,10 @@ SELECT a FROM t1
 ORDER BY IFNULL((SELECT b FROM t2 WHERE b > 2),
 (SELECT c FROM t2 WHERE c=a AND b > 2 ORDER BY b));
 a
-2
-4
 1
+2
 3
+4
 SELECT a FROM t1
 ORDER BY IFNULL((SELECT b FROM t2 WHERE b > 1),
 (SELECT c FROM t2 WHERE c=a AND b > 1 ORDER BY b));
@@ -4452,4 +4452,139 @@ WHERE 1 IN (SELECT id FROM t1) WITH CHEC
 DELETE FROM v3;
 DROP VIEW v1,v2,v3;
 DROP TABLE t1,t2;
+CREATE TABLE t1 (a int, b int);
+INSERT INTO t1 VALUES (1,1), (2,2), (3,2);
+CREATE TABLE t2 (a int, b int);
+INSERT INTO t2 VALUES (1,1), (2,2);
+EXPLAIN 
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(t1.a) col2 
+FROM t1 GROUP BY col1;
+id	1
+select_type	PRIMARY
+table	t1
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	3
+Extra	Using temporary; Using filesort
+id	2
+select_type	DEPENDENT SUBQUERY
+table	t2
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	2
+Extra	Using where
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(t1.a) col2 
+FROM t1 GROUP BY col1;
+col1	col2
+1	1
+2	2
+EXPLAIN 
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY col1;
+id	1
+select_type	PRIMARY
+table	t1
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	3
+Extra	Using temporary; Using filesort
+id	2
+select_type	DEPENDENT SUBQUERY
+table	t2
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	2
+Extra	Using where
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY col1;
+col1	col2
+1	1
+2	2
+EXPLAIN 
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY (SELECT a FROM t2 WHERE b = t1.b);
+id	1
+select_type	PRIMARY
+table	t1
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	3
+Extra	Using temporary; Using filesort
+id	3
+select_type	DEPENDENT SUBQUERY
+table	t2
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	2
+Extra	Using where
+id	2
+select_type	DEPENDENT SUBQUERY
+table	t2
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	2
+Extra	Using where
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY (SELECT a FROM t2 WHERE b = t1.b);
+col1	col2
+1	1
+2	2
+EXPLAIN 
+SELECT t1.b + 0 col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY col1;
+id	1
+select_type	SIMPLE
+table	t1
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	3
+Extra	Using filesort
+SELECT t1.b + 0 col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY col1;
+col1	col2
+1	1
+2	2
+EXPLAIN 
+SELECT t1.b + 0 col1, COUNT((t1.a)) col2 
+FROM t1 GROUP BY col1;
+id	1
+select_type	SIMPLE
+table	t1
+type	ALL
+possible_keys	NULL
+key	NULL
+key_len	NULL
+ref	NULL
+rows	3
+Extra	Using temporary; Using filesort
+SELECT t1.b + 0 col1, COUNT((t1.a)) col2 
+FROM t1 GROUP BY col1;
+col1	col2
+1	1
+2	2
+DROP TABLE t1,t2;
 End of 5.0 tests.

=== modified file 'mysql-test/t/subselect.test'
--- a/mysql-test/t/subselect.test	2009-02-09 20:52:40 +0000
+++ b/mysql-test/t/subselect.test	2009-04-03 14:46:10 +0000
@@ -2083,7 +2083,7 @@ SELECT a FROM t1 GROUP BY a
 SELECT a FROM t1 GROUP BY a
   HAVING IFNULL((SELECT b FROM t2 WHERE b > 4),
                 (SELECT c FROM t2 WHERE c=a AND b > 1 ORDER BY b)) > 3;
-
+--sorted_result
 SELECT a FROM t1
   ORDER BY IFNULL((SELECT b FROM t2 WHERE b > 2),
                   (SELECT c FROM t2 WHERE c=a AND b > 2 ORDER BY b));
@@ -3431,4 +3431,42 @@ DELETE FROM v3;
 DROP VIEW v1,v2,v3;
 DROP TABLE t1,t2;
 
+#
+# Bug #38217 : GROUP BY with subquery fails
+# 
+
+CREATE TABLE t1 (a int, b int); INSERT INTO t1 VALUES (1,1), (2,2), (3,2);
+CREATE TABLE t2 (a int, b int); INSERT INTO t2 VALUES (1,1), (2,2);
+
+query_vertical EXPLAIN 
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(t1.a) col2 
+FROM t1 GROUP BY col1;
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(t1.a) col2 
+FROM t1 GROUP BY col1;
+
+query_vertical EXPLAIN 
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY col1;
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY col1;
+
+query_vertical EXPLAIN 
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY (SELECT a FROM t2 WHERE b = t1.b);
+SELECT (SELECT a FROM t2 WHERE b = t1.b) col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY (SELECT a FROM t2 WHERE b = t1.b);
+
+query_vertical EXPLAIN 
+SELECT t1.b + 0 col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY col1;
+SELECT t1.b + 0 col1, COUNT(DISTINCT(t1.a)) col2 
+FROM t1 GROUP BY col1;
+query_vertical EXPLAIN 
+SELECT t1.b + 0 col1, COUNT((t1.a)) col2 
+FROM t1 GROUP BY col1;
+SELECT t1.b + 0 col1, COUNT((t1.a)) col2 
+FROM t1 GROUP BY col1;
+
+DROP TABLE t1,t2;
+
 --echo End of 5.0 tests.

=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h	2007-10-30 12:27:21 +0000
+++ b/sql/item_subselect.h	2009-04-03 14:46:10 +0000
@@ -132,6 +132,8 @@ public:
   */
   st_select_lex* get_select_lex();
 
+  virtual bool is_expensive_processor(byte *arg) { return 1; }
+
   friend class select_subselect;
   friend class Item_in_optimizer;
   friend bool Item_field::fix_fields(THD *, Item **);

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2009-03-19 13:44:58 +0000
+++ b/sql/sql_select.cc	2009-04-03 14:46:10 +0000
@@ -1337,6 +1337,23 @@ JOIN::optimize()
         }
       }
     }
+    if (group_list)
+    {
+      /*
+        Force using of tmp table if sorting by a SP or UDF function due to
+        their expensive and probably non-deterministic nature.
+      */
+      for (ORDER *tmp_order= group_list; tmp_order ; tmp_order=tmp_order->next)
+      {
+        Item *item= *tmp_order->item;
+        if (item->walk(&Item::is_expensive_processor,(byte*)0))
+        {
+          /* Force tmp table without sort */
+          need_tmp=1; simple_order=simple_group=0;
+          break;
+        }
+      }
+    }
   }
 
   tmp_having= having;


Attachment: [text/bzr-bundle] bzr/joro@sun.com-20090403144610-tv3scdv5rifdojfd.bundle
Thread
bzr commit into mysql-5.0-bugteam branch (joro:2743) Bug#38217Georgi Kodinov3 Apr