Hi Martin,
thank you for this patch. I have now revisited all code, and AFAIK things look
good. I have some minor suggestions that you need to consider, though.
I have also gone through all test changes, and they also look good.
Please see inline for further comments.
On 18.01.11 10.10, Martin Hansson wrote:
> #At file:///data0/martin/bzrroot/wl3724/n-mr-o-t-Roy/ based on
> revid:jorgen.loland@stripped
>
> 3272 Martin Hansson 2011-01-18
> WL#3724: Short-Cutting Join Execution: Speeding up star queries
>
> added:
> mysql-test/r/shortcut.result
> mysql-test/t/shortcut.test
> modified:
> mysql-test/r/func_in_icp.result
> mysql-test/r/func_in_icp_mrr.result
> mysql-test/r/func_in_mrr.result
> mysql-test/r/func_in_mrr_cost.result
> mysql-test/r/func_in_none.result
> mysql-test/r/greedy_optimizer.result
> mysql-test/r/join.result
> mysql-test/r/join_cache_jcl1.result
> mysql-test/r/join_cache_jcl2.result
> mysql-test/r/join_cache_jcl3.result
> mysql-test/r/join_cache_jcl4.result
> mysql-test/r/join_nested.result
> mysql-test/r/myisam_icp.result
> mysql-test/r/myisam_icp_none.result
> mysql-test/r/order_by_icp_mrr.result
> mysql-test/r/order_by_none.result
> mysql-test/r/subselect_innodb.result
> mysql-test/t/subselect_innodb.test
> sql/item.cc
> sql/item.h
> sql/sql_select.cc
> sql/sql_select.h
>
Test results clipped.
greedy_optimizer.result shows some anomalies, as discussed on IRC. Apparently,
there are some shortcuts inserted, even though there are dependencies that
should prevent them. Please investigate these issues.
> === added file 'mysql-test/t/shortcut.test'
> --- a/mysql-test/t/shortcut.test 1970-01-01 00:00:00 +0000
> +++ b/mysql-test/t/shortcut.test 2011-01-18 09:10:18 +0000
> @@ -0,0 +1,522 @@
> +--echo #
> +--echo # wl3724: Short-Cutting Join Execution: Speeding up star queries
> +--echo #
> +
> +--echo #
> +--echo # Test 1. Basic tests.
> +--echo #
> +--echo # Test 1a. Tests of EXPLAIN.
> +--echo #
> +CREATE TABLE t1 (
> + a INT,
> + b INT
> +);
> +
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 (
> + a INT,
> + KEY( a )
> +);
> +INSERT INTO t2 VALUES (1), (1), (1), (1), (1);
> +
> +CREATE TABLE t3 (
> + b INT,
> + KEY( b )
> +);
> +INSERT INTO t3 VALUES (1), (1), (1), (1), (1), (1);
> +
> +EXPLAIN SELECT * FROM t1 JOIN t2 USING( a ) JOIN t3 USING( b );
> +
> +SELECT * FROM t1 JOIN t2 USING( a ) JOIN t3 USING( b );
You could generally use this syntax to make the tests easier to maintain
consistently, when you need to both execute and explain a query:
let $query=SELECT ...;
eval EXPLAIN $query;
eval $query;
> +
> +--echo # Test that the short-cut does not cross an outer join boundary
> +EXPLAIN SELECT * FROM t1 LEFT JOIN ( t2, t3 ) USING( b );
> +EXPLAIN SELECT * FROM t1 LEFT JOIN ( t2 JOIN t3 ON a = b ) USING( b );
> +
> +--echo # Test that the optimization is robust wrt reordering of tables
> +EXPLAIN SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.b = t3.b;
> +EXPLAIN SELECT * FROM t1, t3, t2 WHERE t1.a = t2.a AND t1.b = t3.b;
> +
> +EXPLAIN SELECT * FROM t2, t1, t3 WHERE t1.a = t2.a AND t1.b = t3.b;
> +EXPLAIN SELECT * FROM t2, t3, t1 WHERE t1.a = t2.a AND t1.b = t3.b;
> +
> +EXPLAIN SELECT * FROM t3, t1, t2 WHERE t1.a = t2.a AND t1.b = t3.b;
> +EXPLAIN SELECT * FROM t3, t2, t1 WHERE t1.a = t2.a AND t1.b = t3.b;
> +
> +DROP TABLE t1, t2, t3;
> +
> +--echo # Test that short-cuts show up in EXPLAIN properly with "Using index"
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3);
> +
> +CREATE TABLE t2 ( b INT KEY );
> +INSERT INTO t2 VALUES (1), (2), (3);
> +
> +CREATE TABLE t3 ( c INT KEY );
> +INSERT INTO t3 VALUES (1), (2), (3);
> +
> +EXPLAIN SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c;
> +
> +--echo # Test that we don't take a short-cut if we have successfully read a
> +--echo # record already.
record -> row.
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE c> 1;
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE c> 1;
> +
> +DROP TABLE t1, t2, t3;
> +--echo #
> +--echo # Test 1b.
> +--echo # Tests that short-cuts work properly with join buffers.
> +--echo #
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3);
> +
> +CREATE TABLE t2 ( b INT );
> +INSERT INTO t2 VALUES (1), (2), (3);
> +
> +CREATE TABLE t2key ( b INT KEY );
> +INSERT INTO t2key VALUES (1), (2), (3);
> +
> +CREATE TABLE t3 ( c INT, d INT );
> +INSERT INTO t3 VALUES (1, -1), (2, 1), (3, 1);
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE d> 0;
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE d> 0;
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2key ON a = b JOIN t3 ON a = c WHERE d> 0;
> +SELECT * FROM t1 JOIN t2key ON a = b JOIN t3 ON a = c WHERE d> 0;
Perhaps you should re-run these tests without join caching to see the difference.
> +
> +DROP TABLE t1, t2, t2key, t3;
> +--echo # Test that we reject a short-cut properly. We have a cartesian product
> +--echo # of a table and the result of an outer join. The optimizer places the
> +--echo # table between the outer and inner table of the outer join and hence
> +--echo # there is a short-cut from the inner table to the outer table. This
> +--echo # short-cut should be rejected.
> +CREATE TABLE t1 ( a INT PRIMARY KEY );
> +INSERT INTO t1 VALUES (1),(2);
> +
> +CREATE TABLE t2 ( a INT );
> +INSERT INTO t2 VALUES (1), (2), (3);
> +
> +CREATE TABLE t3 ( a INT );
> +INSERT INTO t3 VALUES (1), (2), (3);
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t2, t1 LEFT OUTER JOIN t3 ON (t3.a = t1.a);
> +SELECT * FROM t2, t1 LEFT OUTER JOIN t3 ON (t3.a = t1.a);
> +DROP TABLE t1, t2, t3;
> +--echo # Test 2
> +--echo # That the optimization actually works, i.e. that unneccesary reads are
> +--echo # indeed short-cut.
> +--echo #
> +--echo # Test 2a: We have two tables between giving and receiving end of short-cut:
> +--echo # t2 and t3.
> +--echo #
> +CREATE TABLE t1 ( a INT, b INT );
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 ( a INT, b INT, KEY( a ) );
> +INSERT INTO t2 VALUES (1, 2),(1, 2),(1, 2),(1, 2),(1, 2);
> +INSERT INTO t2 SELECT * FROM t2;
> +
> +CREATE TABLE t3 ( b INT, KEY( b ) );
> +INSERT INTO t3 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t3 SELECT * FROM t3;
> +
> +CREATE TABLE t4 ( b INT, KEY( b ) );
> +INSERT INTO t4 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +FLUSH STATUS;
> +
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 USING(a) JOIN t3 ON t2.a = t3.b JOIN t4 ON t1.b = t4.b;
> +SELECT * FROM t1 JOIN t2 USING(a) JOIN t3 ON t2.a = t3.b JOIN t4 ON t1.b = t4.b;
> +
> +SHOW STATUS LIKE 'handler_read_%';
You could add another predicate that does not perform any "real" filtering, but
makes shortcutting impossible, and compare handler statistics for the two queries.
> +DROP TABLE t1, t2, t3, t4;
> +--echo # Test 2b
> +--echo # Short-cuts followed by successful keys. This tests that we perform the
> +--echo # optimal number of reads after the short-cut is taken without missing
> tuples.
tuples -> rows.
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3), (4), (5);
> +
> +CREATE TABLE t2 ( a INT, KEY (a) );
> +INSERT INTO t2 VALUES (1), (2), (2), (2), (2), (2), (3), (4), (4), (4), (5);
> +
> +CREATE TABLE t3 ( a INT, KEY (a) );
> +INSERT INTO t3 VALUES (1), (3), (5), (-1), (-1), (-1), (-1), (-1), (-1);
> +
> +FLUSH STATUS;
> +
> +EXPLAIN
> +SELECT STRAIGHT_JOIN * FROM t1 JOIN t2 USING(a) JOIN t3 USING(a);
> +SELECT STRAIGHT_JOIN * FROM t1 JOIN t2 USING(a) JOIN t3 USING(a);
> +
> +SHOW STATUS LIKE 'handler_read_%';
> +
> +DROP TABLE t1, t2, t3;
> +--echo # Test 2c
> +--echo # Test that we don't erroneously take a short-cut at the end of a table
> scan.
Can you explain this in more detail?
> +let $requires_join_cache_level = 0;
> +--source include/have_join_cache_level.inc
> +--eval set optimizer_join_cache_level = $requires_join_cache_level
> +
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2);
> +
> +CREATE TABLE t2 ( a INT, KEY (a) );
> +INSERT INTO t2 VALUES (1), (1), (2);
> +
> +CREATE TABLE t3 ( a INT );
> +INSERT INTO t3 VALUES (1), (3);
> +
> +EXPLAIN
> +SELECT STRAIGHT_JOIN * FROM t1 JOIN t2 USING(a) JOIN t3 USING(a);
> +SELECT STRAIGHT_JOIN * FROM t1 JOIN t2 USING(a) JOIN t3 USING(a);
> +
> +DROP TABLE t1, t2, t3;
> +
> +set @@optimizer_join_cache_level = DEFAULT;
> +--echo #
> +--echo # Test 3.
> +--echo # Test for outer joins. We may not take short-cuts across outer join
> +--echo # boundaries. An outer join boundary goes before the first inner table for
> an
> +--echo # outer join.
> +--echo #
> +--echo # Test 3a.
> +--echo #
> +CREATE TABLE t1 ( a INT, b INT );
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 ( a INT, b INT, KEY( a ) );
> +INSERT INTO t2 VALUES (1, 2),(1, 2),(1, 2),(1, 2),(1, 2);
> +INSERT INTO t2 SELECT * FROM t2;
> +
> +CREATE TABLE t3 ( b INT, KEY( b ) );
> +INSERT INTO t3 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t3 SELECT * FROM t3;
> +
> +CREATE TABLE t4 ( b INT, KEY( b ) );
> +INSERT INTO t4 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +CREATE TABLE t5 ( b INT, KEY( b ) );
> +INSERT INTO t5 VALUES (1), (1), (1), (1), (1), (1), (1), (1);
> +INSERT INTO t5 SELECT * FROM t5;
> +
> +--echo # Test of two outer join boundaries in one join.
> +EXPLAIN
> +SELECT *
> +FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.a = t3.b) USING ( a )
> + LEFT JOIN t4 ON t1.a = t4.b;
> +
> +EXPLAIN
> +SELECT *
> +FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.a = t3.b) USING(a)
> + LEFT JOIN (t4 JOIN t5 USING( b )) ON t1.a = t4.b;
> +
> +DROP TABLE t1, t2, t3, t4, t5;
> +--echo # Test 3b.
> +--echo # Tests that we do not miss any data due to short-cutting.
> +
> +CREATE TABLE t1 ( a INT, b INT );
> +CREATE TABLE t2 ( a INT, b INT, PRIMARY KEY (a,b) );
> +CREATE TABLE t3 ( a INT, b INT, PRIMARY KEY (a,b) );
> +
> +INSERT INTO t1 VALUES ( 1, 1 ), ( 2, 1 ), ( 1, 3 );
> +INSERT INTO t2 VALUES ( 1, 1 ), ( 2, 2 ), ( 3, 3 );
> +INSERT INTO t3 VALUES ( 1, 1 ), ( 2, 1 ), ( 1, 3 );
> +
> +--echo # Test of the short-cut detection algorithm. When traversing the
> +--echo # push-down conditions the algorithm will first find that the
> +--echo # prospective short-cut t3->t1, but this must be invalidated due to the
> +--echo # later found dependency t3->t2.
> +EXPLAIN
> +SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.b = t3.b;
> +SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.b = t3.b;
> +
> +--echo # Test of internal representation of pushdown conditions.
> +--echo # The join conditions will be encoded in ref access, while the WHERE
> +--echo # expression remains in the pushdown condition.
WHERE expression -> WHERE condition.
When you mention pushdown conditions, I immediately think about ECP or ICP. I
think that we need a better term... Can table condition be better?
> +EXPLAIN
> +SELECT *
> +FROM t1 JOIN t2 ON t1.a = t2.a JOIN t3 ON t1.b = t3.a
> +WHERE t3.b + 1 = t2.b;
> +
> +--echo # Short-cut is actually applicable in this case, thanks to equality
> +--echo # propagation. The WHERE condition t3.a + 1 = t2.a is rewritten into
> +--echo # t1.b + 1 = t1.a, and hence we have an optimizable star query.
> +EXPLAIN
> +SELECT *
> +FROM t1 JOIN t2 ON t1.a = t2.a JOIN t3 ON t1.b = t3.a
> +WHERE t3.a + 1 = t2.a;
> +
> +DROP TABLE t1, t2, t3;
> +--echo # Test 3c
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (1), (1), (1);
> +
> +CREATE TABLE t2 ( a INT, KEY (a), b INT, c INT );
> +INSERT INTO t2 VALUES (1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1);
> +INSERT INTO t2 SELECT * FROM t2;
> +INSERT INTO t2 SELECT * FROM t2;
> +
> +CREATE TABLE t3 ( a INT, KEY (a) );
> +INSERT INTO t3 VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1), (3);
> +
> +CREATE TABLE t4 ( a INT, KEY (a), b INT );
> +INSERT INTO t4 VALUES (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1,
> 1), (1, 1), (1, 1), (1, 1);
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +--echo #
> +--echo # Test that the short-cut t4->t1 is rejected in favor of t4->t2.
> +--echo #
> +EXPLAIN
> +SELECT STRAIGHT_JOIN *
> +FROM t1 JOIN t2 ON t1.a = t2.a
> + JOIN t3 ON t2.b = t3.a
> + JOIN t4 ON t1.a = t4.a AND t2.c = t4.b;
> +
> +DROP TABLE t1, t2, t3, t4;
> +--echo # Test 4. Test of execution step.
> +--echo # In this test we are testing that a short-cut is properly separated from
> +--echo # the case of a normal 'end of records' state on the last table in the plan.
> +--echo # This particular case gets hit only when
> +--echo # - The optimizer reorders the tables
> +--echo # - We have a SELECT<table>.*
I did not get this...
> +CREATE TABLE t1( a INT );
> +CREATE TABLE t2( a INT PRIMARY KEY );
> +CREATE TABLE t3( a INT, INDEX( a ) );
> +
> +INSERT INTO t1( a ) VALUES ( 2 );
> +INSERT INTO t1( a ) VALUES ( 2 );
> +
> +INSERT INTO t2( a ) VALUES ( 1 );
> +INSERT INTO t2( a ) VALUES ( 2 );
> +
> +INSERT INTO t3( a ) VALUES ( 6 );
> +INSERT INTO t3( a ) VALUES ( 5 );
> +INSERT INTO t3( a ) VALUES ( 2 );
> +
> +EXPLAIN
> +SELECT t2.* FROM t2, t1, t3 WHERE t2.a = t1.a AND t1.a = t3.a;
> +SELECT t2.* FROM t2, t1, t3 WHERE t2.a = t1.a AND t1.a = t3.a;
> +
> +DROP TABLE t1, t2, t3;
> +--echo #
> +--echo # Test 5.
> +--echo # Tests that a short-cut does not cross a plan node that is using join
> +--echo # buffering.
> +--echo #
> +--echo # Test 5a.
> +--echo #
> +CREATE TABLE t1 ( a INT, c INT );
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 ( a INT, b INT, KEY( a ), KEY( b ) );
> +INSERT INTO t2 VALUES (1, 2),(1, 2),(1, 2),(1, 2),(1, 2);
> +
> +CREATE TABLE t3 ( a INT, KEY( a ) );
> +INSERT INTO t3 VALUES (2), (2), (2), (2), (2);
> +INSERT INTO t3 SELECT * FROM t3;
> +INSERT INTO t3 SELECT * FROM t3;
> +INSERT INTO t3 SELECT * FROM t3;
> +
> +CREATE TABLE t4 ( a INT, KEY( a ) );
> +INSERT INTO t4 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +FLUSH STATUS;
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +
> +SHOW STATUS LIKE 'handler_read_%';
Please rerun without join buffering and expose the difference.
> +
> +DROP TABLE t1, t2, t3, t4;
> +--echo #
> +--echo # Test 5b.
> +--echo #
> +CREATE TABLE t1 ( a INT, c INT );
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 ( a INT, b INT, KEY( a, b ) );
> +INSERT INTO t2 VALUES (1, 2),(1, 2),(1, 2),(1, 2),(1, 2);
> +
> +CREATE TABLE t3 ( a INT );
> +INSERT INTO t3 VALUES (2), (2), (2), (2), (2);
> +INSERT INTO t3 SELECT * FROM t3;
> +
> +CREATE TABLE t4 ( a INT );
> +INSERT INTO t4 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +FLUSH STATUS;
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +
> +SHOW STATUS LIKE 'handler_read_%';
> +
> +DROP TABLE t1, t2, t3, t4;
> +--echo #
> +--echo # Test 6. Test of short-cuts in conjunction with outer join.
> +--echo #
> +--echo # Test 6a. Test of outer join detection.
> +--echo # In this case the execution order is t1, t2, t3, t4.
> +--echo # t2, t3 and t4 will form an 'outer join nest' with which t1 is left
> +--echo # outer joined. But (t3, t2) is left joined with t4. Since t4 is just one
> +--echo # table, however, the 'outer join nest' of t4 is only implicitly
> +--echo # represented in the query plan. The only way to detect it is by looking
> +--echo # at t4's first_inner pointer, which would be null for the last inner table
> +--echo # in an outer join nest. But since it points to itseld, t4 is in its own
> +--echo # nest. But there is no NESTED_JOIN to represent it, since it follows the
> +--echo # 'normal' left-deep tree representation.
It's probably overkill to explain internal data structures in a test file...
"Test that combines outer join operation with one table and outer join operation
with multiple inner tables" might just do...
> +--echo #
> +CREATE TABLE t1 (a INT, b INT, c INT);
> +CREATE TABLE t2 (d INT, e INT, f INT);
> +CREATE TABLE t3 (g INT, h INT, i INT);
> +CREATE TABLE t4 (j INT, k INT, l INT);
> +
> +INSERT INTO t1 VALUES (3,1,0), (2,2,0), (3,3,0);
> +INSERT INTO t2 VALUES (1,1,0), (2,2,0);
> +INSERT INTO t3 VALUES (3,2,0), (6,2,0), (6,1,0);
> +INSERT INTO t4 VALUES (0,2,0), (1,2,0);
> +
> +EXPLAIN
> +SELECT a, b, c, d, e, f, g, h, i, j, k, l
> +FROM t1 LEFT JOIN ( (t3, t2) LEFT JOIN t4 ON t2.e = t4.k AND t3.h< 10 ) ON t1.b
> = t2.e;
> +SELECT *
> +FROM t1 LEFT JOIN ( (t3, t2) LEFT JOIN t4 ON t2.e = t4.k AND t3.h< 10 ) ON t1.b
> = t2.e;
> +
> +DROP TABLE t1, t2, t3, t4;
> +--echo #
> +--echo # Test 6b.
> +--echo # We may take short-cuts across or inside an outer join, as long as we
> +--echo # don't fail due to an outer join predicate. It goes as follows.
> +--echo #
> +--echo # - We may take a short-cut across a nested outer join.
> +--echo # - We may take short-cuts inside a nested inner join sequence.
> +--echo #
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
> +
> +CREATE TABLE t2 ( b INT, KEY ( b ) );
> +INSERT INTO t2 VALUES (1), (2), (4), (5), (6), (7), (8), (9), (10);
> +
> +CREATE TABLE t3 ( c INT, KEY( c ) );
> +INSERT INTO t3 VALUES (1), (2), (3), (4), (6), (7), (8), (9), (10);
> +
> +CREATE TABLE t4 ( d INT, KEY( d ) );
> +INSERT INTO t4 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (10);
> +INSERT INTO t4 VALUES (10), (10), (10), (10);
> +
> +CREATE TABLE t5 ( e INT, KEY( e ) );
> +INSERT INTO t5 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
> +
> +--echo # Outer join followed by a table that is inner joined with the first.
> +EXPLAIN
> +SELECT * FROM t1 LEFT JOIN t2 ON a = b JOIN t3 ON a = c;
> +SELECT * FROM t1 LEFT JOIN t2 ON a = b JOIN t3 ON a = c;
> +
> +--echo # Short-cut across a nested outer join
> +EXPLAIN
> +SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON b = c) ON a = b JOIN t4 ON a = d;
> +SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON b = c) ON a = b JOIN t4 ON a = d;
> +
> +--echo # Short-cut across a nested outer join with a nested inner join
> +EXPLAIN
> +SELECT * FROM t1
> + JOIN (t2 LEFT JOIN (t3 JOIN t4 ON c = d) ON b = d) ON a = b
> + JOIN t5 ON a = e;
> +SELECT * FROM t1
> + JOIN (t2 LEFT JOIN (t3 JOIN t4 ON c = d) ON b = d) ON a = b
> + JOIN t5 ON a = e;
> +
> +--echo # Short-cut from last inner table inside an inner join that is nested
> +--echo # within an outer join
> +EXPLAIN
> +SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON b = c JOIN t4 ON b = d) on a = b;
> +SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON b = c JOIN t4 ON b = d) on a = b;
> +
> +--echo # Short-cut from not last inner table inside an inner join that is nested
> +--echo # within an outer join
> +EXPLAIN
> +SELECT * FROM t1
> + LEFT JOIN
> + (t2 JOIN t3 ON b = c JOIN t4 ON b = d JOIN t5 ON d = e)
> + ON a = b;
> +SELECT * FROM t1
> + LEFT JOIN
> + (t2 JOIN t3 ON b = c JOIN t4 ON b = d JOIN t5 ON d = e)
> + ON a = b;
What would happen with "a=d" instead of "a=b" ?
> +
> +--echo # Illegal scenario for short-cut. Should not be taken.
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b LEFT JOIN t3 ON a = c;
> +SELECT * FROM t1 JOIN t2 ON a = b LEFT JOIN t3 ON a = c;
> +
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b LEFT JOIN (t3 JOIN t4 ON c = d) ON a = c;
> +SELECT * FROM t1 JOIN t2 ON a = b LEFT JOIN (t3 JOIN t4 ON c = d) ON a = c;
> +
> +DROP TABLE t1, t2, t3, t4, t5;
> +--echo #
> +--echo # Test 6c.
> +--echo # Tests that short-cuts work even if a record was read and then rejected.
record -> row
> +--echo # This happens for table scan without join buffering.
> +--echo #
> +let $requires_join_cache_level = 0;
> +--source include/have_join_cache_level.inc
> +--eval set optimizer_join_cache_level = $requires_join_cache_level;
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3);
> +
> +CREATE TABLE t2 ( b INT );
> +INSERT INTO t2 VALUES (1), (2), (2), (2), (2), (3);
> +
> +CREATE TABLE t3 ( c INT, d INT );
> +INSERT INTO t3(c) VALUES (1), (3), (4), (4), (4), (4);
> +
> +FLUSH STATUS;
> +
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c;
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c;
> +
> +SHOW STATUS LIKE 'handler_read%';
> +FLUSH STATUS;
> +
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE d> 0;
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE d> 0;
> +
> +SHOW STATUS LIKE 'handler_read%';
> +
> +set @@optimizer_join_cache_level = DEFAULT;
> +
> +DROP TABLE t1, t2, t3;
>
>
> === modified file 'sql/item.cc'
> --- a/sql/item.cc 2011-01-12 15:28:35 +0000
> +++ b/sql/item.cc 2011-01-18 09:10:18 +0000
> @@ -2295,6 +2295,9 @@ bool Item_field::eq(const Item *item, bo
> }
>
>
> +/**
> + Returns simply the table to which the field belongs.
> +*/
> table_map Item_field::used_tables() const
> {
> if (field->table->const_table)
>
> === modified file 'sql/item.h'
> --- a/sql/item.h 2010-12-29 00:38:59 +0000
> +++ b/sql/item.h 2011-01-18 09:10:18 +0000
> @@ -868,7 +868,14 @@ public:
> virtual bool val_bool_result() { return val_bool(); }
> virtual bool is_null_result() { return is_null(); }
>
> - /* bit map of tables used by item */
> + /**
> + This is the set of tables directly referenced by this Item.
Please remove "directly" from this sentence, I think it can be misunderstood.
> +
> + Returns the set of tables referenced by this item. An item can be
> + recursively defined in terms of other items, and the set of tables is
> + generally calculated by forming the union of the set of tables referenced
> + by the embedded items.
> + */
> virtual table_map used_tables() const { return (table_map) 0L; }
> /*
> Return table map of tables that can't be NULL tables (tables that are
>
> === modified file 'sql/sql_select.cc'
> --- a/sql/sql_select.cc 2010-12-29 00:38:59 +0000
> +++ b/sql/sql_select.cc 2011-01-18 09:10:18 +0000
> @@ -1727,6 +1727,135 @@ static int clear_sj_tmp_tables(JOIN *joi
> }
>
>
> +/**
> + @defgroup Shortcut Short-cut
> + @{
> +*/
> +/**
> + Updates a short-cut from from one plan node to another. The function
typo: from from
> + assumes that it is invoked on plan nodes in reverse order in which they
> + appear in the query execution plan.
> +
> + @param join The execution plan.
> +
> + @param join_tab The plan node from which a short-cut may potentially be
> + taken.
> +
> + @param dependent A plan node that access to join_tab depends on.
> +
> + @retval true There may be other short-cuts possible early in the plan.
> + @retval false There are no earlier short-cuts available. Either the only
> + possible one was found and completely initialized, or there are none.
> +*/
> +static bool update_star_dependency(JOIN *join, JOIN_TAB *join_tab,
> + JOIN_TAB *dependent)
> +{
> + if (dependent == NULL)
> + return false;
I think that this statement can be omitted. I did DBUG_ASSERT(dependent) w/o crash.
> +
> + if (join_tab<= dependent)
> + return true;
> +
> + if (dependent == join_tab - 1)
> + {
> + DBUG_PRINT("info", ("Invalidated a join short-cut from %s to %s",
> + join_tab->table->alias,
> + dependent->table->alias));
> + return false;
> + }
> + else if (join->are_inner_joined(dependent, join_tab)&&
> + !join->contains_join_buffering(dependent, join_tab))
> + {
> + DBUG_PRINT("info", ("Found a join short-cut from %s to %s",
> + join_tab->table->alias,
> + dependent->table->alias));
> +
> + join_tab->shortcut= dependent;
> +
> + return false;
> + }
> + return true;
> +}
> +
> +
> +/**
> + Sets the short-cut from one plan node to another, if one is found.
> +
> + @param join The execution plan.
> +
> + @param join_tab The plan node from which a short-cut will potentially be
> + taken.
> +*/
> +static void set_star_dependency(JOIN *join, JOIN_TAB *join_tab)
> +{
> + table_map where_clause_tables;
> + if (join_tab->select_cond != NULL)
> + {
> + Item *where_condition= join_tab->select_cond;
> + where_condition->update_used_tables();
> + where_clause_tables=
> + where_condition->used_tables()& ~PSEUDO_TABLE_BITS;
> + }
> + else
> + where_clause_tables= (table_map)0L;
> +
> + table_map ref_access_tables=
> + join_tab->ref.depend_map& ~PSEUDO_TABLE_BITS;
> +
> + table_map tables_depended_upon= where_clause_tables | ref_access_tables;
> +
> + if (tables_depended_upon == 0)
> + return;
> +
> + uint table_index = join->tables;
> + table_map mask= 1<< table_index;
Pls make this computation 64-bit safe: table_map mask= (table_map)1 << table_index
> + while (mask> 0) {
-> while (mask != 0)
> + if ((mask& tables_depended_upon) != 0)
> + if (!update_star_dependency(join, join_tab, join->map2table[table_index]))
> + return;
> + --table_index;
> + mask>>= 1;
> + }
> +}
> +
> +
> +/**
> + Pre-compute short-cuts for the join short-cut optimization.
> +
> + This function adorns the query exeution plan with star join short-cuts
> + where applicable. Short-cuts are triggered by the executioner. The
> + algorithm is implemented as a nested loop, where the outer loop walks the
> + nodes (JOIN_TAB's) in the query execution plan, and the inner loop iterates
> + through all tables that this table depends upon. Valid short-cuts have the
> + following properties.
> +
> + - The short-cut has a minimum length of 2. If a table is dependent on the
> + table directly preceding it, the short-cut would be of length 1, and it
> + would indicate that there are no valid short-cuts from this table.
> +
> + - The short-cut does not cross an outer join boundary. In this case the
> + optimization is not applicable, because a valid result would be missed.
> +
> + - The short-cut does not cross a node using join buffers. Short-cuts are
> + not compatible with join buffering.
> +
> + - If there are several applicable shortcuts by the above requirements, the
> + shortest one is chosen.
> +
> + @param join The query execution plan. The plan is assumed to be complete
> + and ready for execution when this method is invoked.
> +*/
> +static void setup_shortcuts(JOIN *join)
> +{
> + if(join->join_tab == NULL)
> + return;
Space before parenthesis, please.
> +
> + for (uint i= join->const_tables + 2; i< join->tables; ++i)
> + set_star_dependency(join,&join->join_tab[i]);
> +}
> +
> +/* @} */
> +
>
> /**
> global select optimisation.
> @@ -2466,6 +2595,9 @@ JOIN::optimize()
> }
>
> tmp_having= having;
> +
> + setup_shortcuts(this);
> +
> if (select_options& SELECT_DESCRIBE)
> {
> error= 0;
> @@ -17154,6 +17286,7 @@ sub_select_cache(JOIN *join, JOIN_TAB *j
> DBUG_ASSERT(cache != NULL);
>
> cache->reset_join(join);
> + join->return_tab= join_tab;
>
> DBUG_ENTER("sub_select_cache");
>
> @@ -17328,6 +17461,7 @@ sub_select(JOIN *join,JOIN_TAB *join_tab
> join_tab->table->null_row=0;
> if (end_of_records)
> {
> + join->return_tab= join_tab;
Maybe move this ahead of the if and delete the following similar assignment?
> enum_nested_loop_state nls=
> (*join_tab->next_select)(join,join_tab+1,end_of_records);
> DBUG_RETURN(nls);
> @@ -17356,6 +17490,7 @@ sub_select(JOIN *join,JOIN_TAB *join_tab
> }
> join->thd->warning_info->reset_current_row_for_warning();
>
> + join_tab->produced_rows= 0;
> error= (*join_tab->read_first_record)(join_tab);
>
> if (join_tab->keep_current_rowid)
> @@ -17532,7 +17667,11 @@ evaluate_join_record(JOIN *join, JOIN_TA
> if (error> 0 || (join->thd->is_error())) // Fatal error
> DBUG_RETURN(NESTED_LOOP_ERROR);
> if (error< 0)
> + {
> + if (join_tab->shortcut != NULL&& join_tab->produced_rows == 0)
> + join->return_tab= join_tab->shortcut;
> DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
> + }
> if (join->thd->killed) // Aborted by user
> {
> join->thd->send_kill_message();
> @@ -17550,6 +17689,7 @@ evaluate_join_record(JOIN *join, JOIN_TA
> }
> if (found)
> {
> + ++join_tab->produced_rows;
I think this should go into the *last* "if (found)" block in the function.
Otherwise you need to document why it is here.
> /*
> There is no select condition or the attached pushed down
> condition is true => a match is found.
> @@ -23072,6 +23212,13 @@ void select_describe(JOIN *join, bool ne
> extra.append(STRING_WITH_LEN(", incremental buffers)"));
> }
>
> + if (tab->shortcut != NULL)
> + {
> + extra.append(STRING_WITH_LEN("; Shortcut("));
> + extra.append(tab->shortcut->table->alias);
> + extra.append(STRING_WITH_LEN(")"));
> + }
> +
> /* Skip initial "; "*/
> const char *str= extra.ptr();
> uint32 len= extra.length();
>
> === modified file 'sql/sql_select.h'
> --- a/sql/sql_select.h 2010-12-29 00:38:59 +0000
> +++ b/sql/sql_select.h 2011-01-18 09:10:18 +0000
> @@ -237,6 +237,17 @@ typedef struct st_join_table : public Sq
> TABLE *table;
> Key_use *keyuse; /**< pointer to first used key */
> SQL_SELECT *select;
> + /**
> + The condition to be evaluated when a row from this table has been made
> + available inside query execution. It may reference a row from all tables
> + in the current join prefix (ie this table and all tables preceeding it in
> + the join plan). Generally, a condition consists of a set of predicates
> + combined using AND, OR and XOR.
> +
> + @note Predicates that can be fully evaluated using a ref access condition
> + or that have been pushed down to the storage engine are excluded from this
> + condition.
> + */
> Item *select_cond;
> QUICK_SELECT_I *quick;
> Item **on_expr_ref; /**< pointer to the associated on expression
> */
> @@ -378,6 +389,18 @@ typedef struct st_join_table : public Sq
> /* NestedOuterJoins: Bitmap of nested joins this table is part of */
> nested_join_map embedding_map;
>
> + /**
> + The number of rows produced at this plan node with a common prefix
> + produced by the preceding plan nodes.
> +
> + The counter is reset each time a partial row has been produced (and
> + pushdown conditions are true) for the current table that corresponds to
> + the partial row produced for all previous tables in the current join.
> + */
> + uint produced_rows;
Should be ulonglong. The last sentence of the comment was difficult to understand.
> +
> + st_join_table *shortcut;
This is probably worthy a comment as well?
> +
> void cleanup();
> inline bool is_using_loose_index_scan()
> {
> @@ -506,7 +529,8 @@ st_join_table::st_join_table()
> found_match(FALSE),
>
> keep_current_rowid(0),
> - embedding_map(0)
> + embedding_map(0),
> + shortcut(NULL)
> {
> /**
> @todo Add constructor to READ_RECORD.
> @@ -1808,12 +1832,15 @@ public:
> TABLE_LIST *tables_list; ///<hold 'tables' parameter of
> mysql_select
> List<TABLE_LIST> *join_list; ///< list of joined tables in
> reverse order
> COND_EQUAL *cond_equal;
> - /*
> - Join tab to return to. Points to an element of join->join_tab array, or to
> - join->join_tab[-1].
> - This is used at execution stage to shortcut join enumeration. Currently
> - shortcutting is done to handle outer joins or handle semi-joins with
> - FirstMatch strategy.
> + /**
> + Interface for taking short-cuts through join execution. Join tab to return
> + to. Points to an element of join->join_tab array, or to
> + join->join_tab[-1]. This is used at execution stage to shortcut join
> + enumeration. Shortcutting is done to
> +
> + - handle outer joins, and
> + - handle semi-joins with FirstMatch strategy, and
> + - optimize execution of star joins.
> */
> JOIN_TAB *return_tab;
> Item **ref_pointer_array; ///<used pointer reference for this select
> @@ -1981,6 +2008,37 @@ public:
> ((group || tmp_table_param.sum_func_count)&& !group_list)) ?
> NULL : join_tab+const_tables;
> }
> +
> +
> + /**
> + Returns true if JOIN_TAB's a and b participate on the same nesting level
> + within a chain of inner joined tables.
> +
> + @note The optimizer execution plan is considered, and outer joins may
> + have been rewritten into inner joins.
> + */
> + static bool are_inner_joined(JOIN_TAB *a, JOIN_TAB *b)
> + {
> + DBUG_ASSERT(a< b);
> + return
> + a->table->pos_in_table_list->embedding ==
> + b->table->pos_in_table_list->embedding&&
> + (b->on_expr_ref == NULL || *b->on_expr_ref == NULL);
> + }
I think that you can safely delete the part "b->on_expr_ref == NULL".
> +
> +
> + /**
> + True if this query execution plan uses join buffering between plan nodes
> + a and b.
> + */
> + static bool contains_join_buffering(JOIN_TAB *a, JOIN_TAB *b)
> + {
> + for (JOIN_TAB *join_tab= a; join_tab<= b; join_tab++)
> + if (join_tab->use_join_cache)
> + return TRUE;
> + return FALSE;
> + }
> +
> private:
> /**
> TRUE if the query contains an aggregate function but has no GROUP
Thanks,
Roy