List:Commits« Previous MessageNext Message »
From:Roy Lyseng Date:February 11 2011 1:46pm
Subject:Re: bzr commit into mysql-trunk branch (martin.hansson:3272) WL#3724
View as plain text  
Hi Martin,

thank you for this patch. I have now revisited all code, and AFAIK things look 
good. I have some minor suggestions that you need to consider, though.

I have also gone through all test changes, and they also look good.

Please see inline for further comments.

On 18.01.11 10.10, Martin Hansson wrote:
> #At file:///data0/martin/bzrroot/wl3724/n-mr-o-t-Roy/ based on
> revid:jorgen.loland@stripped
>
>   3272 Martin Hansson	2011-01-18
>        WL#3724: Short-Cutting Join Execution: Speeding up star queries
>
>      added:
>        mysql-test/r/shortcut.result
>        mysql-test/t/shortcut.test
>      modified:
>        mysql-test/r/func_in_icp.result
>        mysql-test/r/func_in_icp_mrr.result
>        mysql-test/r/func_in_mrr.result
>        mysql-test/r/func_in_mrr_cost.result
>        mysql-test/r/func_in_none.result
>        mysql-test/r/greedy_optimizer.result
>        mysql-test/r/join.result
>        mysql-test/r/join_cache_jcl1.result
>        mysql-test/r/join_cache_jcl2.result
>        mysql-test/r/join_cache_jcl3.result
>        mysql-test/r/join_cache_jcl4.result
>        mysql-test/r/join_nested.result
>        mysql-test/r/myisam_icp.result
>        mysql-test/r/myisam_icp_none.result
>        mysql-test/r/order_by_icp_mrr.result
>        mysql-test/r/order_by_none.result
>        mysql-test/r/subselect_innodb.result
>        mysql-test/t/subselect_innodb.test
>        sql/item.cc
>        sql/item.h
>        sql/sql_select.cc
>        sql/sql_select.h
>
Test results clipped.

greedy_optimizer.result shows some anomalies, as discussed on IRC. Apparently, 
there are some shortcuts inserted, even though there are dependencies that 
should prevent them. Please investigate these issues.
> === added file 'mysql-test/t/shortcut.test'
> --- a/mysql-test/t/shortcut.test	1970-01-01 00:00:00 +0000
> +++ b/mysql-test/t/shortcut.test	2011-01-18 09:10:18 +0000
> @@ -0,0 +1,522 @@
> +--echo #
> +--echo # wl3724: Short-Cutting Join Execution: Speeding up star queries
> +--echo #
> +
> +--echo #
> +--echo # Test 1. Basic tests.
> +--echo #
> +--echo # Test 1a. Tests of EXPLAIN.
> +--echo #
> +CREATE TABLE t1 (
> +  a INT,
> +  b INT
> +);
> +
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 (
> +  a INT,
> +  KEY( a )
> +);
> +INSERT INTO t2 VALUES (1), (1), (1), (1), (1);
> +
> +CREATE TABLE t3 (
> +  b INT,
> +  KEY( b )
> +);
> +INSERT INTO t3 VALUES (1), (1), (1), (1), (1), (1);
> +
> +EXPLAIN SELECT * FROM t1 JOIN t2 USING( a ) JOIN t3 USING( b );
> +
> +SELECT * FROM t1 JOIN t2 USING( a ) JOIN t3 USING( b );

You could generally use this syntax to make the tests easier to maintain 
consistently, when you need to both execute and explain a query:

let $query=SELECT ...;
eval EXPLAIN $query;
eval $query;
> +
> +--echo # Test that the short-cut does not cross an outer join boundary
> +EXPLAIN SELECT * FROM t1 LEFT JOIN ( t2, t3 ) USING( b );
> +EXPLAIN SELECT * FROM t1 LEFT JOIN ( t2 JOIN t3 ON a = b ) USING( b );
> +
> +--echo # Test that the optimization is robust wrt reordering of tables
> +EXPLAIN SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.b = t3.b;
> +EXPLAIN SELECT * FROM t1, t3, t2 WHERE t1.a = t2.a AND t1.b = t3.b;
> +
> +EXPLAIN SELECT * FROM t2, t1, t3 WHERE t1.a = t2.a AND t1.b = t3.b;
> +EXPLAIN SELECT * FROM t2, t3, t1 WHERE t1.a = t2.a AND t1.b = t3.b;
> +
> +EXPLAIN SELECT * FROM t3, t1, t2 WHERE t1.a = t2.a AND t1.b = t3.b;
> +EXPLAIN SELECT * FROM t3, t2, t1 WHERE t1.a = t2.a AND t1.b = t3.b;
> +
> +DROP TABLE t1, t2, t3;
> +
> +--echo # Test that short-cuts show up in EXPLAIN properly with "Using index"
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3);
> +
> +CREATE TABLE t2 ( b INT KEY );
> +INSERT INTO t2 VALUES (1), (2), (3);
> +
> +CREATE TABLE t3 ( c INT KEY );
> +INSERT INTO t3 VALUES (1), (2), (3);
> +
> +EXPLAIN SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c;
> +
> +--echo # Test that we don't take a short-cut if we have successfully read a
> +--echo # record already.

record -> row.
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE c>  1;
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE c>  1;
> +
> +DROP TABLE t1, t2, t3;
> +--echo #
> +--echo # Test 1b.
> +--echo # Tests that short-cuts work properly with join buffers.
> +--echo #
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3);
> +
> +CREATE TABLE t2 ( b INT );
> +INSERT INTO t2 VALUES (1), (2), (3);
> +
> +CREATE TABLE t2key ( b INT KEY );
> +INSERT INTO t2key VALUES (1), (2), (3);
> +
> +CREATE TABLE t3 ( c INT, d INT );
> +INSERT INTO t3 VALUES (1, -1), (2, 1), (3, 1);
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE d>  0;
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE d>  0;
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2key ON a = b JOIN t3 ON a = c WHERE d>  0;
> +SELECT * FROM t1 JOIN t2key ON a = b JOIN t3 ON a = c WHERE d>  0;

Perhaps you should re-run these tests without join caching to see the difference.
> +
> +DROP TABLE t1, t2, t2key, t3;
> +--echo # Test that we reject a short-cut properly. We have a cartesian product
> +--echo # of a table and the result of an outer join. The optimizer places the
> +--echo # table between the outer and inner table of the outer join and hence
> +--echo # there is a short-cut from the inner table to the outer table. This
> +--echo # short-cut should be rejected.
> +CREATE TABLE t1 ( a INT PRIMARY KEY );
> +INSERT INTO t1 VALUES (1),(2);
> +
> +CREATE TABLE t2 ( a INT );
> +INSERT INTO t2 VALUES (1), (2), (3);
> +
> +CREATE TABLE t3 ( a INT );
> +INSERT INTO t3 VALUES (1), (2), (3);
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t2, t1 LEFT OUTER JOIN t3 ON (t3.a = t1.a);
> +SELECT * FROM t2, t1 LEFT OUTER JOIN t3 ON (t3.a = t1.a);
> +DROP TABLE t1, t2, t3;
> +--echo # Test 2
> +--echo # That the optimization actually works, i.e. that unneccesary reads are
> +--echo # indeed short-cut.
> +--echo #
> +--echo # Test 2a: We have two tables between giving and receiving end of short-cut:
> +--echo # t2 and t3.
> +--echo #
> +CREATE TABLE t1 ( a INT, b INT );
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 ( a INT, b INT, KEY( a ) );
> +INSERT INTO t2 VALUES (1, 2),(1, 2),(1, 2),(1, 2),(1, 2);
> +INSERT INTO t2 SELECT * FROM t2;
> +
> +CREATE TABLE t3 ( b INT, KEY( b ) );
> +INSERT INTO t3 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t3 SELECT * FROM t3;
> +
> +CREATE TABLE t4 ( b INT, KEY( b ) );
> +INSERT INTO t4 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +FLUSH STATUS;
> +
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 USING(a) JOIN t3 ON t2.a = t3.b JOIN t4 ON t1.b = t4.b;
> +SELECT * FROM t1 JOIN t2 USING(a) JOIN t3 ON t2.a = t3.b JOIN t4 ON t1.b = t4.b;
> +
> +SHOW STATUS LIKE 'handler_read_%';

You could add another predicate that does not perform any "real" filtering, but 
makes shortcutting impossible, and compare handler statistics for the two queries.
> +DROP TABLE t1, t2, t3, t4;
> +--echo # Test 2b
> +--echo # Short-cuts followed by successful keys. This tests that we perform the
> +--echo # optimal number of reads after the short-cut is taken without missing
> tuples.
tuples -> rows.
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3), (4), (5);
> +
> +CREATE TABLE t2 ( a INT, KEY (a) );
> +INSERT INTO t2 VALUES (1), (2), (2), (2), (2), (2), (3), (4), (4), (4), (5);
> +
> +CREATE TABLE t3 ( a INT, KEY (a) );
> +INSERT INTO t3 VALUES (1), (3), (5), (-1), (-1), (-1), (-1), (-1), (-1);
> +
> +FLUSH STATUS;
> +
> +EXPLAIN
> +SELECT STRAIGHT_JOIN * FROM t1 JOIN t2 USING(a) JOIN t3 USING(a);
> +SELECT STRAIGHT_JOIN * FROM t1 JOIN t2 USING(a) JOIN t3 USING(a);
> +
> +SHOW STATUS LIKE 'handler_read_%';
> +
> +DROP TABLE t1, t2, t3;
> +--echo # Test 2c
> +--echo # Test that we don't erroneously take a short-cut at the end of a table
> scan.
Can you explain this in more detail?
> +let $requires_join_cache_level = 0;
> +--source include/have_join_cache_level.inc
> +--eval set optimizer_join_cache_level = $requires_join_cache_level
> +
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2);
> +
> +CREATE TABLE t2 ( a INT, KEY (a) );
> +INSERT INTO t2 VALUES (1), (1), (2);
> +
> +CREATE TABLE t3 ( a INT );
> +INSERT INTO t3 VALUES (1), (3);
> +
> +EXPLAIN
> +SELECT STRAIGHT_JOIN * FROM t1 JOIN t2 USING(a) JOIN t3 USING(a);
> +SELECT STRAIGHT_JOIN * FROM t1 JOIN t2 USING(a) JOIN t3 USING(a);
> +
> +DROP TABLE t1, t2, t3;
> +
> +set @@optimizer_join_cache_level = DEFAULT;
> +--echo #
> +--echo # Test 3.
> +--echo # Test for outer joins. We may not take short-cuts across outer join
> +--echo # boundaries. An outer join boundary goes before the first inner table for
> an
> +--echo # outer join.
> +--echo #
> +--echo # Test 3a.
> +--echo #
> +CREATE TABLE t1 ( a INT, b INT );
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 ( a INT, b INT, KEY( a ) );
> +INSERT INTO t2 VALUES (1, 2),(1, 2),(1, 2),(1, 2),(1, 2);
> +INSERT INTO t2 SELECT * FROM t2;
> +
> +CREATE TABLE t3 ( b INT, KEY( b ) );
> +INSERT INTO t3 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t3 SELECT * FROM t3;
> +
> +CREATE TABLE t4 ( b INT, KEY( b ) );
> +INSERT INTO t4 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +CREATE TABLE t5 ( b INT, KEY( b ) );
> +INSERT INTO t5 VALUES (1), (1), (1), (1), (1), (1), (1), (1);
> +INSERT INTO t5 SELECT * FROM t5;
> +
> +--echo # Test of two outer join boundaries in one join.
> +EXPLAIN
> +SELECT *
> +FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.a = t3.b) USING ( a )
> +        LEFT JOIN t4 ON t1.a = t4.b;
> +
> +EXPLAIN
> +SELECT *
> +FROM t1 LEFT JOIN (t2 JOIN t3 ON t2.a = t3.b) USING(a)
> +        LEFT JOIN (t4 JOIN t5 USING( b )) ON t1.a = t4.b;
> +
> +DROP TABLE t1, t2, t3, t4, t5;
> +--echo # Test 3b.
> +--echo # Tests that we do not miss any data due to short-cutting.
> +
> +CREATE TABLE t1 ( a INT, b INT );
> +CREATE TABLE t2 ( a INT, b INT, PRIMARY KEY (a,b) );
> +CREATE TABLE t3 ( a INT, b INT, PRIMARY KEY (a,b) );
> +
> +INSERT INTO t1 VALUES ( 1, 1 ), ( 2, 1 ), ( 1, 3 );
> +INSERT INTO t2 VALUES ( 1, 1 ), ( 2, 2 ), ( 3, 3 );
> +INSERT INTO t3 VALUES ( 1, 1 ), ( 2, 1 ), ( 1, 3 );
> +
> +--echo # Test of the short-cut detection algorithm. When traversing the
> +--echo # push-down conditions the algorithm will first find that the
> +--echo # prospective short-cut t3->t1, but this must be invalidated due to the
> +--echo # later found dependency t3->t2.
> +EXPLAIN
> +SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.b = t3.b;
> +SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.b = t3.b;
> +
> +--echo # Test of internal representation of pushdown conditions.
> +--echo # The join conditions will be encoded in ref access, while the WHERE
> +--echo # expression remains in the pushdown condition.

WHERE expression -> WHERE condition.

When you mention pushdown conditions, I immediately think about ECP or ICP. I 
think that we need a better term... Can table condition be better?
> +EXPLAIN
> +SELECT *
> +FROM t1 JOIN t2 ON t1.a = t2.a JOIN t3 ON t1.b = t3.a
> +WHERE t3.b + 1 = t2.b;
> +
> +--echo # Short-cut is actually applicable in this case, thanks to equality
> +--echo # propagation. The WHERE condition t3.a + 1 = t2.a is rewritten into
> +--echo # t1.b + 1 = t1.a, and hence we have an optimizable star query.
> +EXPLAIN
> +SELECT *
> +FROM t1 JOIN t2 ON t1.a = t2.a JOIN t3 ON t1.b = t3.a
> +WHERE t3.a + 1 = t2.a;
> +
> +DROP TABLE t1, t2, t3;
> +--echo # Test 3c
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (1), (1), (1);
> +
> +CREATE TABLE t2 ( a INT, KEY (a), b INT, c INT );
> +INSERT INTO t2 VALUES (1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1), (1, 1, 1);
> +INSERT INTO t2 SELECT * FROM t2;
> +INSERT INTO t2 SELECT * FROM t2;
> +
> +CREATE TABLE t3 ( a INT, KEY (a) );
> +INSERT INTO t3 VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1), (3);
> +
> +CREATE TABLE t4 ( a INT, KEY (a), b INT );
> +INSERT INTO t4 VALUES (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1,
> 1), (1, 1), (1, 1), (1, 1);
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +--echo #
> +--echo # Test that the short-cut t4->t1 is rejected in favor of t4->t2.
> +--echo #
> +EXPLAIN
> +SELECT STRAIGHT_JOIN *
> +FROM t1 JOIN t2 ON t1.a = t2.a
> +        JOIN t3 ON t2.b = t3.a
> +        JOIN t4 ON t1.a = t4.a AND t2.c = t4.b;
> +
> +DROP TABLE t1, t2, t3, t4;
> +--echo # Test 4. Test of execution step.
> +--echo # In this test we are testing that a short-cut is properly separated from
> +--echo # the case of a normal 'end of records' state on the last table in the plan.
> +--echo # This particular case gets hit only when
> +--echo # - The optimizer reorders the tables
> +--echo # - We have a SELECT<table>.*

I did not get this...

> +CREATE TABLE t1( a INT );
> +CREATE TABLE t2( a INT PRIMARY KEY );
> +CREATE TABLE t3( a INT, INDEX( a ) );
> +
> +INSERT INTO t1( a ) VALUES ( 2 );
> +INSERT INTO t1( a ) VALUES ( 2 );
> +
> +INSERT INTO t2( a ) VALUES ( 1 );
> +INSERT INTO t2( a ) VALUES ( 2 );
> +
> +INSERT INTO t3( a ) VALUES ( 6 );
> +INSERT INTO t3( a ) VALUES ( 5 );
> +INSERT INTO t3( a ) VALUES ( 2 );
> +
> +EXPLAIN
> +SELECT t2.* FROM t2, t1, t3 WHERE t2.a = t1.a AND t1.a = t3.a;
> +SELECT t2.* FROM t2, t1, t3 WHERE t2.a = t1.a AND t1.a = t3.a;
> +
> +DROP TABLE t1, t2, t3;
> +--echo #
> +--echo # Test 5.
> +--echo # Tests that a short-cut does not cross a plan node that is using join
> +--echo # buffering.
> +--echo #
> +--echo # Test 5a.
> +--echo #
> +CREATE TABLE t1 ( a INT, c INT );
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 ( a INT, b INT, KEY( a ), KEY( b ) );
> +INSERT INTO t2 VALUES (1, 2),(1, 2),(1, 2),(1, 2),(1, 2);
> +
> +CREATE TABLE t3 ( a INT, KEY( a ) );
> +INSERT INTO t3 VALUES (2), (2), (2), (2), (2);
> +INSERT INTO t3 SELECT * FROM t3;
> +INSERT INTO t3 SELECT * FROM t3;
> +INSERT INTO t3 SELECT * FROM t3;
> +
> +CREATE TABLE t4 ( a INT, KEY( a ) );
> +INSERT INTO t4 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +FLUSH STATUS;
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +
> +SHOW STATUS LIKE 'handler_read_%';

Please rerun without join buffering and expose the difference.

> +
> +DROP TABLE t1, t2, t3, t4;
> +--echo #
> +--echo # Test 5b.
> +--echo #
> +CREATE TABLE t1 ( a INT, c INT );
> +INSERT INTO t1 VALUES (1, 2), (-1, -1);
> +
> +CREATE TABLE t2 ( a INT, b INT, KEY( a, b ) );
> +INSERT INTO t2 VALUES (1, 2),(1, 2),(1, 2),(1, 2),(1, 2);
> +
> +CREATE TABLE t3 ( a INT );
> +INSERT INTO t3 VALUES (2), (2), (2), (2), (2);
> +INSERT INTO t3 SELECT * FROM t3;
> +
> +CREATE TABLE t4 ( a INT );
> +INSERT INTO t4 VALUES (1), (1), (1), (1), (1);
> +INSERT INTO t4 SELECT * FROM t4;
> +INSERT INTO t4 SELECT * FROM t4;
> +
> +FLUSH STATUS;
> +
> +--echo # Should not use short-cuts.
> +EXPLAIN
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +SELECT * FROM t1, t2, t3, t4 WHERE t1.a = t2.a AND t2.b = t3.a AND t1.c = t4.a;
> +
> +SHOW STATUS LIKE 'handler_read_%';
> +
> +DROP TABLE t1, t2, t3, t4;
> +--echo #
> +--echo # Test 6. Test of short-cuts in conjunction with outer join.
> +--echo #
> +--echo # Test 6a. Test of outer join detection.
> +--echo # In this case the execution order is t1, t2, t3, t4.
> +--echo # t2, t3 and t4 will form an 'outer join nest' with which t1 is left
> +--echo # outer joined. But (t3, t2) is left joined with t4. Since t4 is just one
> +--echo # table, however, the 'outer join nest' of t4 is only implicitly
> +--echo # represented in the query plan. The only way to detect it is by looking
> +--echo # at t4's first_inner pointer, which would be null for the last inner table
> +--echo # in an outer join nest. But since it points to itseld, t4 is in its own
> +--echo # nest. But there is no NESTED_JOIN to represent it, since it follows the
> +--echo # 'normal' left-deep tree representation.

It's probably overkill to explain internal data structures in a test file...

"Test that combines outer join operation with one table and outer join operation 
with multiple inner tables" might just do...
> +--echo #
> +CREATE TABLE t1 (a INT, b INT, c INT);
> +CREATE TABLE t2 (d INT, e INT, f INT);
> +CREATE TABLE t3 (g INT, h INT, i INT);
> +CREATE TABLE t4 (j INT, k INT, l INT);
> +
> +INSERT INTO t1 VALUES (3,1,0), (2,2,0), (3,3,0);
> +INSERT INTO t2 VALUES (1,1,0), (2,2,0);
> +INSERT INTO t3 VALUES (3,2,0), (6,2,0), (6,1,0);
> +INSERT INTO t4 VALUES (0,2,0), (1,2,0);
> +
> +EXPLAIN
> +SELECT a, b, c, d, e, f, g, h, i, j, k, l
> +FROM t1 LEFT JOIN ( (t3, t2) LEFT JOIN t4 ON t2.e = t4.k AND t3.h<  10 ) ON t1.b
> = t2.e;
> +SELECT *
> +FROM t1 LEFT JOIN ( (t3, t2) LEFT JOIN t4 ON t2.e = t4.k AND t3.h<  10 ) ON t1.b
> = t2.e;
> +
> +DROP TABLE t1, t2, t3, t4;
> +--echo #
> +--echo # Test 6b.
> +--echo # We may take short-cuts across or inside an outer join, as long as we
> +--echo # don't fail due to an outer join predicate. It goes as follows.
> +--echo #
> +--echo # - We may take a short-cut across a nested outer join.
> +--echo # - We may take short-cuts inside a nested inner join sequence.
> +--echo #
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
> +
> +CREATE TABLE t2 ( b INT, KEY    ( b ) );
> +INSERT INTO t2 VALUES (1), (2),      (4), (5), (6), (7), (8), (9), (10);
> +
> +CREATE TABLE t3 ( c INT, KEY( c ) );
> +INSERT INTO t3 VALUES (1), (2), (3), (4),      (6), (7), (8), (9), (10);
> +
> +CREATE TABLE t4 ( d INT, KEY( d ) );
> +INSERT INTO t4 VALUES (1), (2), (3), (4), (5), (6), (7), (8),      (10);
> +INSERT INTO t4 VALUES (10), (10), (10), (10);
> +
> +CREATE TABLE t5 ( e INT, KEY( e ) );
> +INSERT INTO t5 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
> +
> +--echo # Outer join followed by a table that is inner joined with the first.
> +EXPLAIN
> +SELECT * FROM t1 LEFT JOIN t2 ON a = b JOIN t3 ON a = c;
> +SELECT * FROM t1 LEFT JOIN t2 ON a = b JOIN t3 ON a = c;
> +
> +--echo # Short-cut across a nested outer join
> +EXPLAIN
> +SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON b = c) ON a = b JOIN t4 ON a = d;
> +SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON b = c) ON a = b JOIN t4 ON a = d;
> +
> +--echo # Short-cut across a nested outer join with a nested inner join
> +EXPLAIN
> +SELECT * FROM t1
> +         JOIN (t2 LEFT JOIN (t3 JOIN t4 ON c = d) ON b = d) ON a = b
> +         JOIN t5 ON a = e;
> +SELECT * FROM t1
> +         JOIN (t2 LEFT JOIN (t3 JOIN t4 ON c = d) ON b = d) ON a = b
> +         JOIN t5 ON a = e;
> +
> +--echo # Short-cut from last inner table inside an inner join that is nested
> +--echo # within an outer join
> +EXPLAIN
> +SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON b = c JOIN t4 ON b = d) on a = b;
> +SELECT * FROM t1 LEFT JOIN (t2 JOIN t3 ON b = c JOIN t4 ON b = d) on a = b;
> +
> +--echo # Short-cut from not last inner table inside an inner join that is nested
> +--echo # within an outer join
> +EXPLAIN
> +SELECT * FROM t1
> +         LEFT JOIN
> +         (t2 JOIN t3 ON b = c JOIN t4 ON b = d JOIN t5 ON d = e)
> +         ON a = b;
> +SELECT * FROM t1
> +         LEFT JOIN
> +         (t2 JOIN t3 ON b = c JOIN t4 ON b = d JOIN t5 ON d = e)
> +         ON a = b;

What would happen with "a=d" instead of "a=b" ?
> +
> +--echo # Illegal scenario for short-cut. Should not be taken.
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b LEFT JOIN t3 ON a = c;
> +SELECT * FROM t1 JOIN t2 ON a = b LEFT JOIN t3 ON a = c;
> +
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b LEFT JOIN (t3 JOIN t4 ON c = d) ON a = c;
> +SELECT * FROM t1 JOIN t2 ON a = b LEFT JOIN (t3 JOIN t4 ON c = d) ON a = c;
> +
> +DROP TABLE t1, t2, t3, t4, t5;
> +--echo #
> +--echo # Test 6c.
> +--echo # Tests that short-cuts work even if a record was read and then rejected.
record -> row
> +--echo # This happens for table scan without join buffering.
> +--echo #
> +let $requires_join_cache_level = 0;
> +--source include/have_join_cache_level.inc
> +--eval set optimizer_join_cache_level = $requires_join_cache_level;
> +CREATE TABLE t1 ( a INT );
> +INSERT INTO t1 VALUES (1), (2), (3);
> +
> +CREATE TABLE t2 ( b INT );
> +INSERT INTO t2 VALUES (1), (2), (2), (2), (2), (3);
> +
> +CREATE TABLE t3 ( c INT, d INT );
> +INSERT INTO t3(c) VALUES (1), (3), (4), (4), (4), (4);
> +
> +FLUSH STATUS;
> +
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c;
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c;
> +
> +SHOW STATUS LIKE 'handler_read%';
> +FLUSH STATUS;
> +
> +EXPLAIN
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE d>  0;
> +SELECT * FROM t1 JOIN t2 ON a = b JOIN t3 ON a = c WHERE d>  0;
> +
> +SHOW STATUS LIKE 'handler_read%';
> +
> +set @@optimizer_join_cache_level = DEFAULT;
> +
> +DROP TABLE t1, t2, t3;
>
>
> === modified file 'sql/item.cc'
> --- a/sql/item.cc	2011-01-12 15:28:35 +0000
> +++ b/sql/item.cc	2011-01-18 09:10:18 +0000
> @@ -2295,6 +2295,9 @@ bool Item_field::eq(const Item *item, bo
>   }
>
>
> +/**
> +   Returns simply the table to which the field belongs.
> +*/
>   table_map Item_field::used_tables() const
>   {
>     if (field->table->const_table)
>
> === modified file 'sql/item.h'
> --- a/sql/item.h	2010-12-29 00:38:59 +0000
> +++ b/sql/item.h	2011-01-18 09:10:18 +0000
> @@ -868,7 +868,14 @@ public:
>     virtual bool val_bool_result() { return val_bool(); }
>     virtual bool is_null_result() { return is_null(); }
>
> -  /* bit map of tables used by item */
> +  /**
> +     This is the set of tables directly referenced by this Item.

Please remove  "directly" from this sentence, I think it can be misunderstood.
> +
> +     Returns the set of tables referenced by this item. An item can be
> +     recursively defined in terms of other items, and the set of tables is
> +     generally calculated by forming the union of the set of tables referenced
> +     by the embedded items.
> +  */
>     virtual table_map used_tables() const { return (table_map) 0L; }
>     /*
>       Return table map of tables that can't be NULL tables (tables that are
>
> === modified file 'sql/sql_select.cc'
> --- a/sql/sql_select.cc	2010-12-29 00:38:59 +0000
> +++ b/sql/sql_select.cc	2011-01-18 09:10:18 +0000
> @@ -1727,6 +1727,135 @@ static int clear_sj_tmp_tables(JOIN *joi
>   }
>
>
> +/**
> +   @defgroup Shortcut Short-cut
> +   @{
> +*/
> +/**
> +   Updates a short-cut from from one plan node to another. The function

typo: from from

> +   assumes that it is invoked on plan nodes in reverse order in which they
> +   appear in the query execution plan.
> +
> +   @param join The execution plan.
> +
> +   @param join_tab The plan node from which a short-cut may potentially be
> +   taken.
> +
> +   @param dependent A plan node that access to join_tab depends on.
> +
> +   @retval true There may be other short-cuts possible early in the plan.
> +   @retval false There are no earlier short-cuts available. Either the only
> +   possible one was found and completely initialized, or there are none.
> +*/
> +static bool update_star_dependency(JOIN *join, JOIN_TAB *join_tab,
> +                                   JOIN_TAB *dependent)
> +{
> +  if (dependent == NULL)
> +    return false;
I think that this statement can be omitted. I did DBUG_ASSERT(dependent) w/o crash.
> +
> +  if (join_tab<= dependent)
> +    return true;
> +
> +  if (dependent == join_tab - 1)
> +  {
> +    DBUG_PRINT("info", ("Invalidated a join short-cut from %s to %s",
> +                        join_tab->table->alias,
> +                        dependent->table->alias));
> +    return false;
> +  }
> +  else if (join->are_inner_joined(dependent, join_tab)&&
> +           !join->contains_join_buffering(dependent, join_tab))
> +  {
> +    DBUG_PRINT("info", ("Found a join short-cut from %s to %s",
> +                        join_tab->table->alias,
> +                        dependent->table->alias));
> +
> +    join_tab->shortcut= dependent;
> +
> +    return false;
> +  }
> +  return true;
> +}
> +
> +
> +/**
> +   Sets the short-cut from one plan node to another, if one is found.
> +
> +   @param join The execution plan.
> +
> +   @param join_tab The plan node from which a short-cut will potentially be
> +   taken.
> +*/
> +static void set_star_dependency(JOIN *join, JOIN_TAB *join_tab)
> +{
> +  table_map where_clause_tables;
> +  if (join_tab->select_cond != NULL)
> +  {
> +    Item *where_condition= join_tab->select_cond;
> +    where_condition->update_used_tables();
> +    where_clause_tables=
> +      where_condition->used_tables()&  ~PSEUDO_TABLE_BITS;
> +  }
> +  else
> +    where_clause_tables= (table_map)0L;
> +
> +  table_map ref_access_tables=
> +    join_tab->ref.depend_map&  ~PSEUDO_TABLE_BITS;
> +
> +  table_map tables_depended_upon= where_clause_tables | ref_access_tables;
> +
> +  if (tables_depended_upon == 0)
> +    return;
> +
> +  uint table_index = join->tables;
> +  table_map mask= 1<<  table_index;

Pls make this computation 64-bit safe: table_map mask= (table_map)1 << table_index
> +  while (mask>  0) {
-> while (mask != 0)
> +    if ((mask&  tables_depended_upon) != 0)
> +      if (!update_star_dependency(join, join_tab, join->map2table[table_index]))
> +        return;
> +    --table_index;
> +    mask>>= 1;
> +  }
> +}
> +
> +
> +/**
> +   Pre-compute short-cuts for the join short-cut optimization.
> +
> +   This function adorns the query exeution plan with star join short-cuts
> +   where applicable. Short-cuts are triggered by the executioner. The
> +   algorithm is implemented as a nested loop, where the outer loop walks the
> +   nodes (JOIN_TAB's) in the query execution plan, and the inner loop iterates
> +   through all tables that this table depends upon. Valid short-cuts have the
> +   following properties.
> +
> +   - The short-cut has a minimum length of 2. If a table is dependent on the
> +   table directly preceding it, the short-cut would be of length 1, and it
> +   would indicate that there are no valid short-cuts from this table.
> +
> +   - The short-cut does not cross an outer join boundary. In this case the
> +   optimization is not applicable, because a valid result would be missed.
> +
> +   - The short-cut does not cross a node using join buffers. Short-cuts are
> +   not compatible with join buffering.
> +
> +   - If there are several applicable shortcuts by the above requirements, the
> +   shortest one is chosen.
> +
> +   @param join The query execution plan. The plan is assumed to be complete
> +   and ready for execution when this method is invoked.
> +*/
> +static void setup_shortcuts(JOIN *join)
> +{
> +  if(join->join_tab == NULL)
> +    return;

Space before parenthesis, please.
> +
> +  for (uint i= join->const_tables + 2; i<  join->tables; ++i)
> +    set_star_dependency(join,&join->join_tab[i]);
> +}
> +
> +/* @} */
> +
>
>   /**
>     global select optimisation.
> @@ -2466,6 +2595,9 @@ JOIN::optimize()
>     }
>
>     tmp_having= having;
> +
> +  setup_shortcuts(this);
> +
>     if (select_options&  SELECT_DESCRIBE)
>     {
>       error= 0;
> @@ -17154,6 +17286,7 @@ sub_select_cache(JOIN *join, JOIN_TAB *j
>     DBUG_ASSERT(cache != NULL);
>
>     cache->reset_join(join);
> +  join->return_tab= join_tab;
>
>     DBUG_ENTER("sub_select_cache");
>
> @@ -17328,6 +17461,7 @@ sub_select(JOIN *join,JOIN_TAB *join_tab
>     join_tab->table->null_row=0;
>     if (end_of_records)
>     {
> +    join->return_tab= join_tab;
Maybe move this ahead of the if and delete the following similar assignment?
>       enum_nested_loop_state nls=
>         (*join_tab->next_select)(join,join_tab+1,end_of_records);
>       DBUG_RETURN(nls);
> @@ -17356,6 +17490,7 @@ sub_select(JOIN *join,JOIN_TAB *join_tab
>     }
>     join->thd->warning_info->reset_current_row_for_warning();
>
> +  join_tab->produced_rows= 0;
>     error= (*join_tab->read_first_record)(join_tab);
>
>     if (join_tab->keep_current_rowid)
> @@ -17532,7 +17667,11 @@ evaluate_join_record(JOIN *join, JOIN_TA
>     if (error>  0 || (join->thd->is_error()))     // Fatal error
>       DBUG_RETURN(NESTED_LOOP_ERROR);
>     if (error<  0)
> +  {
> +    if (join_tab->shortcut != NULL&&  join_tab->produced_rows == 0)
> +      join->return_tab= join_tab->shortcut;
>       DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
> +  }
>     if (join->thd->killed)			// Aborted by user
>     {
>       join->thd->send_kill_message();
> @@ -17550,6 +17689,7 @@ evaluate_join_record(JOIN *join, JOIN_TA
>     }
>     if (found)
>     {
> +    ++join_tab->produced_rows;

I think this should go into the *last* "if (found)" block in the function. 
Otherwise you need to document why it is here.
>       /*
>         There is no select condition or the attached pushed down
>         condition is true =>  a match is found.
> @@ -23072,6 +23212,13 @@ void select_describe(JOIN *join, bool ne
>               extra.append(STRING_WITH_LEN(", incremental buffers)"));
>           }
>
> +        if (tab->shortcut != NULL)
> +        {
> +          extra.append(STRING_WITH_LEN("; Shortcut("));
> +          extra.append(tab->shortcut->table->alias);
> +          extra.append(STRING_WITH_LEN(")"));
> +        }
> +
>           /* Skip initial "; "*/
>           const char *str= extra.ptr();
>           uint32 len= extra.length();
>
> === modified file 'sql/sql_select.h'
> --- a/sql/sql_select.h	2010-12-29 00:38:59 +0000
> +++ b/sql/sql_select.h	2011-01-18 09:10:18 +0000
> @@ -237,6 +237,17 @@ typedef struct st_join_table : public Sq
>     TABLE		*table;
>     Key_use	*keyuse;			/**<  pointer to first used key */
>     SQL_SELECT	*select;
> +  /**
> +     The condition to be evaluated when a row from this table has been made
> +     available inside query execution. It may reference a row from all tables
> +     in the current join prefix (ie this table and all tables preceeding it in
> +     the join plan). Generally, a condition consists of a set of predicates
> +     combined using AND, OR and XOR.
> +
> +     @note Predicates that can be fully evaluated using a ref access condition
> +     or that have been pushed down to the storage engine are excluded from this
> +     condition.
> +  */
>     Item		*select_cond;
>     QUICK_SELECT_I *quick;
>     Item	       **on_expr_ref;   /**<  pointer to the associated on expression  
> */
> @@ -378,6 +389,18 @@ typedef struct st_join_table : public Sq
>     /* NestedOuterJoins: Bitmap of nested joins this table is part of */
>     nested_join_map embedding_map;
>
> +  /**
> +     The number of rows produced at this plan node with a common prefix
> +     produced by the preceding plan nodes.
> +
> +     The counter is reset each time a partial row has been produced (and
> +     pushdown conditions are true) for the current table that corresponds to
> +     the partial row produced for all previous tables in the current join.
> +  */
> +  uint produced_rows;
Should be ulonglong. The last sentence of the comment was difficult to understand.
> +
> +  st_join_table *shortcut;

This is probably worthy a comment as well?
> +
>     void cleanup();
>     inline bool is_using_loose_index_scan()
>     {
> @@ -506,7 +529,8 @@ st_join_table::st_join_table()
>       found_match(FALSE),
>
>       keep_current_rowid(0),
> -    embedding_map(0)
> +    embedding_map(0),
> +    shortcut(NULL)
>   {
>     /**
>       @todo Add constructor to READ_RECORD.
> @@ -1808,12 +1832,15 @@ public:
>     TABLE_LIST *tables_list;           ///<hold 'tables' parameter of
> mysql_select
>     List<TABLE_LIST>  *join_list;       ///<  list of joined tables in
> reverse order
>     COND_EQUAL *cond_equal;
> -  /*
> -    Join tab to return to. Points to an element of join->join_tab array, or to
> -    join->join_tab[-1].
> -    This is used at execution stage to shortcut join enumeration. Currently
> -    shortcutting is done to handle outer joins or handle semi-joins with
> -    FirstMatch strategy.
> +  /**
> +    Interface for taking short-cuts through join execution. Join tab to return
> +    to. Points to an element of join->join_tab array, or to
> +    join->join_tab[-1]. This is used at execution stage to shortcut join
> +    enumeration. Shortcutting is done to
> +
> +    - handle outer joins, and
> +    - handle semi-joins with FirstMatch strategy, and
> +    - optimize execution of star joins.
>     */
>     JOIN_TAB *return_tab;
>     Item **ref_pointer_array; ///<used pointer reference for this select
> @@ -1981,6 +2008,37 @@ public:
>               ((group || tmp_table_param.sum_func_count)&&  !group_list)) ?
>                 NULL : join_tab+const_tables;
>     }
> +
> +
> +  /**
> +     Returns true if JOIN_TAB's a and b participate on the same nesting level
> +     within a chain of inner joined tables.
> +
> +     @note The optimizer execution plan is considered, and outer joins may
> +     have been rewritten into inner joins.
> +  */
> +  static bool are_inner_joined(JOIN_TAB *a, JOIN_TAB *b)
> +  {
> +    DBUG_ASSERT(a<  b);
> +    return
> +      a->table->pos_in_table_list->embedding ==
> +      b->table->pos_in_table_list->embedding&&
> +      (b->on_expr_ref == NULL || *b->on_expr_ref == NULL);
> +  }
I think that you can safely delete the part "b->on_expr_ref == NULL".
> +
> +
> +  /**
> +     True if this query execution plan uses join buffering between plan nodes
> +     a and b.
> +  */
> +  static bool contains_join_buffering(JOIN_TAB *a, JOIN_TAB *b)
> +  {
> +    for (JOIN_TAB *join_tab= a; join_tab<= b; join_tab++)
> +      if (join_tab->use_join_cache)
> +        return TRUE;
> +    return FALSE;
> +  }
> +
>   private:
>     /**
>       TRUE if the query contains an aggregate function but has no GROUP

Thanks,
Roy
Thread
Re: bzr commit into mysql-trunk branch (martin.hansson:3272) WL#3724Roy Lyseng11 Feb
  • Re: bzr commit into mysql-trunk branch (martin.hansson:3272) WL#3724Martin Hansson11 Feb
    • Re: bzr commit into mysql-trunk branch (martin.hansson:3272) WL#3724Roy Lyseng11 Feb
  • Re: bzr commit into mysql-trunk branch (martin.hansson:3272) WL#3724Martin Hansson16 Feb
    • Re: bzr commit into mysql-trunk branch (martin.hansson:3272) WL#3724Roy Lyseng21 Feb
      • Re: bzr commit into mysql-trunk branch (martin.hansson:3272) WL#3724Martin Hansson16 Mar