List:Commits« Previous MessageNext Message »
From:Sergey Petrunia Date:September 27 2006 8:56pm
Subject:bk commit into 5.1 tree (sergefp:1.2279) BUG#14940
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-09-27 22:56:50+04:00, sergefp@stripped +4 -0
  BUG#14940: Slow join order is chosen: 
  - Re-worked the prev_record_reads() function to return the lower bound of
    number of different table access scans that will be performed.

  mysql-test/r/join.result@stripped, 2006-09-27 22:56:47+04:00, sergefp@stripped +28 -0
    BUG#14940: testcase

  mysql-test/t/join.test@stripped, 2006-09-27 22:56:47+04:00, sergefp@stripped +21 -0
    BUG#14940: testcase

  sql/sql_select.cc@stripped, 2006-09-27 22:56:47+04:00, sergefp@stripped +91 -11
    BUG#14940: Slow join order is chosen: 
    - Re-worked the prev_record_reads() function to return the lower bound of
      number of different table access scans that will be performed.

  sql/sql_select.h@stripped, 2006-09-27 22:56:47+04:00, sergefp@stripped +14 -0
    BUG#14940: Slow join order is chosen:
    - Added comments in struct POSITION
    - Added POSITION::ref_depend_map: bitmap of tables that the table access
      method depends on.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	sergefp
# Host:	pylon.mylan
# Root:	/home/psergey/mysql-5.1-bug14940-r10a

--- 1.430/sql/sql_select.cc	2006-09-27 22:56:55 +04:00
+++ 1.431/sql/sql_select.cc	2006-09-27 22:56:55 +04:00
@@ -70,7 +70,7 @@
 static void find_best(JOIN *join,table_map rest_tables,uint index,
 		      double record_count,double read_time);
 static uint cache_record_length(JOIN *join,uint index);
-static double prev_record_reads(JOIN *join,table_map found_ref);
+static double prev_record_reads(JOIN *join, uint idx, table_map found_ref);
 static bool get_best_combination(JOIN *join);
 static store_key *get_store_key(THD *thd,
 				KEYUSE *keyuse, table_map used_tables,
@@ -3373,6 +3373,7 @@
   join->positions[idx].table= table;
   join->positions[idx].key=key;
   join->positions[idx].records_read=1.0;	/* This is a const table */
+  join->positions[idx].ref_depend_map= 0;
 
   /* Move the const table as down as possible in best_ref */
   JOIN_TAB **pos=join->best_ref+idx+1;
@@ -3430,6 +3431,7 @@
   double best=              DBL_MAX;
   double best_time=         DBL_MAX;
   double records=           DBL_MAX;
+  table_map best_ref_depends_map;
   double tmp;
   ha_rows rec;
 
@@ -3458,13 +3460,20 @@
 
       /* Calculate how many key segments of the current key we can use */
       start_key= keyuse;
-      do
-      { /* for each keypart */
+
+      do /* For each keypart */
+      {
         uint keypart= keyuse->keypart;
         table_map best_part_found_ref= 0;
         double best_prev_record_reads= DBL_MAX;
-        do
+        
+        do /* For each way to access the keypart */
         {
+
+          /*
+            if 1. expression doesn't refer to forward tables
+               2. we won't get two ref-or-null's
+          */
           if (!(remaining_tables & keyuse->used_tables) &&
               !(ref_or_null_part && (keyuse->optimize &
                                      KEY_OPTIMIZE_REF_OR_NULL)))
@@ -3472,8 +3481,9 @@
             found_part|= keyuse->keypart_map;
             if (!(keyuse->used_tables & ~join->const_table_map))
               const_part|= keyuse->keypart_map;
-            double tmp= prev_record_reads(join, (found_ref |
-                                                 keyuse->used_tables));
+
+            double tmp= prev_record_reads(join, idx, (found_ref |
+                                                      keyuse->used_tables));
             if (tmp < best_prev_record_reads)
             {
               best_part_found_ref= keyuse->used_tables;
@@ -3512,7 +3522,7 @@
           Really, there should be records=0.0 (yes!)
           but 1.0 would be probably safer
         */
-        tmp= prev_record_reads(join, found_ref);
+        tmp= prev_record_reads(join, idx, found_ref);
         records= 1.0;
       }
       else
@@ -3527,7 +3537,7 @@
           max_key_part= (uint) ~0;
           if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
           {
-            tmp = prev_record_reads(join, found_ref);
+            tmp = prev_record_reads(join, idx, found_ref);
             records=1.0;
           }
           else
@@ -3757,6 +3767,7 @@
         best_records= records;
         best_key= start_key;
         best_max_key_part= max_key_part;
+        best_ref_depends_map= found_ref;
       }
     }
     records= best_records;
@@ -3885,6 +3896,8 @@
       best= tmp;
       records= rows2double(rnd_records);
       best_key= 0;
+      /* range/index_merge/ALL/index access method are "independent", so: */
+      best_ref_depends_map= 0;
     }
   }
 
@@ -3893,6 +3906,7 @@
   join->positions[idx].read_time=    best;
   join->positions[idx].key=          best_key;
   join->positions[idx].table=        s;
+  join->positions[idx].ref_depend_map= best_ref_depends_map;
 
   if (!best_key &&
       idx == join->const_tables &&
@@ -4660,17 +4674,81 @@
 }
 
 
+/*
+  Get the number of different row combinations for given subset of join tables
+
+  SYNOPSIS
+    prev_record_reads()
+      join       The join structure
+      found_ref  Bitmap of tables in join->positions
+
+  DESCRIPTION
+    Get the number of different record combinations for given subset of join
+    tables.
+    This is used as follows: Suppose we have a table accessed with ref-based
+    method. The access depends on tables specified in bitmap found_ref.
+    
+    We want to count only different table accesses. The assume two table
+    accesses are different if we can't definitely guarantee they are the
+    same. Example: consider a query
+
+    SELECT * FROM t1, t2, t3 WHERE t1.key=c1 AND t2.key=c2 AND t3.key=t1.field
+
+    And a join order:
+     t1,  ref access on t1.key=c1
+     t2,  ref access on t1.key=c2       
+     t3,  ref access on t3.key=t1.field 
+    
+    The number of [different] accesses are:
+     t1: 1 ref access scan
+     t2: #records_read(t1) ref access scans,  1 distinct scan.
+     t3: #records_read(t1) * #records_read(t2) ref access scans,
+         but only #records_read(t1) distinct ref access scans.
+    
+    The reason for having this function (at least the latest version of it)
+    is: we need to account for buffering in join execution. e.g. border case:
+    if we have ref(const), or ref(expr) with small number of different
+    values of expr as a non-first table in the join, the access to this table
+    will likely go into disk cache and not require disk seeks.
+    
+    At some point in the future we'll need a proper model (e.g. assume an LRU
+    disk cache of some size). For now we just count identical ref scans as
+    one.
+
+  RETURN 
+    Expected number of row combinations
+*/
+
 static double
-prev_record_reads(JOIN *join,table_map found_ref)
+prev_record_reads(JOIN *join, uint idx, table_map found_ref)
 {
   double found=1.0;
   found_ref&= ~OUTER_REF_TABLE_BIT;
-  for (POSITION *pos=join->positions ; found_ref ; pos++)
+  for (POSITION *pos=join->positions + idx - 1 ; pos!=join->positions - 1 ;
+       pos--)
   {
     if (pos->table->table->map & found_ref)
     {
       found_ref&= ~pos->table->table->map;
-      found*=pos->records_read;
+      found_ref|= pos->ref_depend_map;
+      /* 
+        For the case of "t1 LEFT JOIN t2 ON ..." where t2 is a const table 
+        with no matching row we will get position[t2].records_read==0. 
+        Actually the size of output is one null-complemented row, therefore 
+        we will use value of 1 whenever we get records_read==0.
+
+        Note
+        - the above case can't occur if inner part of outer join has more 
+          than one table: table with no matches will not be marked as const.
+
+        - Ideally we should add 1 to records_read for every possible null-
+          complemented row. We're not doing it because: 1. it will require
+          non-trivial code and add overhead. 2. The value of records_read
+          is an inprecise estimate and adding 1 (or, in the worst case,
+          #max_nested_outer_joins=64-1) will not make it any more precise.
+      */
+      if (pos->records_read)
+        found*= pos->records_read;
     }
   }
   return found;
@@ -10242,6 +10320,7 @@
       tab->info="const row not found";
       /* Mark for EXPLAIN that the row was not found */
       pos->records_read=0.0;
+      pos->ref_depend_map= 0;
       if (!table->maybe_null || error > 0)
 	DBUG_RETURN(error);
     }
@@ -10267,6 +10346,7 @@
       tab->info="unique row not found";
       /* Mark for EXPLAIN that the row was not found */
       pos->records_read=0.0;
+      pos->ref_depend_map= 0;
       if (!table->maybe_null || error > 0)
 	DBUG_RETURN(error);
     }

--- 1.110/sql/sql_select.h	2006-09-27 22:56:55 +04:00
+++ 1.111/sql/sql_select.h	2006-09-27 22:56:55 +04:00
@@ -176,7 +176,13 @@
 */
 typedef struct st_position
 {
+  /*
+    Number of records we will retrieve from this table per each row
+    combination of previous tables. 
+      PROD{over all positions}(pos[i]->records_read) = E(#rows in join output)
+  */
   double records_read;
+
   /* 
     Cost accessing the table in course of the entire complete join execution,
     i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times 
@@ -184,7 +190,15 @@
   */
   double read_time;
   JOIN_TAB *table;
+
+  /*
+    NULL  -  'index' or 'range' or 'index_merge' or 'ALL' access is used.
+    Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
+  */
   KEYUSE *key;
+
+  /* If ref-based access is used: bitmap of tables this table depends on  */
+  table_map ref_depend_map;
 } POSITION;
 
 

--- 1.40/mysql-test/r/join.result	2006-09-27 22:56:55 +04:00
+++ 1.41/mysql-test/r/join.result	2006-09-27 22:56:55 +04:00
@@ -776,3 +776,31 @@
 1	SIMPLE	t2	ALL	a,b	NULL	NULL	NULL	1000	Using where
 1	SIMPLE	t3	ref	b	b	5	test.t2.b	1	Using where
 drop table t1, t2, t3;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, b int, primary key(a));
+insert into t2 select @v:=A.a+10*B.a, @v  from t1 A, t1 B;
+explain select * from t1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	
+show status like '%cost%';
+Variable_name	Value
+Last_query_cost	4.016090
+select 'The cost of accessing t1 (dont care if it changes' '^';
+The cost of accessing t1 (dont care if it changes
+The cost of accessing t1 (dont care if it changes^
+select 'vv: Following query must use ALL(t1), eq_ref(A), eq_ref(B): vv' Z;
+Z
+vv: Following query must use ALL(t1), eq_ref(A), eq_ref(B): vv
+explain select * from t1, t2 A, t2 B where A.a = t1.a and B.a=A.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	
+1	SIMPLE	A	eq_ref	PRIMARY	PRIMARY	4	test.t1.a	1	
+1	SIMPLE	B	eq_ref	PRIMARY	PRIMARY	4	test.A.b	1	
+show status like '%cost%';
+Variable_name	Value
+Last_query_cost	24.016090
+select '^^: The above should be ~= 20 + cost(select * from t1). Value less than 20 is an
error' Z;
+Z
+^^: The above should be ~= 20 + cost(select * from t1). Value less than 20 is an error
+drop table t1, t2;

--- 1.34/mysql-test/t/join.test	2006-09-27 22:56:55 +04:00
+++ 1.35/mysql-test/t/join.test	2006-09-27 22:56:55 +04:00
@@ -609,3 +609,24 @@
 
 drop table t1, t2, t3;
 
+# BUG#14940 {Wrong query plan is chosen because of odd results of
+# prev_record_reads() function }
+create table t1 (a int); 
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t2 (a int, b int, primary key(a));
+insert into t2 select @v:=A.a+10*B.a, @v  from t1 A, t1 B;
+
+explain select * from t1;
+show status like '%cost%';
+select 'The cost of accessing t1 (dont care if it changes' '^';
+
+select 'vv: Following query must use ALL(t1), eq_ref(A), eq_ref(B): vv' Z;
+
+explain select * from t1, t2 A, t2 B where A.a = t1.a and B.a=A.b;
+show status like '%cost%';
+select '^^: The above should be ~= 20 + cost(select * from t1). Value less than 20 is an
error' Z;
+
+
+
+drop table t1, t2;
Thread
bk commit into 5.1 tree (sergefp:1.2279) BUG#14940Sergey Petrunia27 Sep